From 57c5c7e776dd9bb901d28d52aa112a892aa8ee40 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 24 Jul 2023 14:29:35 -0700 Subject: [PATCH 001/553] Code check-in: Test code to retrieve objects from existing HashStore and write a yaml file that contains the hex digests --- .../java/org/dataone/hashstore/Client.java | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/Client.java diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java new file mode 100644 index 00000000..4ce492e1 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -0,0 +1,154 @@ +package org.dataone.hashstore; + +import java.io.BufferedWriter; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import javax.xml.bind.DatatypeConverter; + +public class Client { + private static HashStore hashStore; + + enum DefaultHashAlgorithms { + MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); + + final String algoName; + + DefaultHashAlgorithms(String algo) { + algoName = algo; + } + + public String getName() { + return algoName; + } + } + + public static void main() throws Exception { + try { + Path storePath = Paths.get("/home/mok/testing/test_all"); + + // Initialize HashStore + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", storePath.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty("storeMetadataNamespace", "http://www.ns.test/v1"); + + + // Get HashStore + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + + // Get file names from `var/metacat/data` + // String originalObjDirectory = "/var/metacata/data"; + // Path originalObjDirectoryPath = Paths.get(originalObjDirectory); + // File[] storePathFileList = storePath.toFile().listFiles(); + + Files.createDirectories(Paths.get("/home/mok/testing/test_all/douyamlcheck")); + + // for (int i = 0; i < storePathFileList.length - 1; i++) { + for (int i = 0; i < 100; i++) { + String pid = "dou.test." + i; + + try { + InputStream pidObjStream = hashStore.retrieveObject(pid); + Map hexDigests = generateChecksums(pidObjStream); + String yamlObjectString = getHexDigestsYamlString( + hexDigests.get("MD5"), hexDigests.get("SHA-1"), hexDigests.get("SHA-256"), + hexDigests.get("SHA-384"), hexDigests.get("SHA-512") + ); + + Path pidObjectYaml = Paths.get("/home/mok/testing/test_all/douyamlcheck") + .resolve(pid + ".yaml"); + + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter( + Files.newOutputStream(pidObjectYaml), StandardCharsets.UTF_8 + ) + )) { + writer.write(yamlObjectString); + + } catch (Exception e) { + e.fillInStackTrace(); + } + + } catch (FileNotFoundException fnfe) { + fnfe.fillInStackTrace(); + } catch (IOException ioe) { + ioe.fillInStackTrace(); + } catch (IllegalArgumentException iae) { + iae.fillInStackTrace(); + } catch (NoSuchAlgorithmException nsae) { + nsae.fillInStackTrace(); + } + } + + } catch (Exception e) { + e.fillInStackTrace(); + } + + + } + + private static String getHexDigestsYamlString( + String md5digest, String sha1digest, String sha256digest, String sha384digest, + String sha512digest + ) { + return String.format( + "md5digest:\n" + "- %s\n\n" + "sha1digest:\n" + "- %s\n\n" + "sha256digest:\n" + + "- %s\n\n" + "sha384digest:\n" + "- %s\n\n" + "sha512digest:\n" + "- %s\n\n", + md5digest, sha1digest, sha256digest, sha384digest, sha512digest + ); + } + + private static Map generateChecksums(InputStream pidObjStream) + throws NoSuchAlgorithmException { + MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); + MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); + MessageDigest sha256 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_256.getName()); + MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); + MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); + + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = pidObjStream.read(buffer)) != -1) { + md5.update(buffer, 0, bytesRead); + sha1.update(buffer, 0, bytesRead); + sha256.update(buffer, 0, bytesRead); + sha384.update(buffer, 0, bytesRead); + sha512.update(buffer, 0, bytesRead); + } + + } catch (Exception e) { + e.fillInStackTrace(); + } + + Map hexDigests = new HashMap<>(); + String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); + String sha1Digest = DatatypeConverter.printHexBinary(sha1.digest()).toLowerCase(); + String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha384Digest = DatatypeConverter.printHexBinary(sha384.digest()).toLowerCase(); + String sha512Digest = DatatypeConverter.printHexBinary(sha512.digest()).toLowerCase(); + hexDigests.put(DefaultHashAlgorithms.MD5.getName(), md5Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_1.getName(), sha1Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); + + return hexDigests; + } +} From 227420df225236ef9d285a68834521c26e39cdab Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 24 Jul 2023 14:41:36 -0700 Subject: [PATCH 002/553] Add missing 'String[] args' in main() for Client.py --- src/main/java/org/dataone/hashstore/Client.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 4ce492e1..69acbf4c 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -34,7 +34,8 @@ public String getName() { } } - public static void main() throws Exception { + public static void main(String[] args) throws Exception { + try { Path storePath = Paths.get("/home/mok/testing/test_all"); @@ -100,7 +101,6 @@ public static void main() throws Exception { e.fillInStackTrace(); } - } private static String getHexDigestsYamlString( From cc6db3b0db8c69872a69c2f41a256293e2126e34 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 27 Jul 2023 10:09:29 -0700 Subject: [PATCH 003/553] Code check-in: Refactor client.py to begin preparing to get pids from metacat db to retrieve and validate --- .../java/org/dataone/hashstore/Client.java | 149 ++++++++---------- 1 file changed, 62 insertions(+), 87 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 69acbf4c..074209c5 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -1,7 +1,6 @@ package org.dataone.hashstore; import java.io.BufferedWriter; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; @@ -11,8 +10,6 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.HashMap; -import java.util.Map; import java.util.Properties; import javax.xml.bind.DatatypeConverter; @@ -37,7 +34,7 @@ public String getName() { public static void main(String[] args) throws Exception { try { - Path storePath = Paths.get("/home/mok/testing/test_all"); + Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); // Initialize HashStore String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; @@ -47,53 +44,49 @@ public static void main(String[] args) throws Exception { storeProperties.setProperty("storeDepth", "3"); storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty("storeMetadataNamespace", "http://www.ns.test/v1"); - + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); // Get HashStore hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); - // Get file names from `var/metacat/data` - // String originalObjDirectory = "/var/metacata/data"; - // Path originalObjDirectoryPath = Paths.get(originalObjDirectory); - // File[] storePathFileList = storePath.toFile().listFiles(); - - Files.createDirectories(Paths.get("/home/mok/testing/test_all/douyamlcheck")); - - // for (int i = 0; i < storePathFileList.length - 1; i++) { - for (int i = 0; i < 100; i++) { - String pid = "dou.test." + i; - - try { - InputStream pidObjStream = hashStore.retrieveObject(pid); - Map hexDigests = generateChecksums(pidObjStream); - String yamlObjectString = getHexDigestsYamlString( - hexDigests.get("MD5"), hexDigests.get("SHA-1"), hexDigests.get("SHA-256"), - hexDigests.get("SHA-384"), hexDigests.get("SHA-512") - ); - - Path pidObjectYaml = Paths.get("/home/mok/testing/test_all/douyamlcheck") - .resolve(pid + ".yaml"); - - try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(pidObjectYaml), StandardCharsets.UTF_8 - ) - )) { - writer.write(yamlObjectString); - - } catch (Exception e) { - e.fillInStackTrace(); - } - - } catch (FileNotFoundException fnfe) { - fnfe.fillInStackTrace(); - } catch (IOException ioe) { - ioe.fillInStackTrace(); - } catch (IllegalArgumentException iae) { - iae.fillInStackTrace(); - } catch (NoSuchAlgorithmException nsae) { - nsae.fillInStackTrace(); + // Get guid, checksum and algorithm from metacat db into an array + // TODO: Loop over array with the following pattern + + String pid = "test"; + // TODO: Ensure algorithm is formatted properly + String algorithm = "SHA-256"; + String checksum = "abcdef12456789"; + + // Retrieve object + InputStream pidObjStream = hashStore.retrieveObject(pid); + + // Get hex digest + String streamDigest = calculateHexDigest(pidObjStream, algorithm); + + // If checksums don't match, write a .txt file + if (!streamDigest.equals(checksum)) { + // Create directory to store the error files + Path errorDirectory = Paths.get( + "/home/mok/testing/knbvm_hashstore/java/obj/errors" + ); + Files.createDirectories(errorDirectory); + Path objectErrorTxtFile = errorDirectory.resolve("/" + pid + ".txt"); + + String errMsg = "Obj retrieved (pid/guid): " + pid + + ". Checksums do not match, checksum from db: " + checksum + + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; + + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter( + Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 + ) + )) { + writer.write(errMsg); + + } catch (Exception e) { + e.fillInStackTrace(); } } @@ -103,52 +96,34 @@ public static void main(String[] args) throws Exception { } - private static String getHexDigestsYamlString( - String md5digest, String sha1digest, String sha256digest, String sha384digest, - String sha512digest - ) { - return String.format( - "md5digest:\n" + "- %s\n\n" + "sha1digest:\n" + "- %s\n\n" + "sha256digest:\n" - + "- %s\n\n" + "sha384digest:\n" + "- %s\n\n" + "sha512digest:\n" + "- %s\n\n", - md5digest, sha1digest, sha256digest, sha384digest, sha512digest - ); - } - - private static Map generateChecksums(InputStream pidObjStream) - throws NoSuchAlgorithmException { - MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); - MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); - MessageDigest sha256 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_256.getName()); - MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); - MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); - + /** + * Calculate the hex digest of a pid's respective object with the given algorithm + * + * @param inputstream Path to object + * @param algorithm Hash algorithm to use + * @return Hex digest of the pid's respective object + * @throws IOException Error when calculating hex digest + * @throws NoSuchAlgorithmException Algorithm not supported + */ + private static String calculateHexDigest(InputStream stream, String algorithm) + throws IOException, NoSuchAlgorithmException { + MessageDigest mdObject = MessageDigest.getInstance(algorithm); try { byte[] buffer = new byte[8192]; int bytesRead; - while ((bytesRead = pidObjStream.read(buffer)) != -1) { - md5.update(buffer, 0, bytesRead); - sha1.update(buffer, 0, bytesRead); - sha256.update(buffer, 0, bytesRead); - sha384.update(buffer, 0, bytesRead); - sha512.update(buffer, 0, bytesRead); + while ((bytesRead = stream.read(buffer)) != -1) { + mdObject.update(buffer, 0, bytesRead); + } + // Close stream + stream.close(); + + } catch (IOException ioe) { + ioe.fillInStackTrace(); - } catch (Exception e) { - e.fillInStackTrace(); } + // mdObjectHexDigest + return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); - Map hexDigests = new HashMap<>(); - String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); - String sha1Digest = DatatypeConverter.printHexBinary(sha1.digest()).toLowerCase(); - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha384Digest = DatatypeConverter.printHexBinary(sha384.digest()).toLowerCase(); - String sha512Digest = DatatypeConverter.printHexBinary(sha512.digest()).toLowerCase(); - hexDigests.put(DefaultHashAlgorithms.MD5.getName(), md5Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_1.getName(), sha1Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); - - return hexDigests; } } From eca7232a7061ad3ed00d47df8cb49d27436e8a51 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 27 Jul 2023 15:13:16 -0700 Subject: [PATCH 004/553] Rename variables in FileHashStore 'loadHashStoreYaml()' method to improve clarity --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d84590d5..62c11bcb 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -269,13 +269,13 @@ protected void verifyHashStoreProperties( * @throws IOException If `hashstore.yaml` doesn't exist */ protected HashMap loadHashStoreYaml(Path storePath) throws IOException { - Path hashstoreYaml = storePath.resolve("hashstore.yaml"); - File hashStoreYaml = hashstoreYaml.toFile(); + Path hashStoreYamlPath = storePath.resolve("hashstore.yaml"); + File hashStoreYamlFile = hashStoreYamlPath.toFile(); ObjectMapper om = new ObjectMapper(new YAMLFactory()); HashMap hsProperties = new HashMap<>(); try { - HashMap hashStoreYamlProperties = om.readValue(hashStoreYaml, HashMap.class); + HashMap hashStoreYamlProperties = om.readValue(hashStoreYamlFile, HashMap.class); String yamlStorePath = (String) hashStoreYamlProperties.get("store_path"); hsProperties.put(HashStoreProperties.storePath.name(), Paths.get(yamlStorePath)); hsProperties.put( From 07b083b5ea23121b7a55401e39d9b2c8191bf3ed Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 27 Jul 2023 15:47:04 -0700 Subject: [PATCH 005/553] Code check-in: Add code to connect to connect to metacat db by loading 'pgdb.yaml' file in HashStore path --- .../java/org/dataone/hashstore/Client.java | 89 +++++++++++++++---- 1 file changed, 70 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 074209c5..2403cf7b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -1,6 +1,7 @@ package org.dataone.hashstore; import java.io.BufferedWriter; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; @@ -10,10 +11,21 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.HashMap; import java.util.Properties; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.Statement; + import javax.xml.bind.DatatypeConverter; +import org.dataone.hashstore.exceptions.HashStoreFactoryException; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; + public class Client { private static HashStore hashStore; @@ -32,28 +44,49 @@ public String getName() { } public static void main(String[] args) throws Exception { + // Get a HashStore + Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); + initializeHashStore(storePath); + + // Load metacat db yaml + Path pgdbYaml = storePath.resolve("pgdb.yaml"); + File pgdbYamlFile = pgdbYaml.toFile(); + ObjectMapper om = new ObjectMapper(new YAMLFactory()); + HashMap pgdbYamlProperties = om.readValue(pgdbYamlFile, HashMap.class); + // Get db values + String url = (String) pgdbYamlProperties.get("db_uri"); + String user = (String) pgdbYamlProperties.get("db_user"); + String password = (String) pgdbYamlProperties.get("db_password"); try { - Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); - - // Initialize HashStore - String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", storePath.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - // Get HashStore - hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + // Setup metacat db access + Connection connection = DriverManager.getConnection(url, user, password); + Statement statement = connection.createStatement(); + String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + + " systemmetadata.object_format, systemmetadata.checksum," + + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" + + " ON identifier.guid = systemmetadata.guid LIMIT 1000"; + ResultSet resultSet = statement.executeQuery(sqlQuery); + + // For each row, get guid, docid, rev, checksum and checksum_algorithm + while (resultSet.next()) { + String guid = resultSet.getString("guid"); + String docid = resultSet.getString("docid"); + int rev = resultSet.getInt("rev"); + String name = resultSet.getString("name"); + + Path objfilePath = Paths.get("/var/metacat/data").resolve(docid + "." + rev); + if (Files.exists(objfilePath)) { + // TODO: ... + } + } - // Get guid, checksum and algorithm from metacat db into an array - // TODO: Loop over array with the following pattern + // Close resources + resultSet.close(); + statement.close(); + connection.close(); + // TODO: Loop over final array generated with the pattern below String pid = "test"; // TODO: Ensure algorithm is formatted properly String algorithm = "SHA-256"; @@ -72,7 +105,7 @@ public static void main(String[] args) throws Exception { "/home/mok/testing/knbvm_hashstore/java/obj/errors" ); Files.createDirectories(errorDirectory); - Path objectErrorTxtFile = errorDirectory.resolve("/" + pid + ".txt"); + Path objectErrorTxtFile = errorDirectory.resolve(pid + ".txt"); String errMsg = "Obj retrieved (pid/guid): " + pid + ". Checksums do not match, checksum from db: " + checksum @@ -96,6 +129,24 @@ public static void main(String[] args) throws Exception { } + private static void initializeHashStore(Path storePath) throws HashStoreFactoryException, + IOException { + // Initialize HashStore + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", storePath.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + // Get HashStore + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + } + /** * Calculate the hex digest of a pid's respective object with the given algorithm * From 209a624d79f68530e95240c53fddbd66cfe6c431 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 12:52:46 -0700 Subject: [PATCH 006/553] Refactor 'client.py' to retrieve and validate objects based on metacat db query --- .../java/org/dataone/hashstore/Client.java | 113 ++++++++++-------- 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 2403cf7b..55cd7206 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -13,7 +13,6 @@ import java.security.NoSuchAlgorithmException; import java.util.HashMap; import java.util.Properties; - import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; @@ -65,19 +64,48 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid LIMIT 1000"; + + " ON identifier.guid = systemmetadata.guid LIMIT 10000"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm while (resultSet.next()) { String guid = resultSet.getString("guid"); - String docid = resultSet.getString("docid"); - int rev = resultSet.getInt("rev"); - String name = resultSet.getString("name"); - - Path objfilePath = Paths.get("/var/metacat/data").resolve(docid + "." + rev); - if (Files.exists(objfilePath)) { - // TODO: ... + // String docid = resultSet.getString("docid"); + // int rev = resultSet.getInt("rev"); + // String name = resultSet.getString("name"); + String checksum = resultSet.getString("checksum"); + String checksumAlgorithm = resultSet.getString("checksum_algorithm"); + String formattedAlgo = formatAlgo(checksumAlgorithm); + + // Retrieve object + InputStream objstream = hashStore.retrieveObject(guid); + + // Get hex digest + String streamDigest = calculateHexDigest(objstream, formattedAlgo); + + // If checksums don't match, write a .txt file + if (!streamDigest.equals(checksum)) { + // Create directory to store the error files + Path errorDirectory = Paths.get( + "/home/mok/testing/knbvm_hashstore/java/obj/errors" + ); + Files.createDirectories(errorDirectory); + Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); + + String errMsg = "Obj retrieved (pid/guid): " + guid + + ". Checksums do not match, checksum from db: " + checksum + + ". Calculated digest: " + streamDigest + ". Algorithm: " + formattedAlgo; + + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter( + Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 + ) + )) { + writer.write(errMsg); + + } catch (Exception e) { + e.fillInStackTrace(); + } } } @@ -86,49 +114,38 @@ public static void main(String[] args) throws Exception { statement.close(); connection.close(); - // TODO: Loop over final array generated with the pattern below - String pid = "test"; - // TODO: Ensure algorithm is formatted properly - String algorithm = "SHA-256"; - String checksum = "abcdef12456789"; - - // Retrieve object - InputStream pidObjStream = hashStore.retrieveObject(pid); - - // Get hex digest - String streamDigest = calculateHexDigest(pidObjStream, algorithm); - - // If checksums don't match, write a .txt file - if (!streamDigest.equals(checksum)) { - // Create directory to store the error files - Path errorDirectory = Paths.get( - "/home/mok/testing/knbvm_hashstore/java/obj/errors" - ); - Files.createDirectories(errorDirectory); - Path objectErrorTxtFile = errorDirectory.resolve(pid + ".txt"); - - String errMsg = "Obj retrieved (pid/guid): " + pid - + ". Checksums do not match, checksum from db: " + checksum - + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; - - try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 - ) - )) { - writer.write(errMsg); - - } catch (Exception e) { - e.fillInStackTrace(); - } - } - } catch (Exception e) { e.fillInStackTrace(); } } + /** + * Format an algorithm string value to be compatible with MessageDigest class + * + * @param value + * @return Formatted algorithm value + */ + private static String formatAlgo(String value) { + String checkedAlgorithm = ""; + String[] SUPPORTED_HASH_ALGORITHMS = {"SHA-1", "SHA-256", "SHA-384", "SHA-512", + "SHA-512/224", "SHA-512/256"}; + + String upperValue = value.toUpperCase(); + if (upperValue.equals("MD2") || upperValue.equals("MD5")) { + checkedAlgorithm = upperValue; + } + + String[] parts = upperValue.split("(?<=\\D)(?=\\d)"); + String formattedAlgorithm = parts[0] + "-" + parts[1]; + for (String element : SUPPORTED_HASH_ALGORITHMS) { + if (element.equals(formattedAlgorithm)) { + checkedAlgorithm = formattedAlgorithm; + } + } + return checkedAlgorithm; + } + private static void initializeHashStore(Path storePath) throws HashStoreFactoryException, IOException { // Initialize HashStore @@ -150,8 +167,8 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE /** * Calculate the hex digest of a pid's respective object with the given algorithm * - * @param inputstream Path to object - * @param algorithm Hash algorithm to use + * @param stream Path to object + * @param algorithm Hash algorithm to use * @return Hex digest of the pid's respective object * @throws IOException Error when calculating hex digest * @throws NoSuchAlgorithmException Algorithm not supported From 74a48b22176f1981e256394db95fda26bcb9fb51 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 13:12:23 -0700 Subject: [PATCH 007/553] Add mvn 'shade' plugin to assist with testing Client class --- pom.xml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pom.xml b/pom.xml index 6a33ae99..a3cbb72e 100644 --- a/pom.xml +++ b/pom.xml @@ -60,6 +60,29 @@ + + org.apache.maven.plugins + maven-shade-plugin + 3.2.1 + + + package + + shade + + + + + + org.dataone.hashstore.Client + ${project.version} + + + + + + + maven-resources-plugin 3.3.0 From 7aacc826ff49b1c4e21e512466b3ae85fa9a4a95 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 14:44:03 -0700 Subject: [PATCH 008/553] Comment out 'pluginManagement' code in 'pom.xml' that is causing mvn clean package to not include dependencies in jar file --- pom.xml | 81 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/pom.xml b/pom.xml index a3cbb72e..b5b6c866 100644 --- a/pom.xml +++ b/pom.xml @@ -49,17 +49,17 @@ - + - - maven-clean-plugin - 3.1.0 + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 8 + 8 + - org.apache.maven.plugins maven-shade-plugin @@ -68,37 +68,70 @@ package - shade + shade - - - org.dataone.hashstore.Client - ${project.version} - - + + + org.dataone.hashstore.Client + ${project.version} + + + + + + *:* + + **/Log4j2Plugins.dat + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + - maven-resources-plugin - 3.3.0 + maven-jar-plugin + 2.4 + + + default-jar + package + + jar + + + + + + + maven-clean-plugin + 3.1.0 + - maven-compiler-plugin - 3.11.0 + maven-resources-plugin + 3.3.0 maven-surefire-plugin 2.22.1 - - maven-jar-plugin - 3.3.0 - maven-install-plugin 2.5.2 @@ -118,6 +151,6 @@ 3.4.2 - + \ No newline at end of file From 8620684561f860e674324d16ecb583dfa68f5680 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 14:56:22 -0700 Subject: [PATCH 009/553] Add print statements to 'client.py' --- src/main/java/org/dataone/hashstore/Client.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 55cd7206..de05c7d0 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -78,6 +78,7 @@ public static void main(String[] args) throws Exception { String formattedAlgo = formatAlgo(checksumAlgorithm); // Retrieve object + System.out.println("Retrieving object for guid: " + guid); InputStream objstream = hashStore.retrieveObject(guid); // Get hex digest @@ -106,6 +107,8 @@ public static void main(String[] args) throws Exception { } catch (Exception e) { e.fillInStackTrace(); } + } else { + System.out.println("Checksums match!"); } } From ce36b33016d876da27b15a390fe4ded9f2b92b70 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 14:58:47 -0700 Subject: [PATCH 010/553] Add missing return statement and fix info messaging in 'FileHashStore' --- src/main/java/org/dataone/hashstore/Client.java | 1 + .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index de05c7d0..09c9ae7d 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -165,6 +165,7 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE // Get HashStore hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + return; } /** diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 62c11bcb..d69ecd7a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -167,7 +167,7 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep } else { logFileHashStore.info( "FileHashStore - 'hashstore.yaml' exists and has been verified." - + "Initializing FileHashStore." + + " Initializing FileHashStore." ); } } From 3686697e04827ba573729ab7b4b2bd32607443f7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 15:01:27 -0700 Subject: [PATCH 011/553] Add further print statements to help with testing --- src/main/java/org/dataone/hashstore/Client.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 09c9ae7d..538d42e8 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -48,6 +48,7 @@ public static void main(String[] args) throws Exception { initializeHashStore(storePath); // Load metacat db yaml + System.out.println("Loading metacat db yaml."); Path pgdbYaml = storePath.resolve("pgdb.yaml"); File pgdbYamlFile = pgdbYaml.toFile(); ObjectMapper om = new ObjectMapper(new YAMLFactory()); @@ -58,6 +59,7 @@ public static void main(String[] args) throws Exception { String password = (String) pgdbYamlProperties.get("db_password"); try { + System.out.println("Connecting to metacat db."); // Setup metacat db access Connection connection = DriverManager.getConnection(url, user, password); Statement statement = connection.createStatement(); From 49cb5fbae8f4843d61900bdc19f16ce94468d058 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 15:03:51 -0700 Subject: [PATCH 012/553] Add missing semicolon in sql query to execute --- src/main/java/org/dataone/hashstore/Client.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 538d42e8..e69ad5c9 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -66,7 +66,7 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid LIMIT 10000"; + + " ON identifier.guid = systemmetadata.guid LIMIT 10000;"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm From c2efbd90557c98f3589ce39d7e5776d2a48b7cfa Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 15:08:03 -0700 Subject: [PATCH 013/553] Add additional print statement and revise exception handling --- src/main/java/org/dataone/hashstore/Client.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index e69ad5c9..6100de54 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -71,6 +71,7 @@ public static void main(String[] args) throws Exception { // For each row, get guid, docid, rev, checksum and checksum_algorithm while (resultSet.next()) { + System.out.println("Calling resultSet.next()"); String guid = resultSet.getString("guid"); // String docid = resultSet.getString("docid"); // int rev = resultSet.getInt("rev"); @@ -107,7 +108,7 @@ public static void main(String[] args) throws Exception { writer.write(errMsg); } catch (Exception e) { - e.fillInStackTrace(); + e.printStackTrace(); } } else { System.out.println("Checksums match!"); @@ -120,7 +121,7 @@ public static void main(String[] args) throws Exception { connection.close(); } catch (Exception e) { - e.fillInStackTrace(); + e.printStackTrace(); } } @@ -193,7 +194,7 @@ private static String calculateHexDigest(InputStream stream, String algorithm) stream.close(); } catch (IOException ioe) { - ioe.fillInStackTrace(); + ioe.printStackTrace(); } // mdObjectHexDigest From 66258dabe0d84633ffa0dc5f900bf969736b7ddc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 15:12:23 -0700 Subject: [PATCH 014/553] Add code to register postgres Driver to resolve SQLException 'no suitable driver found' --- src/main/java/org/dataone/hashstore/Client.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 6100de54..ba58a576 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -61,6 +61,7 @@ public static void main(String[] args) throws Exception { try { System.out.println("Connecting to metacat db."); // Setup metacat db access + Class.forName("org.postgresql.Driver"); // Force driver to register itself Connection connection = DriverManager.getConnection(url, user, password); Statement statement = connection.createStatement(); String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," From 20ec4ee0ce72ce4460a927d0fd8bb82ad9b75632 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 15:17:10 -0700 Subject: [PATCH 015/553] Add missing 'postgresql' dependency to pom.xml --- pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pom.xml b/pom.xml index b5b6c866..8009e244 100644 --- a/pom.xml +++ b/pom.xml @@ -46,6 +46,11 @@ jackson-databind 2.15.2 + + org.postgresql + postgresql + 42.4.3 + From bc9616a6004d8947ec2c4a8199144091554393eb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 15:53:59 -0700 Subject: [PATCH 016/553] Clean up code and improve print statement for testing --- pom.xml | 6 +++--- src/main/java/org/dataone/hashstore/Client.java | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 8009e244..f1fb2130 100644 --- a/pom.xml +++ b/pom.xml @@ -47,9 +47,9 @@ 2.15.2 - org.postgresql - postgresql - 42.4.3 + org.postgresql + postgresql + 42.4.3 diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index ba58a576..7c6b235d 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -86,6 +86,7 @@ public static void main(String[] args) throws Exception { InputStream objstream = hashStore.retrieveObject(guid); // Get hex digest + System.out.println("Calculating hex digest with algorithm: " + formattedAlgo); String streamDigest = calculateHexDigest(objstream, formattedAlgo); // If checksums don't match, write a .txt file From 9cebb4680f9fb66dd886a0c9a02d06d272e04e3f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 16:03:09 -0700 Subject: [PATCH 017/553] Revise client app to check for file existence before retrieving object --- .../java/org/dataone/hashstore/Client.java | 75 ++++++++++--------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 7c6b235d..4558038e 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -74,46 +74,51 @@ public static void main(String[] args) throws Exception { while (resultSet.next()) { System.out.println("Calling resultSet.next()"); String guid = resultSet.getString("guid"); - // String docid = resultSet.getString("docid"); - // int rev = resultSet.getInt("rev"); - // String name = resultSet.getString("name"); + String docid = resultSet.getString("docid"); + int rev = resultSet.getInt("rev"); String checksum = resultSet.getString("checksum"); String checksumAlgorithm = resultSet.getString("checksum_algorithm"); String formattedAlgo = formatAlgo(checksumAlgorithm); - // Retrieve object - System.out.println("Retrieving object for guid: " + guid); - InputStream objstream = hashStore.retrieveObject(guid); - - // Get hex digest - System.out.println("Calculating hex digest with algorithm: " + formattedAlgo); - String streamDigest = calculateHexDigest(objstream, formattedAlgo); - - // If checksums don't match, write a .txt file - if (!streamDigest.equals(checksum)) { - // Create directory to store the error files - Path errorDirectory = Paths.get( - "/home/mok/testing/knbvm_hashstore/java/obj/errors" - ); - Files.createDirectories(errorDirectory); - Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); - - String errMsg = "Obj retrieved (pid/guid): " + guid - + ". Checksums do not match, checksum from db: " + checksum - + ". Calculated digest: " + streamDigest + ". Algorithm: " + formattedAlgo; - - try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 - ) - )) { - writer.write(errMsg); - - } catch (Exception e) { - e.printStackTrace(); + Path setItemFilePath = Paths.get("/var/metacata/data/" + docid + "." + rev); + if (Files.exists(setItemFilePath)) { + System.out.println("File exists at: " + setItemFilePath); + + // Retrieve object + System.out.println("Retrieving object for guid: " + guid); + InputStream objstream = hashStore.retrieveObject(guid); + + // Get hex digest + System.out.println("Calculating hex digest with algorithm: " + formattedAlgo); + String streamDigest = calculateHexDigest(objstream, formattedAlgo); + + // If checksums don't match, write a .txt file + if (!streamDigest.equals(checksum)) { + // Create directory to store the error files + Path errorDirectory = Paths.get( + "/home/mok/testing/knbvm_hashstore/java/obj/errors" + ); + Files.createDirectories(errorDirectory); + Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); + + String errMsg = "Obj retrieved (pid/guid): " + guid + + ". Checksums do not match, checksum from db: " + checksum + + ". Calculated digest: " + streamDigest + ". Algorithm: " + + formattedAlgo; + + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter( + Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 + ) + )) { + writer.write(errMsg); + + } catch (Exception e) { + e.printStackTrace(); + } + } else { + System.out.println("Checksums match!"); } - } else { - System.out.println("Checksums match!"); } } From 2b15313e0d2514c96c10620cb09f089ea9558b7a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 16:04:08 -0700 Subject: [PATCH 018/553] Clean up code --- .../java/org/dataone/hashstore/Client.java | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 4558038e..003c90b1 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -28,20 +28,6 @@ public class Client { private static HashStore hashStore; - enum DefaultHashAlgorithms { - MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); - - final String algoName; - - DefaultHashAlgorithms(String algo) { - algoName = algo; - } - - public String getName() { - return algoName; - } - } - public static void main(String[] args) throws Exception { // Get a HashStore Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); @@ -136,7 +122,7 @@ public static void main(String[] args) throws Exception { /** * Format an algorithm string value to be compatible with MessageDigest class * - * @param value + * @param value Algorithm value to format * @return Formatted algorithm value */ private static String formatAlgo(String value) { @@ -154,6 +140,7 @@ private static String formatAlgo(String value) { for (String element : SUPPORTED_HASH_ALGORITHMS) { if (element.equals(formattedAlgorithm)) { checkedAlgorithm = formattedAlgorithm; + break; } } return checkedAlgorithm; @@ -175,7 +162,6 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE // Get HashStore hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); - return; } /** From 3e01ff8ec666aaccce28bbfc0a9f9d36e3fe3874 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 16:05:54 -0700 Subject: [PATCH 019/553] Revise testing print statement --- src/main/java/org/dataone/hashstore/Client.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 003c90b1..2821a83b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -67,8 +67,9 @@ public static void main(String[] args) throws Exception { String formattedAlgo = formatAlgo(checksumAlgorithm); Path setItemFilePath = Paths.get("/var/metacata/data/" + docid + "." + rev); + System.out.println(setItemFilePath); if (Files.exists(setItemFilePath)) { - System.out.println("File exists at: " + setItemFilePath); + System.out.println("File exists!"); // Retrieve object System.out.println("Retrieving object for guid: " + guid); From 5aa4886e4646ee7a65ef987ca8ecf20c6a75cede Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 28 Jul 2023 16:08:04 -0700 Subject: [PATCH 020/553] Fix typo in directory path --- src/main/java/org/dataone/hashstore/Client.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 2821a83b..2e1f59b6 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -66,7 +66,7 @@ public static void main(String[] args) throws Exception { String checksumAlgorithm = resultSet.getString("checksum_algorithm"); String formattedAlgo = formatAlgo(checksumAlgorithm); - Path setItemFilePath = Paths.get("/var/metacata/data/" + docid + "." + rev); + Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); System.out.println(setItemFilePath); if (Files.exists(setItemFilePath)) { System.out.println("File exists!"); From 64a37c296dcda7baec7b0ce1eeee1052036710b0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 31 Jul 2023 09:38:11 -0700 Subject: [PATCH 021/553] Refactor 'formatAlgo()' to move testing along --- .../java/org/dataone/hashstore/Client.java | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 2e1f59b6..7ecfbac5 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -127,22 +127,16 @@ public static void main(String[] args) throws Exception { * @return Formatted algorithm value */ private static String formatAlgo(String value) { - String checkedAlgorithm = ""; - String[] SUPPORTED_HASH_ALGORITHMS = {"SHA-1", "SHA-256", "SHA-384", "SHA-512", - "SHA-512/224", "SHA-512/256"}; - + // Temporary solution to format algorithm values + // Query: SELECT DISTINCT checksum_algorithm FROM systemmetadata; + // Output: MD5, SHA-256, SHA256, SHA-1, SHA1 String upperValue = value.toUpperCase(); - if (upperValue.equals("MD2") || upperValue.equals("MD5")) { - checkedAlgorithm = upperValue; + String checkedAlgorithm = upperValue; + if (upperValue.equals("SHA1")) { + checkedAlgorithm = "SHA-1"; } - - String[] parts = upperValue.split("(?<=\\D)(?=\\d)"); - String formattedAlgorithm = parts[0] + "-" + parts[1]; - for (String element : SUPPORTED_HASH_ALGORITHMS) { - if (element.equals(formattedAlgorithm)) { - checkedAlgorithm = formattedAlgorithm; - break; - } + if (upperValue.equals("SHA256")) { + checkedAlgorithm = "SHA-256"; } return checkedAlgorithm; } From 01a5c8a335d12290350d4e1a9e4fbc486f17af52 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 31 Jul 2023 10:00:18 -0700 Subject: [PATCH 022/553] Refactor main method to catch unexpected exceptions and log to file to help with debugging --- .../java/org/dataone/hashstore/Client.java | 76 +++++++++++-------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 7ecfbac5..cdb20533 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -71,40 +71,33 @@ public static void main(String[] args) throws Exception { if (Files.exists(setItemFilePath)) { System.out.println("File exists!"); - // Retrieve object - System.out.println("Retrieving object for guid: " + guid); - InputStream objstream = hashStore.retrieveObject(guid); - - // Get hex digest - System.out.println("Calculating hex digest with algorithm: " + formattedAlgo); - String streamDigest = calculateHexDigest(objstream, formattedAlgo); - - // If checksums don't match, write a .txt file - if (!streamDigest.equals(checksum)) { - // Create directory to store the error files - Path errorDirectory = Paths.get( - "/home/mok/testing/knbvm_hashstore/java/obj/errors" + try { + // Retrieve object + System.out.println("Retrieving object for guid: " + guid); + InputStream objstream = hashStore.retrieveObject(guid); + + // Get hex digest + System.out.println( + "Calculating hex digest with algorithm: " + formattedAlgo ); - Files.createDirectories(errorDirectory); - Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); - - String errMsg = "Obj retrieved (pid/guid): " + guid - + ". Checksums do not match, checksum from db: " + checksum - + ". Calculated digest: " + streamDigest + ". Algorithm: " - + formattedAlgo; - - try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 - ) - )) { - writer.write(errMsg); - - } catch (Exception e) { - e.printStackTrace(); + String streamDigest = calculateHexDigest(objstream, formattedAlgo); + + // If checksums don't match, write a .txt file + if (!streamDigest.equals(checksum)) { + + + String errMsg = "Obj retrieved (pid/guid): " + guid + + ". Checksums do not match, checksum from db: " + checksum + + ". Calculated digest: " + streamDigest + ". Algorithm: " + + formattedAlgo; + + logExceptionToFile(guid, errMsg); + } else { + System.out.println("Checksums match!"); } - } else { - System.out.println("Checksums match!"); + } catch (Exception e) { + String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + logExceptionToFile(guid, errMsg); } } } @@ -120,6 +113,25 @@ public static void main(String[] args) throws Exception { } + private static void logExceptionToFile(String guid, String errMsg) throws Exception { + // Create directory to store the error files + Path errorDirectory = Paths.get("/home/mok/testing/knbvm_hashstore/java/obj/errors"); + Files.createDirectories(errorDirectory); + Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); + + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter( + Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 + ) + )) { + writer.write(errMsg); + + } catch (Exception e) { + e.printStackTrace(); + + } + } + /** * Format an algorithm string value to be compatible with MessageDigest class * From 73e95d53edce2b067c10dcf7a4021a0da1238b9b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 31 Jul 2023 10:18:04 -0700 Subject: [PATCH 023/553] Improve exception handling and logging when retrieving and validating objects --- .../java/org/dataone/hashstore/Client.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index cdb20533..4b3646a1 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -2,6 +2,7 @@ import java.io.BufferedWriter; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; @@ -84,20 +85,20 @@ public static void main(String[] args) throws Exception { // If checksums don't match, write a .txt file if (!streamDigest.equals(checksum)) { - - String errMsg = "Obj retrieved (pid/guid): " + guid + ". Checksums do not match, checksum from db: " + checksum + ". Calculated digest: " + streamDigest + ". Algorithm: " + formattedAlgo; - - logExceptionToFile(guid, errMsg); + logExceptionToFile(guid, errMsg, "checksum_mismatch"); } else { System.out.println("Checksums match!"); } + } catch (FileNotFoundException fnfe) { + String errMsg = "File not found: " + fnfe.fillInStackTrace(); + logExceptionToFile(guid, errMsg, "filenotfound"); } catch (Exception e) { String errMsg = "Unexpected Error: " + e.fillInStackTrace(); - logExceptionToFile(guid, errMsg); + logExceptionToFile(guid, errMsg, "general"); } } } @@ -113,9 +114,12 @@ public static void main(String[] args) throws Exception { } - private static void logExceptionToFile(String guid, String errMsg) throws Exception { + private static void logExceptionToFile(String guid, String errMsg, String directory) + throws Exception { // Create directory to store the error files - Path errorDirectory = Paths.get("/home/mok/testing/knbvm_hashstore/java/obj/errors"); + Path errorDirectory = Paths.get( + "/home/mok/testing/knbvm_hashstore/java/obj/errors/" + directory + ); Files.createDirectories(errorDirectory); Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); From 82bd973a0fc6db034ec040f72f3393456d877ec6 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 31 Jul 2023 13:11:04 -0700 Subject: [PATCH 024/553] Add initial code to test parallelizing retrieving objects from knbvm_hashstore via .parallelStream() --- .../java/org/dataone/hashstore/Client.java | 77 ++++++++++++------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 4b3646a1..23ab9f9d 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -12,7 +12,10 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Properties; import java.sql.Connection; import java.sql.DriverManager; @@ -58,6 +61,8 @@ public static void main(String[] args) throws Exception { ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm + // and create a List to loop over + List> resultObjList = new ArrayList<>(); while (resultSet.next()) { System.out.println("Calling resultSet.next()"); String guid = resultSet.getString("guid"); @@ -68,40 +73,60 @@ public static void main(String[] args) throws Exception { String formattedAlgo = formatAlgo(checksumAlgorithm); Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); - System.out.println(setItemFilePath); if (Files.exists(setItemFilePath)) { - System.out.println("File exists!"); + System.out.println("File exists: " + setItemFilePath); + Map resultObj = new HashMap<>(); + resultObj.put("pid", guid); + resultObj.put("algorithm", formattedAlgo); + resultObj.put("checksum", checksum); + resultObjList.add(resultObj); + } + } + + // Loop over List + resultObjList.parallelStream().forEach(item -> { + String guid = null; + try { + guid = item.get("pid"); + String algorithm = item.get("algorithm"); + String checksum = item.get("checksum"); + // Retrieve object + System.out.println("Retrieving object for guid: " + guid); + InputStream objstream = hashStore.retrieveObject(guid); + + // Get hex digest + System.out.println("Calculating hex digest with algorithm: " + algorithm); + String streamDigest = calculateHexDigest(objstream, algorithm); + + // If checksums don't match, write a .txt file + if (!streamDigest.equals(checksum)) { + String errMsg = "Obj retrieved (pid/guid): " + guid + + ". Checksums do not match, checksum from db: " + checksum + + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; + logExceptionToFile(guid, errMsg, "checksum_mismatch"); + } else { + System.out.println("Checksums match!"); + } + + } catch (FileNotFoundException fnfe) { + String errMsg = "File not found: " + fnfe.fillInStackTrace(); try { - // Retrieve object - System.out.println("Retrieving object for guid: " + guid); - InputStream objstream = hashStore.retrieveObject(guid); - - // Get hex digest - System.out.println( - "Calculating hex digest with algorithm: " + formattedAlgo - ); - String streamDigest = calculateHexDigest(objstream, formattedAlgo); - - // If checksums don't match, write a .txt file - if (!streamDigest.equals(checksum)) { - String errMsg = "Obj retrieved (pid/guid): " + guid - + ". Checksums do not match, checksum from db: " + checksum - + ". Calculated digest: " + streamDigest + ". Algorithm: " - + formattedAlgo; - logExceptionToFile(guid, errMsg, "checksum_mismatch"); - } else { - System.out.println("Checksums match!"); - } - } catch (FileNotFoundException fnfe) { - String errMsg = "File not found: " + fnfe.fillInStackTrace(); logExceptionToFile(guid, errMsg, "filenotfound"); } catch (Exception e) { - String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + e.printStackTrace(); + } + + } catch (Exception e) { + String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + try { logExceptionToFile(guid, errMsg, "general"); + } catch (Exception e1) { + e1.printStackTrace(); } + } - } + }); // Close resources resultSet.close(); From a905b84e7c98382b4b7dedb9b359926c8dca20e5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 31 Jul 2023 14:23:07 -0700 Subject: [PATCH 025/553] Remove limit on SQL query to test retrieving all data objects stored in knbvm_hashstore --- src/main/java/org/dataone/hashstore/Client.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 23ab9f9d..59e1896e 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -57,7 +57,7 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid LIMIT 10000;"; + + " ON identifier.guid = systemmetadata.guid;"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm From 8727053355ef16d62d13c80fffba4640ddec4608 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 31 Jul 2023 14:53:35 -0700 Subject: [PATCH 026/553] Clean up code --- .../java/org/dataone/hashstore/Client.java | 93 ++++++++++--------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 59e1896e..06a92dbe 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -84,49 +84,7 @@ public static void main(String[] args) throws Exception { } } - // Loop over List - resultObjList.parallelStream().forEach(item -> { - String guid = null; - try { - guid = item.get("pid"); - String algorithm = item.get("algorithm"); - String checksum = item.get("checksum"); - // Retrieve object - System.out.println("Retrieving object for guid: " + guid); - InputStream objstream = hashStore.retrieveObject(guid); - - // Get hex digest - System.out.println("Calculating hex digest with algorithm: " + algorithm); - String streamDigest = calculateHexDigest(objstream, algorithm); - - // If checksums don't match, write a .txt file - if (!streamDigest.equals(checksum)) { - String errMsg = "Obj retrieved (pid/guid): " + guid - + ". Checksums do not match, checksum from db: " + checksum - + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; - logExceptionToFile(guid, errMsg, "checksum_mismatch"); - } else { - System.out.println("Checksums match!"); - } - - } catch (FileNotFoundException fnfe) { - String errMsg = "File not found: " + fnfe.fillInStackTrace(); - try { - logExceptionToFile(guid, errMsg, "filenotfound"); - } catch (Exception e) { - e.printStackTrace(); - } - - } catch (Exception e) { - String errMsg = "Unexpected Error: " + e.fillInStackTrace(); - try { - logExceptionToFile(guid, errMsg, "general"); - } catch (Exception e1) { - e1.printStackTrace(); - } - - } - }); + retrieveAndValidateObjs(resultObjList); // Close resources resultSet.close(); @@ -139,12 +97,55 @@ public static void main(String[] args) throws Exception { } + private static void retrieveAndValidateObjs(List> resultObjList) { + resultObjList.parallelStream().forEach(item -> { + String guid = null; + try { + guid = item.get("pid"); + String algorithm = item.get("algorithm"); + String checksum = item.get("checksum"); + // Retrieve object + System.out.println("Retrieving object for guid: " + guid); + InputStream objstream = hashStore.retrieveObject(guid); + + // Get hex digest + System.out.println("Calculating hex digest with algorithm: " + algorithm); + String streamDigest = calculateHexDigest(objstream, algorithm); + + // If checksums don't match, write a .txt file + if (!streamDigest.equals(checksum)) { + String errMsg = "Obj retrieved (pid/guid): " + guid + + ". Checksums do not match, checksum from db: " + checksum + + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; + logExceptionToFile(guid, errMsg, "obj/errors/checksum_mismatch"); + } else { + System.out.println("Checksums match!"); + } + + } catch (FileNotFoundException fnfe) { + String errMsg = "File not found: " + fnfe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "obj/errors/filenotfound"); + } catch (Exception e) { + e.printStackTrace(); + } + + } catch (Exception e) { + String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "obj/errors/general"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } + }); + } + private static void logExceptionToFile(String guid, String errMsg, String directory) throws Exception { // Create directory to store the error files - Path errorDirectory = Paths.get( - "/home/mok/testing/knbvm_hashstore/java/obj/errors/" + directory - ); + Path errorDirectory = Paths.get("/home/mok/testing/knbvm_hashstore/java/" + directory); Files.createDirectories(errorDirectory); Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); From 7e241cae397eac4860cfda8a62526f42d4a6666d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 14:06:36 -0700 Subject: [PATCH 027/553] Add overload method declarations in 'HashStore' interface and update 'FileHashStore' class --- src/main/java/org/dataone/hashstore/HashStore.java | 12 ++++++++++++ .../hashstore/filehashstore/FileHashStore.java | 3 +++ 2 files changed, 15 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 8357ac58..b2e7f50d 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -59,6 +59,18 @@ ObjectInfo storeObject( String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + ObjectInfo storeObject( + InputStream object, String pid, String checksum, String checksumAlgorithm + ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + + ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) + throws NoSuchAlgorithmException, IOException, PidObjectExistsException, + RuntimeException; + + ObjectInfo storeObject(InputStream object, String pid, long objSize) + throws NoSuchAlgorithmException, IOException, PidObjectExistsException, + RuntimeException; + /** * The `storeMetadata` method is responsible for adding/updating metadata (ex. `sysmeta`) to * the HashStore by using a given InputStream, a persistent identifier (`pid`) and metadata diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d69ecd7a..428da24b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -512,6 +512,7 @@ private ObjectInfo syncPubObject( /** * Overload method for storeObject with an additionalAlgorithm */ + @Override public ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { logFileHashStore.debug( @@ -534,6 +535,7 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA /** * Overload method for storeObject with just a checksum and checksumAlgorithm */ + @Override public ObjectInfo storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { @@ -557,6 +559,7 @@ public ObjectInfo storeObject( /** * Overload method for storeObject with size of object to validate */ + @Override public ObjectInfo storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { logFileHashStore.debug( From fc1c4aac2c56436714ec6703be4f563e78d434af Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 14:09:07 -0700 Subject: [PATCH 028/553] Modify client app to store objects with checksums --- .../java/org/dataone/hashstore/Client.java | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 06a92dbe..e1b8e4d1 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -57,7 +57,7 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid;"; + + " ON identifier.guid = systemmetadata.guid LIMIT 10000;"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm @@ -79,12 +79,14 @@ public static void main(String[] args) throws Exception { resultObj.put("pid", guid); resultObj.put("algorithm", formattedAlgo); resultObj.put("checksum", checksum); + resultObj.put("path", setItemFilePath.toString()); resultObjList.add(resultObj); } } - retrieveAndValidateObjs(resultObjList); + // retrieveAndValidateObjs(resultObjList); + storeObjectsWithChecksum(resultObjList); // Close resources resultSet.close(); @@ -97,6 +99,39 @@ public static void main(String[] args) throws Exception { } + private static void storeObjectsWithChecksum(List> resultObjList) { + resultObjList.parallelStream().forEach(item -> { + String guid = null; + try { + guid = item.get("pid"); + InputStream objStream = Files.newInputStream(Paths.get(item.get("path"))); + String algorithm = item.get("algorithm"); + String checksum = item.get("checksum"); + + // Store object + System.out.println("Storing object for guid: " + guid); + hashStore.storeObject(objStream, guid, checksum, algorithm); + + } catch (IOException ioe) { + String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "obj/store_errors/io"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } catch (Exception e) { + String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "obj/store_errors/general"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } + }); + } + private static void retrieveAndValidateObjs(List> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; @@ -104,6 +139,7 @@ private static void retrieveAndValidateObjs(List> resultObjL guid = item.get("pid"); String algorithm = item.get("algorithm"); String checksum = item.get("checksum"); + // Retrieve object System.out.println("Retrieving object for guid: " + guid); InputStream objstream = hashStore.retrieveObject(guid); From 4efd67c50d20443959dc5beb239e54e195043c01 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 14:15:50 -0700 Subject: [PATCH 029/553] Change client store directory for testing purposes --- src/main/java/org/dataone/hashstore/Client.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index e1b8e4d1..c0006d1a 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -34,7 +34,7 @@ public class Client { public static void main(String[] args) throws Exception { // Get a HashStore - Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); + Path storePath = Paths.get("/home/mok/testing/knbvm_testlog"); initializeHashStore(storePath); // Load metacat db yaml From 9ed589d4e6f5c4a0865be05e6925eae9b5b00f34 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 14:36:30 -0700 Subject: [PATCH 030/553] Change HashStore directory and remove limit on SQL query to prepare for storing all objects --- src/main/java/org/dataone/hashstore/Client.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index c0006d1a..6f9cfd1d 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -34,7 +34,7 @@ public class Client { public static void main(String[] args) throws Exception { // Get a HashStore - Path storePath = Paths.get("/home/mok/testing/knbvm_testlog"); + Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); initializeHashStore(storePath); // Load metacat db yaml @@ -57,7 +57,7 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid LIMIT 10000;"; + + " ON identifier.guid = systemmetadata.guid"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm From 64479ac6fc13a6b7e9d75c0ee0cf7a7c8dbcdc43 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 16:35:07 -0700 Subject: [PATCH 031/553] Add new catch statement to specifically log for when checksums do not match --- src/main/java/org/dataone/hashstore/Client.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 6f9cfd1d..afcec56e 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -112,6 +112,14 @@ private static void storeObjectsWithChecksum(List> resultObj System.out.println("Storing object for guid: " + guid); hashStore.storeObject(objStream, guid, checksum, algorithm); + } catch (IllegalArgumentException iae) { + String errMsg = "Unexpected Error: " + iae.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "obj/store_errors/illegalargument"); + } catch (Exception e1) { + e1.printStackTrace(); + } + } catch (IOException ioe) { String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); try { From 0379136cad86708aa89640304ed832ec01cb4bc4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 16:38:01 -0700 Subject: [PATCH 032/553] Change client app to run shorter test (100000 objects) before attempting entire metacata obj directory --- src/main/java/org/dataone/hashstore/Client.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index afcec56e..20c35447 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -34,7 +34,7 @@ public class Client { public static void main(String[] args) throws Exception { // Get a HashStore - Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); + Path storePath = Paths.get("/home/mok/testing/knbvm_testlog"); initializeHashStore(storePath); // Load metacat db yaml @@ -57,7 +57,7 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid"; + + " ON identifier.guid = systemmetadata.guid LIMIT 100000;"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm From 1049bc3800d14ede3c73d6e9d9d25d30de0e167c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 16:51:17 -0700 Subject: [PATCH 033/553] Refactor HashStore path to be a static variable --- src/main/java/org/dataone/hashstore/Client.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 20c35447..2c6ffffc 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -31,10 +31,10 @@ public class Client { private static HashStore hashStore; + private static Path storePath = Paths.get("/home/mok/testing/knbvm_testlog"); public static void main(String[] args) throws Exception { // Get a HashStore - Path storePath = Paths.get("/home/mok/testing/knbvm_testlog"); initializeHashStore(storePath); // Load metacat db yaml @@ -161,7 +161,7 @@ private static void retrieveAndValidateObjs(List> resultObjL String errMsg = "Obj retrieved (pid/guid): " + guid + ". Checksums do not match, checksum from db: " + checksum + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; - logExceptionToFile(guid, errMsg, "obj/errors/checksum_mismatch"); + logExceptionToFile(guid, errMsg, "obj/retrieve_errors/checksum_mismatch"); } else { System.out.println("Checksums match!"); } @@ -169,7 +169,7 @@ private static void retrieveAndValidateObjs(List> resultObjL } catch (FileNotFoundException fnfe) { String errMsg = "File not found: " + fnfe.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "obj/errors/filenotfound"); + logExceptionToFile(guid, errMsg, "obj/retrieve_errors/filenotfound"); } catch (Exception e) { e.printStackTrace(); } @@ -177,7 +177,7 @@ private static void retrieveAndValidateObjs(List> resultObjL } catch (Exception e) { String errMsg = "Unexpected Error: " + e.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "obj/errors/general"); + logExceptionToFile(guid, errMsg, "obj/retrieve_errors/general"); } catch (Exception e1) { e1.printStackTrace(); } @@ -189,7 +189,7 @@ private static void retrieveAndValidateObjs(List> resultObjL private static void logExceptionToFile(String guid, String errMsg, String directory) throws Exception { // Create directory to store the error files - Path errorDirectory = Paths.get("/home/mok/testing/knbvm_hashstore/java/" + directory); + Path errorDirectory = storePath.resolve(directory); Files.createDirectories(errorDirectory); Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); From 17612e1ac094ac3b272d1928b34f7048f6d78486 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 1 Aug 2023 16:54:55 -0700 Subject: [PATCH 034/553] Fix error directory path --- src/main/java/org/dataone/hashstore/Client.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 2c6ffffc..59309e9b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -115,7 +115,7 @@ private static void storeObjectsWithChecksum(List> resultObj } catch (IllegalArgumentException iae) { String errMsg = "Unexpected Error: " + iae.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "obj/store_errors/illegalargument"); + logExceptionToFile(guid, errMsg, "java/store_errors/illegalargument"); } catch (Exception e1) { e1.printStackTrace(); } @@ -123,7 +123,7 @@ private static void storeObjectsWithChecksum(List> resultObj } catch (IOException ioe) { String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "obj/store_errors/io"); + logExceptionToFile(guid, errMsg, "java/store_errors/io"); } catch (Exception e1) { e1.printStackTrace(); } @@ -131,7 +131,7 @@ private static void storeObjectsWithChecksum(List> resultObj } catch (Exception e) { String errMsg = "Unexpected Error: " + e.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "obj/store_errors/general"); + logExceptionToFile(guid, errMsg, "java/store_errors/general"); } catch (Exception e1) { e1.printStackTrace(); } @@ -161,7 +161,7 @@ private static void retrieveAndValidateObjs(List> resultObjL String errMsg = "Obj retrieved (pid/guid): " + guid + ". Checksums do not match, checksum from db: " + checksum + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; - logExceptionToFile(guid, errMsg, "obj/retrieve_errors/checksum_mismatch"); + logExceptionToFile(guid, errMsg, "java/retrieve_errors/checksum_mismatch"); } else { System.out.println("Checksums match!"); } @@ -169,7 +169,7 @@ private static void retrieveAndValidateObjs(List> resultObjL } catch (FileNotFoundException fnfe) { String errMsg = "File not found: " + fnfe.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "obj/retrieve_errors/filenotfound"); + logExceptionToFile(guid, errMsg, "java/retrieve_errors/filenotfound"); } catch (Exception e) { e.printStackTrace(); } @@ -177,7 +177,7 @@ private static void retrieveAndValidateObjs(List> resultObjL } catch (Exception e) { String errMsg = "Unexpected Error: " + e.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "obj/retrieve_errors/general"); + logExceptionToFile(guid, errMsg, "java/retrieve_errors/general"); } catch (Exception e1) { e1.printStackTrace(); } From 80d5b1e4880b73f34dd407c5f81d014c14f15531 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 2 Aug 2023 08:52:21 -0700 Subject: [PATCH 035/553] Remove sql limit and change directory to test store all objects --- src/main/java/org/dataone/hashstore/Client.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 59309e9b..e2c5cee1 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -31,7 +31,7 @@ public class Client { private static HashStore hashStore; - private static Path storePath = Paths.get("/home/mok/testing/knbvm_testlog"); + private static Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); public static void main(String[] args) throws Exception { // Get a HashStore @@ -57,7 +57,7 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid LIMIT 100000;"; + + " ON identifier.guid = systemmetadata.guid;"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm From d05a4b4ddafdc33cb66b3d03b93b0eabdb7f69c7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sun, 6 Aug 2023 10:31:14 -0700 Subject: [PATCH 036/553] Explicitly catch 'PidObjectExistsException' to debug errors --- src/main/java/org/dataone/hashstore/Client.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index e2c5cee1..b91701ce 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -25,6 +25,7 @@ import javax.xml.bind.DatatypeConverter; import org.dataone.hashstore.exceptions.HashStoreFactoryException; +import org.dataone.hashstore.exceptions.PidObjectExistsException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -112,6 +113,14 @@ private static void storeObjectsWithChecksum(List> resultObj System.out.println("Storing object for guid: " + guid); hashStore.storeObject(objStream, guid, checksum, algorithm); + } catch (PidObjectExistsException poee) { + String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/store_errors/illegalargument"); + } catch (Exception e1) { + e1.printStackTrace(); + } + } catch (IllegalArgumentException iae) { String errMsg = "Unexpected Error: " + iae.fillInStackTrace(); try { From a36459c4ce9c724d649c9700382ced4b4be584fd Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sun, 6 Aug 2023 10:38:53 -0700 Subject: [PATCH 037/553] Print exception when object already exists instead of logging a file to reduce overhead --- src/main/java/org/dataone/hashstore/Client.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index b91701ce..89fb3fc6 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -114,12 +114,13 @@ private static void storeObjectsWithChecksum(List> resultObj hashStore.storeObject(objStream, guid, checksum, algorithm); } catch (PidObjectExistsException poee) { - String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); - try { - logExceptionToFile(guid, errMsg, "java/store_errors/illegalargument"); - } catch (Exception e1) { - e1.printStackTrace(); - } + poee.printStackTrace(); + // String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); + // try { + // logExceptionToFile(guid, errMsg, "java/store_errors/pidobjectexists"); + // } catch (Exception e1) { + // e1.printStackTrace(); + // } } catch (IllegalArgumentException iae) { String errMsg = "Unexpected Error: " + iae.fillInStackTrace(); From 34c1d4b3f22eec48a4b5d1b9095b6eefefc730f4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sun, 6 Aug 2023 10:46:52 -0700 Subject: [PATCH 038/553] Change exception message for when pid already exists --- src/main/java/org/dataone/hashstore/Client.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 89fb3fc6..9bcfec97 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -114,7 +114,7 @@ private static void storeObjectsWithChecksum(List> resultObj hashStore.storeObject(objStream, guid, checksum, algorithm); } catch (PidObjectExistsException poee) { - poee.printStackTrace(); + System.out.println("Object already exists for pid: " + guid); // String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); // try { // logExceptionToFile(guid, errMsg, "java/store_errors/pidobjectexists"); From 73b9aacfaae008dfc204f5e1d254b70430bd5b4f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 10 Aug 2023 10:17:56 -0700 Subject: [PATCH 039/553] Update java client to test storing metadata --- .../java/org/dataone/hashstore/Client.java | 54 +++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 9bcfec97..9b43002b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -72,22 +72,34 @@ public static void main(String[] args) throws Exception { String checksum = resultSet.getString("checksum"); String checksumAlgorithm = resultSet.getString("checksum_algorithm"); String formattedAlgo = formatAlgo(checksumAlgorithm); - - Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); + String formatId = resultSet.getString("object_format"); + + // Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); + // if (Files.exists(setItemFilePath)) { + // System.out.println("File exists: " + setItemFilePath); + // Map resultObj = new HashMap<>(); + // resultObj.put("pid", guid); + // resultObj.put("algorithm", formattedAlgo); + // resultObj.put("checksum", checksum); + // resultObj.put("path", setItemFilePath.toString()); + + // resultObjList.add(resultObj); + // } + Path setItemFilePath = Paths.get("/var/metacat/documents/" + docid + "." + rev); if (Files.exists(setItemFilePath)) { System.out.println("File exists: " + setItemFilePath); Map resultObj = new HashMap<>(); resultObj.put("pid", guid); - resultObj.put("algorithm", formattedAlgo); - resultObj.put("checksum", checksum); resultObj.put("path", setItemFilePath.toString()); + resultObj.put("namespace", formatId); resultObjList.add(resultObj); } } // retrieveAndValidateObjs(resultObjList); - storeObjectsWithChecksum(resultObjList); + // storeObjectsWithChecksum(resultObjList); + storeMetadataFromDb(resultObjList); // Close resources resultSet.close(); @@ -196,6 +208,38 @@ private static void retrieveAndValidateObjs(List> resultObjL }); } + private static void storeMetadataFromDb(List> resultObjList) { + resultObjList.parallelStream().forEach(item -> { + String guid = null; + try { + guid = item.get("pid"); + InputStream objStream = Files.newInputStream(Paths.get(item.get("path"))); + String formatId = item.get("namespace"); + + // Store object + System.out.println("Storing metadata for guid: " + guid); + hashStore.storeMetadata(objStream, guid, formatId); + + } catch (IOException ioe) { + String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/store_metadata_errors/io"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } catch (Exception e) { + String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/store_metadata_errors/general"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } + }); + } + private static void logExceptionToFile(String guid, String errMsg, String directory) throws Exception { // Create directory to store the error files From ce048e17646ea8ee39566c48a4f0735e90c0b5f5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 15 Aug 2023 11:56:38 -0700 Subject: [PATCH 040/553] Update client app with code to delete objects from list in HashStore --- .../java/org/dataone/hashstore/Client.java | 70 +++++++++++++++---- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 9b43002b..bec048b3 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -58,12 +58,13 @@ public static void main(String[] args) throws Exception { String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid;"; + + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid;"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm // and create a List to loop over List> resultObjList = new ArrayList<>(); + while (resultSet.next()) { System.out.println("Calling resultSet.next()"); String guid = resultSet.getString("guid"); @@ -74,32 +75,33 @@ public static void main(String[] args) throws Exception { String formattedAlgo = formatAlgo(checksumAlgorithm); String formatId = resultSet.getString("object_format"); - // Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); - // if (Files.exists(setItemFilePath)) { - // System.out.println("File exists: " + setItemFilePath); - // Map resultObj = new HashMap<>(); - // resultObj.put("pid", guid); - // resultObj.put("algorithm", formattedAlgo); - // resultObj.put("checksum", checksum); - // resultObj.put("path", setItemFilePath.toString()); - - // resultObjList.add(resultObj); - // } - Path setItemFilePath = Paths.get("/var/metacat/documents/" + docid + "." + rev); + Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); if (Files.exists(setItemFilePath)) { System.out.println("File exists: " + setItemFilePath); Map resultObj = new HashMap<>(); resultObj.put("pid", guid); + resultObj.put("algorithm", formattedAlgo); + resultObj.put("checksum", checksum); resultObj.put("path", setItemFilePath.toString()); - resultObj.put("namespace", formatId); resultObjList.add(resultObj); } + // Path setItemFilePath = Paths.get("/var/metacat/documents/" + docid + "." + rev); + // if (Files.exists(setItemFilePath)) { + // System.out.println("File exists: " + setItemFilePath); + // Map resultObj = new HashMap<>(); + // resultObj.put("pid", guid); + // resultObj.put("path", setItemFilePath.toString()); + // resultObj.put("namespace", formatId); + + // resultObjList.add(resultObj); + // } } // retrieveAndValidateObjs(resultObjList); // storeObjectsWithChecksum(resultObjList); - storeMetadataFromDb(resultObjList); + deleteObjectsFromStore(resultObjList); + // storeMetadataFromDb(resultObjList); // Close resources resultSet.close(); @@ -208,6 +210,44 @@ private static void retrieveAndValidateObjs(List> resultObjL }); } + private static void deleteObjectsFromStore(List> resultObjList) { + resultObjList.parallelStream().forEach(item -> { + String guid = null; + try { + guid = item.get("pid"); + + // Delete object + System.out.println("Deleting object for guid: " + guid); + hashStore.deleteObject(guid); + + } catch (FileNotFoundException fnfe) { + String errMsg = "Unexpected Error: " + fnfe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/delete_errors/filenotfound"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } catch (IOException ioe) { + String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/delete_errors/io"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } catch (Exception e) { + String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/delete_errors/general"); + } catch (Exception e1) { + e1.printStackTrace(); + } + + } + }); + } + private static void storeMetadataFromDb(List> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; From 246b5c5562da4aa2b7f9a646855a0452fc3c9475 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 21 Aug 2023 10:03:11 -0700 Subject: [PATCH 041/553] Add new dependency (org.apache.commons.cli) to facilitate parsing arguments --- pom.xml | 5 + .../java/org/dataone/hashstore/Client.java | 173 +++++++++++------- 2 files changed, 110 insertions(+), 68 deletions(-) diff --git a/pom.xml b/pom.xml index f1fb2130..445792e3 100644 --- a/pom.xml +++ b/pom.xml @@ -25,6 +25,11 @@ commons-logging 1.2 + + commons-cli + commons-cli + 1.5.0 + junit junit diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index bec048b3..28b2a465 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -24,6 +24,12 @@ import javax.xml.bind.DatatypeConverter; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; import org.dataone.hashstore.exceptions.HashStoreFactoryException; import org.dataone.hashstore.exceptions.PidObjectExistsException; @@ -35,81 +41,112 @@ public class Client { private static Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); public static void main(String[] args) throws Exception { - // Get a HashStore - initializeHashStore(storePath); - - // Load metacat db yaml - System.out.println("Loading metacat db yaml."); - Path pgdbYaml = storePath.resolve("pgdb.yaml"); - File pgdbYamlFile = pgdbYaml.toFile(); - ObjectMapper om = new ObjectMapper(new YAMLFactory()); - HashMap pgdbYamlProperties = om.readValue(pgdbYamlFile, HashMap.class); - // Get db values - String url = (String) pgdbYamlProperties.get("db_uri"); - String user = (String) pgdbYamlProperties.get("db_user"); - String password = (String) pgdbYamlProperties.get("db_password"); + if (args.length == 0) { + System.out.println("No arguments provided. Use flag '-h' for help."); + } + Options options = new Options(); + options.addOption("h", "help", false, "Show help options."); + options.addOption("knbvm", "knbvmtestadc", false, "Specify testing with knbvm"); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + boolean knbvmTest = false; try { - System.out.println("Connecting to metacat db."); - // Setup metacat db access - Class.forName("org.postgresql.Driver"); // Force driver to register itself - Connection connection = DriverManager.getConnection(url, user, password); - Statement statement = connection.createStatement(); - String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," - + " systemmetadata.object_format, systemmetadata.checksum," - + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid;"; - ResultSet resultSet = statement.executeQuery(sqlQuery); - - // For each row, get guid, docid, rev, checksum and checksum_algorithm - // and create a List to loop over - List> resultObjList = new ArrayList<>(); - - while (resultSet.next()) { - System.out.println("Calling resultSet.next()"); - String guid = resultSet.getString("guid"); - String docid = resultSet.getString("docid"); - int rev = resultSet.getInt("rev"); - String checksum = resultSet.getString("checksum"); - String checksumAlgorithm = resultSet.getString("checksum_algorithm"); - String formattedAlgo = formatAlgo(checksumAlgorithm); - String formatId = resultSet.getString("object_format"); - - Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); - if (Files.exists(setItemFilePath)) { - System.out.println("File exists: " + setItemFilePath); - Map resultObj = new HashMap<>(); - resultObj.put("pid", guid); - resultObj.put("algorithm", formattedAlgo); - resultObj.put("checksum", checksum); - resultObj.put("path", setItemFilePath.toString()); - - resultObjList.add(resultObj); + cmd = parser.parse(options, args); + + if (cmd.hasOption("h")) { + formatter.printHelp("CommandLineApp", options); + } else { + if (cmd.hasOption("knbvm")) { + System.out.println( + "Testing with KNBVM values. Please ensure all config files present." + ); } - // Path setItemFilePath = Paths.get("/var/metacat/documents/" + docid + "." + rev); - // if (Files.exists(setItemFilePath)) { - // System.out.println("File exists: " + setItemFilePath); - // Map resultObj = new HashMap<>(); - // resultObj.put("pid", guid); - // resultObj.put("path", setItemFilePath.toString()); - // resultObj.put("namespace", formatId); - - // resultObjList.add(resultObj); - // } } + } catch (ParseException e) { + System.err.println("Error parsing cli arguments: " + e.getMessage()); + formatter.printHelp("CommandLineApp", options); + } + + if (knbvmTest) { + // Get a HashStore + initializeHashStore(storePath); + + // Load metacat db yaml + System.out.println("Loading metacat db yaml."); + Path pgdbYaml = storePath.resolve("pgdb.yaml"); + File pgdbYamlFile = pgdbYaml.toFile(); + ObjectMapper om = new ObjectMapper(new YAMLFactory()); + HashMap pgdbYamlProperties = om.readValue(pgdbYamlFile, HashMap.class); + // Get db values + String url = (String) pgdbYamlProperties.get("db_uri"); + String user = (String) pgdbYamlProperties.get("db_user"); + String password = (String) pgdbYamlProperties.get("db_password"); + + try { + System.out.println("Connecting to metacat db."); + // Setup metacat db access + Class.forName("org.postgresql.Driver"); // Force driver to register itself + Connection connection = DriverManager.getConnection(url, user, password); + Statement statement = connection.createStatement(); + String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + + " systemmetadata.object_format, systemmetadata.checksum," + + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" + + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid;"; + ResultSet resultSet = statement.executeQuery(sqlQuery); + + // For each row, get guid, docid, rev, checksum and checksum_algorithm + // and create a List to loop over + List> resultObjList = new ArrayList<>(); + + while (resultSet.next()) { + System.out.println("Calling resultSet.next()"); + String guid = resultSet.getString("guid"); + String docid = resultSet.getString("docid"); + int rev = resultSet.getInt("rev"); + String checksum = resultSet.getString("checksum"); + String checksumAlgorithm = resultSet.getString("checksum_algorithm"); + String formattedAlgo = formatAlgo(checksumAlgorithm); + String formatId = resultSet.getString("object_format"); + + Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); + if (Files.exists(setItemFilePath)) { + System.out.println("File exists: " + setItemFilePath); + Map resultObj = new HashMap<>(); + resultObj.put("pid", guid); + resultObj.put("algorithm", formattedAlgo); + resultObj.put("checksum", checksum); + resultObj.put("path", setItemFilePath.toString()); + + resultObjList.add(resultObj); + } + // Path setItemFilePath = Paths.get("/var/metacat/documents/" + docid + "." + rev); + // if (Files.exists(setItemFilePath)) { + // System.out.println("File exists: " + setItemFilePath); + // Map resultObj = new HashMap<>(); + // resultObj.put("pid", guid); + // resultObj.put("path", setItemFilePath.toString()); + // resultObj.put("namespace", formatId); + + // resultObjList.add(resultObj); + // } + } - // retrieveAndValidateObjs(resultObjList); - // storeObjectsWithChecksum(resultObjList); - deleteObjectsFromStore(resultObjList); - // storeMetadataFromDb(resultObjList); + // retrieveAndValidateObjs(resultObjList); + // storeObjectsWithChecksum(resultObjList); + // deleteObjectsFromStore(resultObjList); + // storeMetadataFromDb(resultObjList); - // Close resources - resultSet.close(); - statement.close(); - connection.close(); + // Close resources + resultSet.close(); + statement.close(); + connection.close(); - } catch (Exception e) { - e.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); + } } } From 4a8f741651773f985519fc7d70486944cf5f4c78 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 21 Aug 2023 10:50:32 -0700 Subject: [PATCH 042/553] Add command line options to work with HashStore --- .../java/org/dataone/hashstore/Client.java | 72 +++++++++++++++++-- 1 file changed, 67 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 28b2a465..9b8bdb22 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -44,20 +44,19 @@ public static void main(String[] args) throws Exception { if (args.length == 0) { System.out.println("No arguments provided. Use flag '-h' for help."); } - Options options = new Options(); - options.addOption("h", "help", false, "Show help options."); - options.addOption("knbvm", "knbvmtestadc", false, "Specify testing with knbvm"); + // Add HashStore client options + Options options = addHashStoreOptions(); + // Begin parsing options CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; - boolean knbvmTest = false; - try { cmd = parser.parse(options, args); if (cmd.hasOption("h")) { formatter.printHelp("CommandLineApp", options); + } else { if (cmd.hasOption("knbvm")) { System.out.println( @@ -70,6 +69,7 @@ public static void main(String[] args) throws Exception { formatter.printHelp("CommandLineApp", options); } + boolean knbvmTest = false; if (knbvmTest) { // Get a HashStore initializeHashStore(storePath); @@ -151,6 +151,68 @@ public static void main(String[] args) throws Exception { } + /** + * Create an options object to use with Apache Commons CLI library to manage command line + * options. + */ + private static Options addHashStoreOptions() { + Options options = new Options(); + options.addOption("h", "help", false, "Show help options."); + // Mandatory option + options.addOption("store", "storepath", true, "Path to HashStore."); + // HashStore creation options + options.addOption("chs", "createhashstore", false, "Create a HashStore."); + options.addOption("dp", "createhashstore", true, "Depth of HashStore."); + options.addOption("wp", "createhashstore", true, "Width of HashStore."); + options.addOption("ap", "createhashstore", true, "Algorithm of HashStore."); + options.addOption("nsp", "createhashstore", true, "Default metadata namespace"); + // Public API options + options.addOption( + "getchecksum", "client_getchecksum", false, + "Get the hex digest of a data object in a HashStore" + ); + options.addOption( + "storeobject", "client_storeobject", false, "Store object to a HashStore." + ); + options.addOption( + "storemetadata", "client_storemetadata", false, "Store metadata to a HashStore" + ); + options.addOption( + "retrieveobject", "client_retrieveobject", false, "Retrieve an object from a HashStore." + ); + options.addOption( + "retrievemetadata", "client_retrievemetadata", false, + "Retrieve a metadata obj from a HashStore." + ); + options.addOption( + "deleteobject", "client_deleteobject", false, "Delete an object from a HashStore." + ); + options.addOption( + "deletemetadata", "client_deletemetadata", false, + "Delete a metadata obj from a HashStore." + ); + options.addOption("pid", "pidguid", true, "PID or GUID of object."); + options.addOption("path", "filepath", true, "Path to object."); + options.addOption("algo", "objectalgo", true, "Algorithm to use in calculations."); + options.addOption("checksum", "obj_checksum", true, "Checksum of object."); + options.addOption( + "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied." + ); + options.addOption("size", "obj_size", true, "Size of object"); + options.addOption("format_id", "metadata_format", true, "Metadata format_id/namespace"); + // knbvm (test.arcticdata.io) options + options.addOption("knbvm", "knbvmtestadc", false, "Specify testing with knbvm."); + options.addOption("nobj", "numberofobj", false, "Number of objects to work with."); + options.addOption("sdir", "storedirectory", true, "Location of objects to convert."); + options.addOption("stype", "storetype", true, "Type of store 'objects' or 'metadata'"); + options.addOption("sts", "storetohs", false, "Flag to store objs to a HashStore"); + options.addOption( + "rav", "retandval", false, "Retrieve and validate objs from a HashStore." + ); + options.addOption("dfs", "delfromhs", false, "Delete objs from a HashStore."); + return options; + } + private static void storeObjectsWithChecksum(List> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; From 827e3e6b1f41a3d3570f330e43d1779bfc83cef7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 21 Aug 2023 11:58:52 -0700 Subject: [PATCH 043/553] Code check-in: Initial refactor java client for handling public api calls or testing with knbvm --- .../java/org/dataone/hashstore/Client.java | 156 ++++++++++-------- 1 file changed, 85 insertions(+), 71 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 9b8bdb22..0111c256 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -33,6 +33,8 @@ import org.dataone.hashstore.exceptions.HashStoreFactoryException; import org.dataone.hashstore.exceptions.PidObjectExistsException; +import com.fasterxml.jackson.core.exc.StreamReadException; +import com.fasterxml.jackson.databind.DatabindException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -56,99 +58,111 @@ public static void main(String[] args) throws Exception { if (cmd.hasOption("h")) { formatter.printHelp("CommandLineApp", options); - } else { + // Get store path and get HashStore + if (!cmd.hasOption("store")) { + String err_msg = + "HashStore store path must be supplied, use '-store=[path/to/store]'"; + throw new IllegalArgumentException(err_msg); + } + Path storePath = Paths.get(cmd.getOptionValue("store")); + // Confirm HashStore + initializeHashStore(storePath); + + // Parse options if (cmd.hasOption("knbvm")) { System.out.println( "Testing with KNBVM values. Please ensure all config files present." ); + // TODO: Pass to method based on getOptions + String action = "sts"; + String objType = "data"; // Or "documents" + testWithKnbvm(action, objType); } } } catch (ParseException e) { System.err.println("Error parsing cli arguments: " + e.getMessage()); formatter.printHelp("CommandLineApp", options); } + } - boolean knbvmTest = false; - if (knbvmTest) { - // Get a HashStore - initializeHashStore(storePath); - - // Load metacat db yaml - System.out.println("Loading metacat db yaml."); - Path pgdbYaml = storePath.resolve("pgdb.yaml"); - File pgdbYamlFile = pgdbYaml.toFile(); - ObjectMapper om = new ObjectMapper(new YAMLFactory()); - HashMap pgdbYamlProperties = om.readValue(pgdbYamlFile, HashMap.class); - // Get db values - String url = (String) pgdbYamlProperties.get("db_uri"); - String user = (String) pgdbYamlProperties.get("db_user"); - String password = (String) pgdbYamlProperties.get("db_password"); + private static void testWithKnbvm(String actionFlag, String objType) throws IOException, + StreamReadException, DatabindException { + // Load metacat db yaml + System.out.println("Loading metacat db yaml."); + Path pgdbYaml = storePath.resolve("pgdb.yaml"); + File pgdbYamlFile = pgdbYaml.toFile(); + ObjectMapper om = new ObjectMapper(new YAMLFactory()); + HashMap pgdbYamlProperties = om.readValue(pgdbYamlFile, HashMap.class); + // Get db values + String url = (String) pgdbYamlProperties.get("db_uri"); + String user = (String) pgdbYamlProperties.get("db_user"); + String password = (String) pgdbYamlProperties.get("db_password"); - try { - System.out.println("Connecting to metacat db."); - // Setup metacat db access - Class.forName("org.postgresql.Driver"); // Force driver to register itself - Connection connection = DriverManager.getConnection(url, user, password); - Statement statement = connection.createStatement(); - String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," - + " systemmetadata.object_format, systemmetadata.checksum," - + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid;"; - ResultSet resultSet = statement.executeQuery(sqlQuery); - - // For each row, get guid, docid, rev, checksum and checksum_algorithm - // and create a List to loop over - List> resultObjList = new ArrayList<>(); - - while (resultSet.next()) { - System.out.println("Calling resultSet.next()"); - String guid = resultSet.getString("guid"); - String docid = resultSet.getString("docid"); - int rev = resultSet.getInt("rev"); - String checksum = resultSet.getString("checksum"); - String checksumAlgorithm = resultSet.getString("checksum_algorithm"); - String formattedAlgo = formatAlgo(checksumAlgorithm); - String formatId = resultSet.getString("object_format"); - - Path setItemFilePath = Paths.get("/var/metacat/data/" + docid + "." + rev); - if (Files.exists(setItemFilePath)) { - System.out.println("File exists: " + setItemFilePath); - Map resultObj = new HashMap<>(); + try { + System.out.println("Connecting to metacat db."); + // Setup metacat db access + Class.forName("org.postgresql.Driver"); // Force driver to register itself + Connection connection = DriverManager.getConnection(url, user, password); + Statement statement = connection.createStatement(); + String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + + " systemmetadata.object_format, systemmetadata.checksum," + + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" + + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid;"; + ResultSet resultSet = statement.executeQuery(sqlQuery); + + // For each row, get guid, docid, rev, checksum and checksum_algorithm + // and create a List to loop over + List> resultObjList = new ArrayList<>(); + while (resultSet.next()) { + System.out.println("Calling resultSet.next()"); + String guid = resultSet.getString("guid"); + String docid = resultSet.getString("docid"); + int rev = resultSet.getInt("rev"); + String checksum = resultSet.getString("checksum"); + String checksumAlgorithm = resultSet.getString("checksum_algorithm"); + String formattedChecksumAlgo = formatAlgo(checksumAlgorithm); + String formatId = resultSet.getString("object_format"); + + Path setItemFilePath = Paths.get( + "/var/metacat/" + objType + "/" + docid + "." + rev + ); + + if (Files.exists(setItemFilePath)) { + Map resultObj = new HashMap<>(); + if (objType == "data") { resultObj.put("pid", guid); - resultObj.put("algorithm", formattedAlgo); + resultObj.put("algorithm", formattedChecksumAlgo); resultObj.put("checksum", checksum); resultObj.put("path", setItemFilePath.toString()); - - resultObjList.add(resultObj); } - // Path setItemFilePath = Paths.get("/var/metacat/documents/" + docid + "." + rev); - // if (Files.exists(setItemFilePath)) { - // System.out.println("File exists: " + setItemFilePath); - // Map resultObj = new HashMap<>(); - // resultObj.put("pid", guid); - // resultObj.put("path", setItemFilePath.toString()); - // resultObj.put("namespace", formatId); - - // resultObjList.add(resultObj); - // } + if (objType == "documents") { + resultObj.put("pid", guid); + resultObj.put("path", setItemFilePath.toString()); + resultObj.put("namespace", formatId); + } + resultObjList.add(resultObj); } + } + // Check option + if (actionFlag == "sts") { + // TODO: Refactor/update methods to be object/metadata specific // retrieveAndValidateObjs(resultObjList); // storeObjectsWithChecksum(resultObjList); // deleteObjectsFromStore(resultObjList); // storeMetadataFromDb(resultObjList); + System.out.println("Placeholder"); + } - // Close resources - resultSet.close(); - statement.close(); - connection.close(); + // Close resources + resultSet.close(); + statement.close(); + connection.close(); - } catch (Exception e) { - e.printStackTrace(); - } + } catch (Exception e) { + e.printStackTrace(); } - } /** @@ -162,10 +176,10 @@ private static Options addHashStoreOptions() { options.addOption("store", "storepath", true, "Path to HashStore."); // HashStore creation options options.addOption("chs", "createhashstore", false, "Create a HashStore."); - options.addOption("dp", "createhashstore", true, "Depth of HashStore."); - options.addOption("wp", "createhashstore", true, "Width of HashStore."); - options.addOption("ap", "createhashstore", true, "Algorithm of HashStore."); - options.addOption("nsp", "createhashstore", true, "Default metadata namespace"); + options.addOption("dp", "storedepth", true, "Depth of HashStore."); + options.addOption("wp", "storewidth", true, "Width of HashStore."); + options.addOption("ap", "storealgo", true, "Algorithm of HashStore."); + options.addOption("nsp", "storenamespace", true, "Default metadata namespace"); // Public API options options.addOption( "getchecksum", "client_getchecksum", false, From 8136343694810784543e2b5d9c445526f7760340 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 21 Aug 2023 16:03:12 -0700 Subject: [PATCH 044/553] Add knbvm test methods for retrieving and deleting metadata, add missing documentation and cleanup comments --- .../java/org/dataone/hashstore/Client.java | 323 ++++++++++++++---- 1 file changed, 253 insertions(+), 70 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 0111c256..a4fe15dd 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -61,13 +61,13 @@ public static void main(String[] args) throws Exception { } else { // Get store path and get HashStore if (!cmd.hasOption("store")) { - String err_msg = + String errMsg = "HashStore store path must be supplied, use '-store=[path/to/store]'"; - throw new IllegalArgumentException(err_msg); + throw new IllegalArgumentException(errMsg); } Path storePath = Paths.get(cmd.getOptionValue("store")); // Confirm HashStore - initializeHashStore(storePath); + initializeHashStoreForKnb(storePath); // Parse options if (cmd.hasOption("knbvm")) { @@ -86,6 +86,15 @@ public static void main(String[] args) throws Exception { } } + /** + * Entry point for working with test data found in knbvm (test.arcticdata.io) + * + * @param actionFlag String representing a knbvm test-related method to call. + * @param objType "data" (objects) or "documents" (metadata). + * @throws IOException + * @throws StreamReadException + * @throws DatabindException + */ private static void testWithKnbvm(String actionFlag, String objType) throws IOException, StreamReadException, DatabindException { // Load metacat db yaml @@ -124,35 +133,43 @@ private static void testWithKnbvm(String actionFlag, String objType) throws IOEx String formattedChecksumAlgo = formatAlgo(checksumAlgorithm); String formatId = resultSet.getString("object_format"); + if (objType != "data" || objType != "documents") { + String errMsg = "HashStoreClient - objType must be 'data' or 'documents'"; + throw new IllegalArgumentException(errMsg); + } Path setItemFilePath = Paths.get( "/var/metacat/" + objType + "/" + docid + "." + rev ); if (Files.exists(setItemFilePath)) { Map resultObj = new HashMap<>(); - if (objType == "data") { - resultObj.put("pid", guid); - resultObj.put("algorithm", formattedChecksumAlgo); - resultObj.put("checksum", checksum); - resultObj.put("path", setItemFilePath.toString()); - } - if (objType == "documents") { - resultObj.put("pid", guid); - resultObj.put("path", setItemFilePath.toString()); - resultObj.put("namespace", formatId); - } + resultObj.put("pid", guid); + resultObj.put("algorithm", formattedChecksumAlgo); + resultObj.put("checksum", checksum); + resultObj.put("path", setItemFilePath.toString()); + resultObj.put("namespace", formatId); resultObjList.add(resultObj); } } - // Check option - if (actionFlag == "sts") { - // TODO: Refactor/update methods to be object/metadata specific - // retrieveAndValidateObjs(resultObjList); - // storeObjectsWithChecksum(resultObjList); - // deleteObjectsFromStore(resultObjList); - // storeMetadataFromDb(resultObjList); - System.out.println("Placeholder"); + // Check options + if (actionFlag == "sts" && objType == "data") { + storeObjsWithChecksumFromDb(resultObjList); + } + if (actionFlag == "sts" && objType == "documents") { + storeMetadataFromDb(resultObjList); + } + if (actionFlag == "rav" && objType == "data") { + retrieveAndValidateObjs(resultObjList); + } + if (actionFlag == "rav" && objType == "documents") { + retrieveAndValidateMetadata(resultObjList); + } + if (actionFlag == "dfs" && objType == "data") { + deleteObjectsFromStore(resultObjList); + } + if (actionFlag == "dfs" && objType == "documents") { + deleteMetadataFromStore(resultObjList); } // Close resources @@ -166,8 +183,8 @@ private static void testWithKnbvm(String actionFlag, String objType) throws IOEx } /** - * Create an options object to use with Apache Commons CLI library to manage command line - * options. + * Returns an options object to use with Apache Commons CLI library to manage command line + * options for HashStore client. */ private static Options addHashStoreOptions() { Options options = new Options(); @@ -227,7 +244,13 @@ private static Options addHashStoreOptions() { return options; } - private static void storeObjectsWithChecksum(List> resultObjList) { + /** + * Store objects to a HashStore with a checksum and checksum algorithm + * + * @param resultObjList List containing items with the following properties: 'pid', 'path', + * 'algorithm', 'checksum' + */ + private static void storeObjsWithChecksumFromDb(List> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -241,42 +264,47 @@ private static void storeObjectsWithChecksum(List> resultObj hashStore.storeObject(objStream, guid, checksum, algorithm); } catch (PidObjectExistsException poee) { - System.out.println("Object already exists for pid: " + guid); - // String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); - // try { - // logExceptionToFile(guid, errMsg, "java/store_errors/pidobjectexists"); - // } catch (Exception e1) { - // e1.printStackTrace(); - // } + String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/store_obj_errors/pidobjectexists"); + } catch (Exception e) { + e.printStackTrace(); + } } catch (IllegalArgumentException iae) { String errMsg = "Unexpected Error: " + iae.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/store_errors/illegalargument"); - } catch (Exception e1) { - e1.printStackTrace(); + logExceptionToFile(guid, errMsg, "java/store_obj_errors/illegalargument"); + } catch (Exception e) { + e.printStackTrace(); } } catch (IOException ioe) { String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/store_errors/io"); - } catch (Exception e1) { - e1.printStackTrace(); + logExceptionToFile(guid, errMsg, "java/store_obj_errors/io"); + } catch (Exception e) { + e.printStackTrace(); } - } catch (Exception e) { - String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + } catch (Exception ge) { + String errMsg = "Unexpected Error: " + ge.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/store_errors/general"); - } catch (Exception e1) { - e1.printStackTrace(); + logExceptionToFile(guid, errMsg, "java/store_obj_errors/general"); + } catch (Exception e) { + e.printStackTrace(); } } }); } + /** + * Retrieve objects from a HashStore and validate its contents by comparing checksums. + * + * @param resultObjList List containing items with the following properties: 'pid', 'algorithm', + * 'checksum' + */ private static void retrieveAndValidateObjs(List> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; @@ -287,18 +315,19 @@ private static void retrieveAndValidateObjs(List> resultObjL // Retrieve object System.out.println("Retrieving object for guid: " + guid); - InputStream objstream = hashStore.retrieveObject(guid); + InputStream objStream = hashStore.retrieveObject(guid); // Get hex digest System.out.println("Calculating hex digest with algorithm: " + algorithm); - String streamDigest = calculateHexDigest(objstream, algorithm); + String streamDigest = calculateHexDigest(objStream, algorithm); + objStream.close(); // If checksums don't match, write a .txt file if (!streamDigest.equals(checksum)) { String errMsg = "Obj retrieved (pid/guid): " + guid + ". Checksums do not match, checksum from db: " + checksum + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; - logExceptionToFile(guid, errMsg, "java/retrieve_errors/checksum_mismatch"); + logExceptionToFile(guid, errMsg, "java/retrieve_obj_errors/checksum_mismatch"); } else { System.out.println("Checksums match!"); } @@ -306,23 +335,36 @@ private static void retrieveAndValidateObjs(List> resultObjL } catch (FileNotFoundException fnfe) { String errMsg = "File not found: " + fnfe.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/retrieve_errors/filenotfound"); + logExceptionToFile(guid, errMsg, "java/retrieve_obj_errors/filenotfound"); + } catch (Exception e) { + e.printStackTrace(); + } + + } catch (IOException ioe) { + String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/retrieve_obj_errors/io"); } catch (Exception e) { e.printStackTrace(); } - } catch (Exception e) { - String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + } catch (Exception ge) { + String errMsg = "Unexpected Error: " + ge.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/retrieve_errors/general"); - } catch (Exception e1) { - e1.printStackTrace(); + logExceptionToFile(guid, errMsg, "java/retrieve_obj_errors/general"); + } catch (Exception e) { + e.printStackTrace(); } } }); } + /** + * Deletes a list of objects from a HashStore + * + * @param resultObjList List containing items with the following property: 'pid' + */ private static void deleteObjectsFromStore(List> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; @@ -336,31 +378,37 @@ private static void deleteObjectsFromStore(List> resultObjLi } catch (FileNotFoundException fnfe) { String errMsg = "Unexpected Error: " + fnfe.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/delete_errors/filenotfound"); - } catch (Exception e1) { - e1.printStackTrace(); + logExceptionToFile(guid, errMsg, "java/delete_obj_errors/filenotfound"); + } catch (Exception e) { + e.printStackTrace(); } } catch (IOException ioe) { String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/delete_errors/io"); - } catch (Exception e1) { - e1.printStackTrace(); + logExceptionToFile(guid, errMsg, "java/delete_obj_errors/io"); + } catch (Exception e) { + e.printStackTrace(); } - } catch (Exception e) { - String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + } catch (Exception ge) { + String errMsg = "Unexpected Error: " + ge.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/delete_errors/general"); - } catch (Exception e1) { - e1.printStackTrace(); + logExceptionToFile(guid, errMsg, "java/delete_obj_errors/general"); + } catch (Exception e) { + e.printStackTrace(); } } }); } + /** + * Store a list containing info about metadata to a HashStore + * + * @param resultObjList List containing items that have the following properties: 'pid', 'path' + * and 'namespace' + */ private static void storeMetadataFromDb(List> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; @@ -373,26 +421,154 @@ private static void storeMetadataFromDb(List> resultObjList) System.out.println("Storing metadata for guid: " + guid); hashStore.storeMetadata(objStream, guid, formatId); + } catch (IllegalArgumentException iae) { + String errMsg = "Unexpected Error: " + iae.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/store_metadata_errors/illegalargument"); + } catch (Exception e) { + e.printStackTrace(); + } + } catch (IOException ioe) { String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); try { logExceptionToFile(guid, errMsg, "java/store_metadata_errors/io"); - } catch (Exception e1) { - e1.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); } - } catch (Exception e) { - String errMsg = "Unexpected Error: " + e.fillInStackTrace(); + } catch (Exception ge) { + String errMsg = "Unexpected Error: " + ge.fillInStackTrace(); try { logExceptionToFile(guid, errMsg, "java/store_metadata_errors/general"); - } catch (Exception e1) { - e1.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); + } + + } + }); + } + + + /** + * Retrieve metadata from a HashStore and validate its contents by comparing checksums. + * + * @param resultObjList List containing items with the following properties: 'pid', 'namespace', + * 'algorithm', 'checksum' + */ + private static void retrieveAndValidateMetadata(List> resultObjList) { + resultObjList.parallelStream().forEach(item -> { + String guid = null; + try { + guid = item.get("pid"); + String algorithm = item.get("algorithm"); + String checksum = item.get("checksum"); + String formatId = item.get("namespace"); + + // Retrieve object + System.out.println("Retrieving metadata for guid: " + guid); + InputStream metadataStream = hashStore.retrieveMetadata(guid, formatId); + + // Get hex digest + System.out.println("Calculating hex digest with algorithm: " + algorithm); + String streamDigest = calculateHexDigest(metadataStream, algorithm); + metadataStream.close(); + + // If checksums don't match, write a .txt file + if (!streamDigest.equals(checksum)) { + String errMsg = "Metadata retrieved (pid/guid): " + guid + + ". Checksums do not match, checksum from db: " + checksum + + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; + logExceptionToFile( + guid, errMsg, "java/retrieve_metadata_errors/checksum_mismatch" + ); + } else { + System.out.println("Checksums match!"); + } + + } catch (FileNotFoundException fnfe) { + String errMsg = "File not found: " + fnfe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/retrieve_metadata_errors/filenotfound"); + } catch (Exception e) { + e.printStackTrace(); + } + + } catch (IOException ioe) { + String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/retrieve_metadata_errors/io"); + } catch (Exception e) { + e.printStackTrace(); + } + + } catch (Exception ge) { + String errMsg = "Unexpected Error: " + ge.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/retrieve_metadata_errors/general"); + } catch (Exception e) { + e.printStackTrace(); + } + + } + }); + } + + + /** + * Deletes a list of metadata from a HashStore + * + * @param resultObjList List containing items with the following property: 'pid' + */ + private static void deleteMetadataFromStore(List> resultObjList) { + resultObjList.parallelStream().forEach(item -> { + String guid = null; + try { + guid = item.get("pid"); + String formatId = item.get("namespace"); + + // Delete object + System.out.println("Deleting metadata for guid: " + guid); + hashStore.deleteMetadata(guid, formatId); + + } catch (FileNotFoundException fnfe) { + String errMsg = "Unexpected Error: " + fnfe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/delete_metadata_errors/filenotfound"); + } catch (Exception e) { + e.printStackTrace(); + } + + } catch (IOException ioe) { + String errMsg = "Unexpected Error: " + ioe.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/delete_metadata_errors/io"); + } catch (Exception e) { + e.printStackTrace(); + } + + } catch (Exception ge) { + String errMsg = "Unexpected Error: " + ge.fillInStackTrace(); + try { + logExceptionToFile(guid, errMsg, "java/delete_metadata_errors/general"); + } catch (Exception e) { + e.printStackTrace(); } } }); } + // Utility methods for testing in Knbvm (test.arcticdata.io) + + /** + * Log a plain text file with the guid/pid as the file name with a message. + * + * @param guid Pid/guid for which an exception was encountered. + * @param errMsg Message to write into text file. + * @param directory Directory within HashStore to log error (txt) files. + * @throws Exception + */ private static void logExceptionToFile(String guid, String errMsg, String directory) throws Exception { // Create directory to store the error files @@ -434,7 +610,14 @@ private static String formatAlgo(String value) { return checkedAlgorithm; } - private static void initializeHashStore(Path storePath) throws HashStoreFactoryException, + /** + * Initialize HashStore for testing in knbvm with default values. + * + * @param storePath Path to store. + * @throws HashStoreFactoryException + * @throws IOException + */ + private static void initializeHashStoreForKnb(Path storePath) throws HashStoreFactoryException, IOException { // Initialize HashStore String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; From b8ca784d16edabc2b245ce7a43c1a9415a317a9a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 21 Aug 2023 16:15:19 -0700 Subject: [PATCH 045/553] Add parsing code to get values to test with knbvm and clean up code to conform to java standards --- .../java/org/dataone/hashstore/Client.java | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index a4fe15dd..44be972d 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -74,9 +74,17 @@ public static void main(String[] args) throws Exception { System.out.println( "Testing with KNBVM values. Please ensure all config files present." ); - // TODO: Pass to method based on getOptions - String action = "sts"; - String objType = "data"; // Or "documents" + String action = null; + if (cmd.hasOption("sts")) { + action = "sts"; + } + if (cmd.hasOption("rav")) { + action = "rav"; + } + if (cmd.hasOption("dfs")) { + action = "dfs"; + } + String objType = cmd.getOptionValue("stype"); testWithKnbvm(action, objType); } } @@ -133,9 +141,11 @@ private static void testWithKnbvm(String actionFlag, String objType) throws IOEx String formattedChecksumAlgo = formatAlgo(checksumAlgorithm); String formatId = resultSet.getString("object_format"); - if (objType != "data" || objType != "documents") { - String errMsg = "HashStoreClient - objType must be 'data' or 'documents'"; - throw new IllegalArgumentException(errMsg); + if (!objType.equals("data")) { + if (!objType.equals("documents")) { + String errMsg = "HashStoreClient - objType must be 'data' or 'documents'"; + throw new IllegalArgumentException(errMsg); + } } Path setItemFilePath = Paths.get( "/var/metacat/" + objType + "/" + docid + "." + rev @@ -153,22 +163,22 @@ private static void testWithKnbvm(String actionFlag, String objType) throws IOEx } // Check options - if (actionFlag == "sts" && objType == "data") { + if (actionFlag.equals("sts") && objType.equals("data")) { storeObjsWithChecksumFromDb(resultObjList); } - if (actionFlag == "sts" && objType == "documents") { + if (actionFlag.equals("sts") && objType.equals("documents")) { storeMetadataFromDb(resultObjList); } - if (actionFlag == "rav" && objType == "data") { + if (actionFlag.equals("rav") && objType.equals("data")) { retrieveAndValidateObjs(resultObjList); } - if (actionFlag == "rav" && objType == "documents") { + if (actionFlag.equals("rav") && objType.equals("documents")) { retrieveAndValidateMetadata(resultObjList); } - if (actionFlag == "dfs" && objType == "data") { + if (actionFlag.equals("dfs") && objType.equals("data")) { deleteObjectsFromStore(resultObjList); } - if (actionFlag == "dfs" && objType == "documents") { + if (actionFlag.equals("dfs") && objType.equals("documents")) { deleteMetadataFromStore(resultObjList); } From 02c0afb1133b63b84bfdf90771f18d2e2a5f3e1a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 21 Aug 2023 16:42:58 -0700 Subject: [PATCH 046/553] Add parsing code to call public api methods 'getHexDigest', 'storeObject' and 'storeMetadata' --- .../java/org/dataone/hashstore/Client.java | 68 ++++++++++++++++++- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 44be972d..09589964 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -40,7 +40,7 @@ public class Client { private static HashStore hashStore; - private static Path storePath = Paths.get("/home/mok/testing/knbvm_hashstore"); + private static Path storePath; public static void main(String[] args) throws Exception { if (args.length == 0) { @@ -65,7 +65,7 @@ public static void main(String[] args) throws Exception { "HashStore store path must be supplied, use '-store=[path/to/store]'"; throw new IllegalArgumentException(errMsg); } - Path storePath = Paths.get(cmd.getOptionValue("store")); + storePath = Paths.get(cmd.getOptionValue("store")); // Confirm HashStore initializeHashStoreForKnb(storePath); @@ -86,7 +86,56 @@ public static void main(String[] args) throws Exception { } String objType = cmd.getOptionValue("stype"); testWithKnbvm(action, objType); + + } else if (cmd.hasOption("getchecksum")) { + String pid = cmd.getOptionValue("pid"); + String algo = cmd.getOptionValue("algo"); + ensureNotNull(pid, "-pid"); + ensureNotNull(algo, "-algo"); + String hexDigest = hashStore.getHexDigest(pid, algo); + System.out.println("Hex Digest (pid: " + pid + ", algorithm: " + algo + "):"); + System.out.println(hexDigest); + + } else if (cmd.hasOption("storeobject")) { + String pid = cmd.getOptionValue("pid"); + Path path = Paths.get(cmd.getOptionValue("path")); + String additional_algo = cmd.getOptionValue("algo"); + String checksum = cmd.getOptionValue("checksum"); + String checksum_algo = cmd.getOptionValue("checksum_algo"); + long size = Long.parseLong(cmd.getOptionValue("size")); + ensureNotNull(pid, "-pid"); + ensureNotNull(path, "-path"); + + InputStream pidObjStream = Files.newInputStream(path); + ObjectInfo objInfo = hashStore.storeObject( + pidObjStream, pid, additional_algo, checksum, checksum_algo, size + ); + System.out.println("Object Info for pid (" + pid + "):"); + System.out.println(objInfo); + + } else if (cmd.hasOption("storemetadata")) { + String pid = cmd.getOptionValue("pid"); + Path path = Paths.get(cmd.getOptionValue("path")); + String formatId = cmd.getOptionValue("format_id"); + ensureNotNull(pid, "-pid"); + ensureNotNull(path, "-path"); + ensureNotNull(formatId, "-format_id"); + + InputStream pidObjStream = Files.newInputStream(path); + String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); + System.out.println("Metadata Content Identifier:"); + System.out.println(metadataCid); + + } else if (cmd.hasOption("retrieveobject")) { + // TODO + } else if (cmd.hasOption("retrievemetadata")) { + // TODO + } else if (cmd.hasOption("deleteobject")) { + // TODO + } else if (cmd.hasOption("deletemetadata")) { + // TODO } + } } catch (ParseException e) { System.err.println("Error parsing cli arguments: " + e.getMessage()); @@ -94,6 +143,21 @@ public static void main(String[] args) throws Exception { } } + /** + * Checks whether a given object is null and throws an exception if so + * + * @param object Object to check + * @param argument Value that is being checked + * @param method Calling method + */ + private static void ensureNotNull(Object object, String argument) { + if (object == null) { + String errMsg = "HashStoreClient - " + argument + " cannot be null."; + throw new NullPointerException(errMsg); + } + } + + /** * Entry point for working with test data found in knbvm (test.arcticdata.io) * From c1881692db32124c368ac9a84bce0f0146a2bfe7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 22 Aug 2023 09:42:58 -0700 Subject: [PATCH 047/553] Refactor client to initialize HashStore based on config file or create a new HashStore, and reorganize client codebase --- .../java/org/dataone/hashstore/Client.java | 522 ++++++++++-------- 1 file changed, 304 insertions(+), 218 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 09589964..1755df9b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -44,120 +44,282 @@ public class Client { public static void main(String[] args) throws Exception { if (args.length == 0) { - System.out.println("No arguments provided. Use flag '-h' for help."); + System.out.println("HashStoreClient - No arguments provided. Use flag '-h' for help."); } // Add HashStore client options - Options options = addHashStoreOptions(); + Options options = addHashStoreClientOptions(); - // Begin parsing options + // Begin parsing arguments CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; try { cmd = parser.parse(options, args); + // First check if user is looking for help if (cmd.hasOption("h")) { formatter.printHelp("CommandLineApp", options); + return; + } + + // Then get store path and initialize HashStore + if (!cmd.hasOption("store")) { + String errMsg = + "HashStoreClient - store path must be supplied, use '-store=[path/to/store]'"; + throw new IllegalArgumentException(errMsg); + } + // Create or initialize HashStore + if (!cmd.hasOption("chs")) { + String storePath = cmd.getOptionValue("store"); + String storeDepth = cmd.getOptionValue("dp"); + String storeWidth = cmd.getOptionValue("wp"); + String storeAlgorithm = cmd.getOptionValue("storealgo"); + String storeNameSpace = cmd.getOptionValue("storenamespace"); + createNewHashStore( + storePath, storeDepth, storeWidth, storeAlgorithm, storeNameSpace + ); + return; } else { - // Get store path and get HashStore - if (!cmd.hasOption("store")) { - String errMsg = - "HashStore store path must be supplied, use '-store=[path/to/store]'"; - throw new IllegalArgumentException(errMsg); - } storePath = Paths.get(cmd.getOptionValue("store")); - // Confirm HashStore - initializeHashStoreForKnb(storePath); - - // Parse options - if (cmd.hasOption("knbvm")) { - System.out.println( - "Testing with KNBVM values. Please ensure all config files present." - ); - String action = null; - if (cmd.hasOption("sts")) { - action = "sts"; - } - if (cmd.hasOption("rav")) { - action = "rav"; - } - if (cmd.hasOption("dfs")) { - action = "dfs"; - } - String objType = cmd.getOptionValue("stype"); - testWithKnbvm(action, objType); - - } else if (cmd.hasOption("getchecksum")) { - String pid = cmd.getOptionValue("pid"); - String algo = cmd.getOptionValue("algo"); - ensureNotNull(pid, "-pid"); - ensureNotNull(algo, "-algo"); - String hexDigest = hashStore.getHexDigest(pid, algo); - System.out.println("Hex Digest (pid: " + pid + ", algorithm: " + algo + "):"); - System.out.println(hexDigest); - - } else if (cmd.hasOption("storeobject")) { - String pid = cmd.getOptionValue("pid"); - Path path = Paths.get(cmd.getOptionValue("path")); - String additional_algo = cmd.getOptionValue("algo"); - String checksum = cmd.getOptionValue("checksum"); - String checksum_algo = cmd.getOptionValue("checksum_algo"); - long size = Long.parseLong(cmd.getOptionValue("size")); - ensureNotNull(pid, "-pid"); - ensureNotNull(path, "-path"); - - InputStream pidObjStream = Files.newInputStream(path); - ObjectInfo objInfo = hashStore.storeObject( - pidObjStream, pid, additional_algo, checksum, checksum_algo, size - ); - System.out.println("Object Info for pid (" + pid + "):"); - System.out.println(objInfo); - - } else if (cmd.hasOption("storemetadata")) { - String pid = cmd.getOptionValue("pid"); - Path path = Paths.get(cmd.getOptionValue("path")); - String formatId = cmd.getOptionValue("format_id"); - ensureNotNull(pid, "-pid"); - ensureNotNull(path, "-path"); - ensureNotNull(formatId, "-format_id"); - - InputStream pidObjStream = Files.newInputStream(path); - String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); - System.out.println("Metadata Content Identifier:"); - System.out.println(metadataCid); - - } else if (cmd.hasOption("retrieveobject")) { - // TODO - } else if (cmd.hasOption("retrievemetadata")) { - // TODO - } else if (cmd.hasOption("deleteobject")) { - // TODO - } else if (cmd.hasOption("deletemetadata")) { - // TODO + Path hashstoreYaml = storePath.resolve("hashstore.yaml"); + if (!Files.exists(hashstoreYaml)) { + String errMsg = "HashStoreClient - Missing hashstore.yaml at storePath (" + + storePath + + "), please create a store with '-chs'. Use '-h' to see options."; + throw new FileNotFoundException(errMsg); } + initializeHashStore(storePath); + } + // Parse remaining options + if (cmd.hasOption("knbvm")) { + System.out.println( + "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore.yaml." + ); + + String action = null; + if (cmd.hasOption("sts")) { + action = "sts"; + } + if (cmd.hasOption("rav")) { + action = "rav"; + } + if (cmd.hasOption("dfs")) { + action = "dfs"; + } + String objType = cmd.getOptionValue("stype"); + testWithKnbvm(action, objType); + return; + + } else if (cmd.hasOption("getchecksum")) { + String pid = cmd.getOptionValue("pid"); + String algo = cmd.getOptionValue("algo"); + ensureNotNull(pid, "-pid"); + ensureNotNull(algo, "-algo"); + String hexDigest = hashStore.getHexDigest(pid, algo); + System.out.println("Hex Digest (pid: " + pid + ", algorithm: " + algo + "):"); + System.out.println(hexDigest); + + } else if (cmd.hasOption("storeobject")) { + String pid = cmd.getOptionValue("pid"); + Path path = Paths.get(cmd.getOptionValue("path")); + String additional_algo = cmd.getOptionValue("algo"); + String checksum = cmd.getOptionValue("checksum"); + String checksum_algo = cmd.getOptionValue("checksum_algo"); + long size = Long.parseLong(cmd.getOptionValue("size")); + ensureNotNull(pid, "-pid"); + ensureNotNull(path, "-path"); + + InputStream pidObjStream = Files.newInputStream(path); + ObjectInfo objInfo = hashStore.storeObject( + pidObjStream, pid, additional_algo, checksum, checksum_algo, size + ); + pidObjStream.close(); + System.out.println("Object Info for pid (" + pid + "):"); + System.out.println(objInfo); + + } else if (cmd.hasOption("storemetadata")) { + String pid = cmd.getOptionValue("pid"); + Path path = Paths.get(cmd.getOptionValue("path")); + String formatId = cmd.getOptionValue("format_id"); + ensureNotNull(pid, "-pid"); + ensureNotNull(path, "-path"); + ensureNotNull(formatId, "-format_id"); + + InputStream pidObjStream = Files.newInputStream(path); + String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); + pidObjStream.close(); + System.out.println("Metadata Content Identifier:"); + System.out.println(metadataCid); + + } else if (cmd.hasOption("retrieveobject")) { + // TODO + } else if (cmd.hasOption("retrievemetadata")) { + // TODO + } else if (cmd.hasOption("deleteobject")) { + // TODO + } else if (cmd.hasOption("deletemetadata")) { + // TODO } + return; + } catch (ParseException e) { System.err.println("Error parsing cli arguments: " + e.getMessage()); formatter.printHelp("CommandLineApp", options); } } + + // Configuration methods to initialize HashStore client + /** - * Checks whether a given object is null and throws an exception if so + * Returns an options object to use with Apache Commons CLI library to manage command line + * options for HashStore client. + */ + private static Options addHashStoreClientOptions() { + Options options = new Options(); + options.addOption("h", "help", false, "Show help options."); + // Mandatory option + options.addOption("store", "storepath", true, "Path to HashStore."); + // HashStore creation options + options.addOption("chs", "createhashstore", false, "Create a HashStore."); + options.addOption("dp", "storedepth", true, "Depth of HashStore."); + options.addOption("wp", "storewidth", true, "Width of HashStore."); + options.addOption("ap", "storealgo", true, "Algorithm of HashStore."); + options.addOption("nsp", "storenamespace", true, "Default metadata namespace"); + // Public API options + options.addOption( + "getchecksum", "client_getchecksum", false, + "Get the hex digest of a data object in a HashStore" + ); + options.addOption( + "storeobject", "client_storeobject", false, "Store object to a HashStore." + ); + options.addOption( + "storemetadata", "client_storemetadata", false, "Store metadata to a HashStore" + ); + options.addOption( + "retrieveobject", "client_retrieveobject", false, "Retrieve an object from a HashStore." + ); + options.addOption( + "retrievemetadata", "client_retrievemetadata", false, + "Retrieve a metadata obj from a HashStore." + ); + options.addOption( + "deleteobject", "client_deleteobject", false, "Delete an object from a HashStore." + ); + options.addOption( + "deletemetadata", "client_deletemetadata", false, + "Delete a metadata obj from a HashStore." + ); + options.addOption("pid", "pidguid", true, "PID or GUID of object."); + options.addOption("path", "filepath", true, "Path to object."); + options.addOption("algo", "objectalgo", true, "Algorithm to use in calculations."); + options.addOption("checksum", "obj_checksum", true, "Checksum of object."); + options.addOption( + "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied." + ); + options.addOption("size", "obj_size", true, "Size of object"); + options.addOption("format_id", "metadata_format", true, "Metadata format_id/namespace"); + // knbvm (test.arcticdata.io) options + options.addOption("knbvm", "knbvmtestadc", false, "Specify testing with knbvm."); + options.addOption("nobj", "numberofobj", false, "Number of objects to work with."); + options.addOption("sdir", "storedirectory", true, "Location of objects to convert."); + options.addOption("stype", "storetype", true, "Type of store 'objects' or 'metadata'"); + options.addOption("sts", "storetohs", false, "Flag to store objs to a HashStore"); + options.addOption( + "rav", "retandval", false, "Retrieve and validate objs from a HashStore." + ); + options.addOption("dfs", "delfromhs", false, "Delete objs from a HashStore."); + return options; + } + + /** + * Create a new HashStore with the given properties. + * + * @param storePath Path to HashStore. + * @param storeDepth Depth of store. + * @param storeWidth Width of store. + * @param storeAlgorithm Algorithm to use. + * @param storeNameSpace Default metadata namespace. + * @throws HashStoreFactoryException When unable to get HashStore from factory. + */ + private static void createNewHashStore( + String storePath, String storeDepth, String storeWidth, String storeAlgorithm, + String storeNameSpace + ) throws HashStoreFactoryException, IOException { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", storePath); + storeProperties.setProperty("storeDepth", storeDepth); + storeProperties.setProperty("storeWidth", storeWidth); + storeProperties.setProperty("storeAlgorithm", storeAlgorithm); + storeProperties.setProperty("storeMetadataNamespace", storeNameSpace); + + // Get HashStore + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + } + + /** + * Get the properties of HashStore from 'hashstore.yaml' * - * @param object Object to check - * @param argument Value that is being checked - * @param method Calling method + * @param storePath Path to root of store + * @return HashMap of the properties */ - private static void ensureNotNull(Object object, String argument) { - if (object == null) { - String errMsg = "HashStoreClient - " + argument + " cannot be null."; - throw new NullPointerException(errMsg); + private static HashMap loadHashStoreYaml(Path storePath) { + Path hashStoreYamlPath = storePath.resolve("hashstore.yaml"); + File hashStoreYamlFile = hashStoreYamlPath.toFile(); + ObjectMapper om = new ObjectMapper(new YAMLFactory()); + HashMap hsProperties = new HashMap<>(); + + try { + HashMap hashStoreYamlProperties = om.readValue(hashStoreYamlFile, HashMap.class); + String yamlStorePath = (String) hashStoreYamlProperties.get("store_path"); + hsProperties.put("storePath", Paths.get(yamlStorePath)); + hsProperties.put("storeDepth", hashStoreYamlProperties.get("store_depth")); + hsProperties.put("storeWidth", hashStoreYamlProperties.get("store_width")); + hsProperties.put("storeAlgorithm", hashStoreYamlProperties.get("store_algorithm")); + hsProperties.put( + "storeMetadataNamespace", hashStoreYamlProperties.get("store_metadata_namespace") + ); + + } catch (IOException ioe) { + ioe.printStackTrace(); } + + return hsProperties; + } + + /** + * Initialize HashStore to use in client app. HashStore must already exist or an exception will + * be thrown. + * + * @param storePath Path to store. + * @throws HashStoreFactoryException If unable to initialize HashStore. + * @throws IOException If 'hashstore.yaml' cannot be loaded. + * @throws FileNotFoundException When 'hashstore.yaml' is missing. + */ + private static void initializeHashStore(Path storePath) throws HashStoreFactoryException, + IOException, FileNotFoundException { + // Load properties and get HashStore + HashMap hsProperties = loadHashStoreYaml(storePath); + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", (String) hsProperties.get("storePath")); + storeProperties.setProperty("storeDepth", (String) hsProperties.get("storeDepth")); + storeProperties.setProperty("storeWidth", (String) hsProperties.get("storeWidth")); + storeProperties.setProperty("storeAlgorithm", (String) hsProperties.get("storeAlgorithm")); + storeProperties.setProperty( + "storeMetadataNamespace", (String) hsProperties.get("storeMetadataNamespace") + ); + + // Get HashStore + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); } + // Core methods for testing in Knbvm (test.arcticdata.io) + /** * Entry point for working with test data found in knbvm (test.arcticdata.io) * @@ -256,68 +418,6 @@ private static void testWithKnbvm(String actionFlag, String objType) throws IOEx } } - /** - * Returns an options object to use with Apache Commons CLI library to manage command line - * options for HashStore client. - */ - private static Options addHashStoreOptions() { - Options options = new Options(); - options.addOption("h", "help", false, "Show help options."); - // Mandatory option - options.addOption("store", "storepath", true, "Path to HashStore."); - // HashStore creation options - options.addOption("chs", "createhashstore", false, "Create a HashStore."); - options.addOption("dp", "storedepth", true, "Depth of HashStore."); - options.addOption("wp", "storewidth", true, "Width of HashStore."); - options.addOption("ap", "storealgo", true, "Algorithm of HashStore."); - options.addOption("nsp", "storenamespace", true, "Default metadata namespace"); - // Public API options - options.addOption( - "getchecksum", "client_getchecksum", false, - "Get the hex digest of a data object in a HashStore" - ); - options.addOption( - "storeobject", "client_storeobject", false, "Store object to a HashStore." - ); - options.addOption( - "storemetadata", "client_storemetadata", false, "Store metadata to a HashStore" - ); - options.addOption( - "retrieveobject", "client_retrieveobject", false, "Retrieve an object from a HashStore." - ); - options.addOption( - "retrievemetadata", "client_retrievemetadata", false, - "Retrieve a metadata obj from a HashStore." - ); - options.addOption( - "deleteobject", "client_deleteobject", false, "Delete an object from a HashStore." - ); - options.addOption( - "deletemetadata", "client_deletemetadata", false, - "Delete a metadata obj from a HashStore." - ); - options.addOption("pid", "pidguid", true, "PID or GUID of object."); - options.addOption("path", "filepath", true, "Path to object."); - options.addOption("algo", "objectalgo", true, "Algorithm to use in calculations."); - options.addOption("checksum", "obj_checksum", true, "Checksum of object."); - options.addOption( - "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied." - ); - options.addOption("size", "obj_size", true, "Size of object"); - options.addOption("format_id", "metadata_format", true, "Metadata format_id/namespace"); - // knbvm (test.arcticdata.io) options - options.addOption("knbvm", "knbvmtestadc", false, "Specify testing with knbvm."); - options.addOption("nobj", "numberofobj", false, "Number of objects to work with."); - options.addOption("sdir", "storedirectory", true, "Location of objects to convert."); - options.addOption("stype", "storetype", true, "Type of store 'objects' or 'metadata'"); - options.addOption("sts", "storetohs", false, "Flag to store objs to a HashStore"); - options.addOption( - "rav", "retandval", false, "Retrieve and validate objs from a HashStore." - ); - options.addOption("dfs", "delfromhs", false, "Delete objs from a HashStore."); - return options; - } - /** * Store objects to a HashStore with a checksum and checksum algorithm * @@ -398,7 +498,7 @@ private static void retrieveAndValidateObjs(List> resultObjL // If checksums don't match, write a .txt file if (!streamDigest.equals(checksum)) { - String errMsg = "Obj retrieved (pid/guid): " + guid + String errMsg = "Object retrieved (pid/guid): " + guid + ". Checksums do not match, checksum from db: " + checksum + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; logExceptionToFile(guid, errMsg, "java/retrieve_obj_errors/checksum_mismatch"); @@ -523,7 +623,6 @@ private static void storeMetadataFromDb(List> resultObjList) }); } - /** * Retrieve metadata from a HashStore and validate its contents by comparing checksums. * @@ -588,7 +687,6 @@ private static void retrieveAndValidateMetadata(List> result }); } - /** * Deletes a list of metadata from a HashStore * @@ -633,34 +731,50 @@ private static void deleteMetadataFromStore(List> resultObjL }); } - // Utility methods for testing in Knbvm (test.arcticdata.io) + + // Utility methods /** - * Log a plain text file with the guid/pid as the file name with a message. - * - * @param guid Pid/guid for which an exception was encountered. - * @param errMsg Message to write into text file. - * @param directory Directory within HashStore to log error (txt) files. - * @throws Exception + * Checks whether a given object is null and throws an exception if so + * + * @param object Object to check + * @param argument Value that is being checked */ - private static void logExceptionToFile(String guid, String errMsg, String directory) - throws Exception { - // Create directory to store the error files - Path errorDirectory = storePath.resolve(directory); - Files.createDirectories(errorDirectory); - Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); + private static void ensureNotNull(Object object, String argument) { + if (object == null) { + String errMsg = "HashStoreClient - " + argument + " cannot be null."; + throw new NullPointerException(errMsg); + } + } - try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 - ) - )) { - writer.write(errMsg); + /** + * Calculate the hex digest of a pid's respective object with the given algorithm + * + * @param stream Path to object + * @param algorithm Hash algorithm to use + * @return Hex digest of the pid's respective object + * @throws IOException Error when calculating hex digest + * @throws NoSuchAlgorithmException Algorithm not supported + */ + private static String calculateHexDigest(InputStream stream, String algorithm) + throws IOException, NoSuchAlgorithmException { + MessageDigest mdObject = MessageDigest.getInstance(algorithm); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = stream.read(buffer)) != -1) { + mdObject.update(buffer, 0, bytesRead); - } catch (Exception e) { - e.printStackTrace(); + } + // Close stream + stream.close(); + + } catch (IOException ioe) { + ioe.printStackTrace(); } + // mdObjectHexDigest + return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); } /** @@ -685,58 +799,30 @@ private static String formatAlgo(String value) { } /** - * Initialize HashStore for testing in knbvm with default values. + * Log a plain text file with the guid/pid as the file name with a message. * - * @param storePath Path to store. - * @throws HashStoreFactoryException - * @throws IOException - */ - private static void initializeHashStoreForKnb(Path storePath) throws HashStoreFactoryException, - IOException { - // Initialize HashStore - String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", storePath.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - // Get HashStore - hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); - } - - /** - * Calculate the hex digest of a pid's respective object with the given algorithm - * - * @param stream Path to object - * @param algorithm Hash algorithm to use - * @return Hex digest of the pid's respective object - * @throws IOException Error when calculating hex digest - * @throws NoSuchAlgorithmException Algorithm not supported + * @param guid Pid/guid for which an exception was encountered. + * @param errMsg Message to write into text file. + * @param directory Directory within HashStore to log error (txt) files. + * @throws Exception */ - private static String calculateHexDigest(InputStream stream, String algorithm) - throws IOException, NoSuchAlgorithmException { - MessageDigest mdObject = MessageDigest.getInstance(algorithm); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = stream.read(buffer)) != -1) { - mdObject.update(buffer, 0, bytesRead); + private static void logExceptionToFile(String guid, String errMsg, String directory) + throws Exception { + // Create directory to store the error files + Path errorDirectory = storePath.resolve(directory); + Files.createDirectories(errorDirectory); + Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); - } - // Close stream - stream.close(); + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter( + Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 + ) + )) { + writer.write(errMsg); - } catch (IOException ioe) { - ioe.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); } - // mdObjectHexDigest - return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); - } } From 9fce208acb3d034c267737fceca0d394f732dc02 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 22 Aug 2023 10:34:22 -0700 Subject: [PATCH 048/553] Add logic to limit rows queried when executing sql statement --- .../java/org/dataone/hashstore/Client.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 1755df9b..5dacf59e 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -34,7 +34,6 @@ import org.dataone.hashstore.exceptions.PidObjectExistsException; import com.fasterxml.jackson.core.exc.StreamReadException; -import com.fasterxml.jackson.databind.DatabindException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -107,9 +106,9 @@ public static void main(String[] args) throws Exception { if (cmd.hasOption("dfs")) { action = "dfs"; } + String numObjects = cmd.getOptionValue("nobj"); String objType = cmd.getOptionValue("stype"); - testWithKnbvm(action, objType); - return; + testWithKnbvm(action, objType, numObjects); } else if (cmd.hasOption("getchecksum")) { String pid = cmd.getOptionValue("pid"); @@ -161,7 +160,6 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("deletemetadata")) { // TODO } - return; } catch (ParseException e) { System.err.println("Error parsing cli arguments: " + e.getMessage()); @@ -300,7 +298,7 @@ private static HashMap loadHashStoreYaml(Path storePath) { * @throws FileNotFoundException When 'hashstore.yaml' is missing. */ private static void initializeHashStore(Path storePath) throws HashStoreFactoryException, - IOException, FileNotFoundException { + IOException { // Load properties and get HashStore HashMap hsProperties = loadHashStoreYaml(storePath); Properties storeProperties = new Properties(); @@ -325,12 +323,13 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE * * @param actionFlag String representing a knbvm test-related method to call. * @param objType "data" (objects) or "documents" (metadata). + * @param numObjects Number of rows to retrieve from metacat db, + * if null, will retrieve all rows. * @throws IOException * @throws StreamReadException - * @throws DatabindException */ - private static void testWithKnbvm(String actionFlag, String objType) throws IOException, - StreamReadException, DatabindException { + private static void testWithKnbvm(String actionFlag, String objType, String numObjects) + throws IOException, StreamReadException { // Load metacat db yaml System.out.println("Loading metacat db yaml."); Path pgdbYaml = storePath.resolve("pgdb.yaml"); @@ -342,6 +341,11 @@ private static void testWithKnbvm(String actionFlag, String objType) throws IOEx String user = (String) pgdbYamlProperties.get("db_user"); String password = (String) pgdbYamlProperties.get("db_password"); + String sqlLimitQuery = ""; + if (numObjects != null) { + sqlLimitQuery = " LIMIT " + numObjects; + } + try { System.out.println("Connecting to metacat db."); // Setup metacat db access @@ -351,7 +355,8 @@ private static void testWithKnbvm(String actionFlag, String objType) throws IOEx String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid;"; + + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid" + + sqlLimitQuery + ";"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm From efe33abf3be9907fd911d5bb36e96b7addf8e918 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 22 Aug 2023 10:46:27 -0700 Subject: [PATCH 049/553] Add code to call public api methods for retrieving or deleting objects or metadata via client --- .../java/org/dataone/hashstore/Client.java | 51 +++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 5dacf59e..97193917 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -33,7 +33,6 @@ import org.dataone.hashstore.exceptions.HashStoreFactoryException; import org.dataone.hashstore.exceptions.PidObjectExistsException; -import com.fasterxml.jackson.core.exc.StreamReadException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -152,13 +151,47 @@ public static void main(String[] args) throws Exception { System.out.println(metadataCid); } else if (cmd.hasOption("retrieveobject")) { - // TODO + String pid = cmd.getOptionValue("pid"); + ensureNotNull(pid, "-pid"); + + InputStream objStream = hashStore.retrieveObject(pid); + byte[] buffer = new byte[1000]; + int bytesRead = objStream.read(buffer, 0, buffer.length); + String objPreview = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + objStream.close(); + System.out.println(objPreview); + } else if (cmd.hasOption("retrievemetadata")) { - // TODO + String pid = cmd.getOptionValue("pid"); + String formatId = cmd.getOptionValue("format_id"); + ensureNotNull(pid, "-pid"); + ensureNotNull(formatId, "-format_id"); + + InputStream metadataStream = hashStore.retrieveMetadata(pid, formatId); + byte[] buffer = new byte[1000]; + int bytesRead = metadataStream.read(buffer, 0, buffer.length); + String metadataPreview = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + metadataStream.close(); + System.out.println(metadataPreview); + } else if (cmd.hasOption("deleteobject")) { - // TODO + String pid = cmd.getOptionValue("pid"); + ensureNotNull(pid, "-pid"); + + hashStore.deleteObject(pid); + System.out.println("Object for pid (" + pid + ") has been deleted."); + } else if (cmd.hasOption("deletemetadata")) { - // TODO + String pid = cmd.getOptionValue("pid"); + String formatId = cmd.getOptionValue("format_id"); + ensureNotNull(pid, "-pid"); + ensureNotNull(formatId, "-format_id"); + + hashStore.deleteMetadata(pid, formatId); + System.out.println( + "Metadata for pid (" + pid + ") and namespace (" + formatId + + ") has been deleted." + ); } } catch (ParseException e) { @@ -325,11 +358,10 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE * @param objType "data" (objects) or "documents" (metadata). * @param numObjects Number of rows to retrieve from metacat db, * if null, will retrieve all rows. - * @throws IOException - * @throws StreamReadException + * @throws IOException Related to accessing config files or objects */ private static void testWithKnbvm(String actionFlag, String objType, String numObjects) - throws IOException, StreamReadException { + throws IOException { // Load metacat db yaml System.out.println("Loading metacat db yaml."); Path pgdbYaml = storePath.resolve("pgdb.yaml"); @@ -809,7 +841,7 @@ private static String formatAlgo(String value) { * @param guid Pid/guid for which an exception was encountered. * @param errMsg Message to write into text file. * @param directory Directory within HashStore to log error (txt) files. - * @throws Exception + * @throws Exception Catch all for unexpected exceptions */ private static void logExceptionToFile(String guid, String errMsg, String directory) throws Exception { @@ -827,7 +859,6 @@ private static void logExceptionToFile(String guid, String errMsg, String direct } catch (Exception e) { e.printStackTrace(); - } } } From b828408b6eb0c64576a64e068f035286c3660ec4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 22 Aug 2023 12:23:44 -0700 Subject: [PATCH 050/553] Clean up code and fix minor bugs --- .../java/org/dataone/hashstore/Client.java | 247 ++++++++++-------- 1 file changed, 133 insertions(+), 114 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 97193917..a1805f0b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -48,10 +48,11 @@ public static void main(String[] args) throws Exception { Options options = addHashStoreClientOptions(); // Begin parsing arguments - CommandLineParser parser = new DefaultParser(); + CommandLineParser parser = new DefaultParser(false); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; try { + System.out.println("Parsing options"); cmd = parser.parse(options, args); // First check if user is looking for help @@ -63,20 +64,19 @@ public static void main(String[] args) throws Exception { // Then get store path and initialize HashStore if (!cmd.hasOption("store")) { String errMsg = - "HashStoreClient - store path must be supplied, use '-store=[path/to/store]'"; + "HashStoreClient - store path must be supplied, use 'store=[path/to/store]'"; throw new IllegalArgumentException(errMsg); } // Create or initialize HashStore - if (!cmd.hasOption("chs")) { + if (cmd.hasOption("chs")) { String storePath = cmd.getOptionValue("store"); String storeDepth = cmd.getOptionValue("dp"); String storeWidth = cmd.getOptionValue("wp"); - String storeAlgorithm = cmd.getOptionValue("storealgo"); - String storeNameSpace = cmd.getOptionValue("storenamespace"); + String storeAlgorithm = cmd.getOptionValue("ap"); + String storeNameSpace = cmd.getOptionValue("nsp"); createNewHashStore( storePath, storeDepth, storeWidth, storeAlgorithm, storeNameSpace ); - return; } else { storePath = Paths.get(cmd.getOptionValue("store")); Path hashstoreYaml = storePath.resolve("hashstore.yaml"); @@ -87,116 +87,133 @@ public static void main(String[] args) throws Exception { throw new FileNotFoundException(errMsg); } initializeHashStore(storePath); - } - - // Parse remaining options - if (cmd.hasOption("knbvm")) { - System.out.println( - "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore.yaml." - ); - - String action = null; - if (cmd.hasOption("sts")) { - action = "sts"; - } - if (cmd.hasOption("rav")) { - action = "rav"; - } - if (cmd.hasOption("dfs")) { - action = "dfs"; - } - String numObjects = cmd.getOptionValue("nobj"); - String objType = cmd.getOptionValue("stype"); - testWithKnbvm(action, objType, numObjects); - - } else if (cmd.hasOption("getchecksum")) { - String pid = cmd.getOptionValue("pid"); - String algo = cmd.getOptionValue("algo"); - ensureNotNull(pid, "-pid"); - ensureNotNull(algo, "-algo"); - String hexDigest = hashStore.getHexDigest(pid, algo); - System.out.println("Hex Digest (pid: " + pid + ", algorithm: " + algo + "):"); - System.out.println(hexDigest); - - } else if (cmd.hasOption("storeobject")) { - String pid = cmd.getOptionValue("pid"); - Path path = Paths.get(cmd.getOptionValue("path")); - String additional_algo = cmd.getOptionValue("algo"); - String checksum = cmd.getOptionValue("checksum"); - String checksum_algo = cmd.getOptionValue("checksum_algo"); - long size = Long.parseLong(cmd.getOptionValue("size")); - ensureNotNull(pid, "-pid"); - ensureNotNull(path, "-path"); - - InputStream pidObjStream = Files.newInputStream(path); - ObjectInfo objInfo = hashStore.storeObject( - pidObjStream, pid, additional_algo, checksum, checksum_algo, size - ); - pidObjStream.close(); - System.out.println("Object Info for pid (" + pid + "):"); - System.out.println(objInfo); - - } else if (cmd.hasOption("storemetadata")) { - String pid = cmd.getOptionValue("pid"); - Path path = Paths.get(cmd.getOptionValue("path")); - String formatId = cmd.getOptionValue("format_id"); - ensureNotNull(pid, "-pid"); - ensureNotNull(path, "-path"); - ensureNotNull(formatId, "-format_id"); - - InputStream pidObjStream = Files.newInputStream(path); - String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); - pidObjStream.close(); - System.out.println("Metadata Content Identifier:"); - System.out.println(metadataCid); - - } else if (cmd.hasOption("retrieveobject")) { - String pid = cmd.getOptionValue("pid"); - ensureNotNull(pid, "-pid"); - - InputStream objStream = hashStore.retrieveObject(pid); - byte[] buffer = new byte[1000]; - int bytesRead = objStream.read(buffer, 0, buffer.length); - String objPreview = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); - objStream.close(); - System.out.println(objPreview); - - } else if (cmd.hasOption("retrievemetadata")) { - String pid = cmd.getOptionValue("pid"); - String formatId = cmd.getOptionValue("format_id"); - ensureNotNull(pid, "-pid"); - ensureNotNull(formatId, "-format_id"); - - InputStream metadataStream = hashStore.retrieveMetadata(pid, formatId); - byte[] buffer = new byte[1000]; - int bytesRead = metadataStream.read(buffer, 0, buffer.length); - String metadataPreview = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); - metadataStream.close(); - System.out.println(metadataPreview); - } else if (cmd.hasOption("deleteobject")) { - String pid = cmd.getOptionValue("pid"); - ensureNotNull(pid, "-pid"); - - hashStore.deleteObject(pid); - System.out.println("Object for pid (" + pid + ") has been deleted."); + // Parse remaining options + if (cmd.hasOption("knbvm")) { + System.out.println( + "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore.yaml." + ); - } else if (cmd.hasOption("deletemetadata")) { - String pid = cmd.getOptionValue("pid"); - String formatId = cmd.getOptionValue("format_id"); - ensureNotNull(pid, "-pid"); - ensureNotNull(formatId, "-format_id"); + String action = null; + if (cmd.hasOption("sts")) { + action = "sts"; + } + if (cmd.hasOption("rav")) { + action = "rav"; + } + if (cmd.hasOption("dfs")) { + action = "dfs"; + } + String numObjects = cmd.getOptionValue("nobj"); + String objType = cmd.getOptionValue("stype"); + testWithKnbvm(action, objType, numObjects); + + } else if (cmd.hasOption("getchecksum")) { + String pid = cmd.getOptionValue("pid"); + String algo = cmd.getOptionValue("algo"); + ensureNotNull(pid, "-pid"); + ensureNotNull(algo, "-algo"); + String hexDigest = hashStore.getHexDigest(pid, algo); + System.out.println("Hex Digest (pid: " + pid + ", algorithm: " + algo + "):"); + System.out.println(hexDigest); + + } else if (cmd.hasOption("storeobject")) { + System.out.println("Storing object"); + String pid = cmd.getOptionValue("pid"); + Path path = Paths.get(cmd.getOptionValue("path")); + ensureNotNull(pid, "-pid"); + ensureNotNull(path, "-path"); + String additional_algo = null; + if (cmd.hasOption("algo")) { + additional_algo = cmd.getOptionValue("algo"); + } + String checksum = null; + if (cmd.hasOption("checksum")) { + checksum = cmd.getOptionValue("checksum"); + } + String checksum_algo = null; + if (cmd.hasOption("checksum_algo")) { + checksum_algo = cmd.getOptionValue("checksum_algo"); + } + long size = 0; + if (cmd.hasOption("size")) { + size = Long.parseLong(cmd.getOptionValue("size")); + } - hashStore.deleteMetadata(pid, formatId); - System.out.println( - "Metadata for pid (" + pid + ") and namespace (" + formatId - + ") has been deleted." - ); + InputStream pidObjStream = Files.newInputStream(path); + ObjectInfo objInfo = hashStore.storeObject( + pidObjStream, pid, additional_algo, checksum, checksum_algo, size + ); + pidObjStream.close(); + System.out.println("Object Info for pid (" + pid + "):"); + System.out.println(objInfo); + + } else if (cmd.hasOption("storemetadata")) { + String pid = cmd.getOptionValue("pid"); + Path path = Paths.get(cmd.getOptionValue("path")); + String formatId = cmd.getOptionValue("format_id"); + ensureNotNull(pid, "-pid"); + ensureNotNull(path, "-path"); + ensureNotNull(formatId, "-format_id"); + + InputStream pidObjStream = Files.newInputStream(path); + String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); + pidObjStream.close(); + System.out.println("Metadata Content Identifier:"); + System.out.println(metadataCid); + + } else if (cmd.hasOption("retrieveobject")) { + String pid = cmd.getOptionValue("pid"); + ensureNotNull(pid, "-pid"); + + InputStream objStream = hashStore.retrieveObject(pid); + byte[] buffer = new byte[1000]; + int bytesRead = objStream.read(buffer, 0, buffer.length); + String objPreview = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + objStream.close(); + System.out.println(objPreview); + + } else if (cmd.hasOption("retrievemetadata")) { + String pid = cmd.getOptionValue("pid"); + String formatId = cmd.getOptionValue("format_id"); + ensureNotNull(pid, "-pid"); + ensureNotNull(formatId, "-format_id"); + + InputStream metadataStream = hashStore.retrieveMetadata(pid, formatId); + byte[] buffer = new byte[1000]; + int bytesRead = metadataStream.read(buffer, 0, buffer.length); + String metadataPreview = new String( + buffer, 0, bytesRead, StandardCharsets.UTF_8 + ); + metadataStream.close(); + System.out.println(metadataPreview); + + } else if (cmd.hasOption("deleteobject")) { + String pid = cmd.getOptionValue("pid"); + ensureNotNull(pid, "-pid"); + + hashStore.deleteObject(pid); + System.out.println("Object for pid (" + pid + ") has been deleted."); + + } else if (cmd.hasOption("deletemetadata")) { + String pid = cmd.getOptionValue("pid"); + String formatId = cmd.getOptionValue("format_id"); + ensureNotNull(pid, "-pid"); + ensureNotNull(formatId, "-format_id"); + + hashStore.deleteMetadata(pid, formatId); + System.out.println( + "Metadata for pid (" + pid + ") and namespace (" + formatId + + ") has been deleted." + ); + } else { + System.out.println("HashStoreClient - No options found, use -h for help."); + } } } catch (ParseException e) { System.err.println("Error parsing cli arguments: " + e.getMessage()); - formatter.printHelp("CommandLineApp", options); + formatter.printHelp("HashStore Client Options", options); } } @@ -335,12 +352,14 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE // Load properties and get HashStore HashMap hsProperties = loadHashStoreYaml(storePath); Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", (String) hsProperties.get("storePath")); - storeProperties.setProperty("storeDepth", (String) hsProperties.get("storeDepth")); - storeProperties.setProperty("storeWidth", (String) hsProperties.get("storeWidth")); - storeProperties.setProperty("storeAlgorithm", (String) hsProperties.get("storeAlgorithm")); + storeProperties.setProperty("storePath", hsProperties.get("storePath").toString()); + storeProperties.setProperty("storeDepth", hsProperties.get("storeDepth").toString()); + storeProperties.setProperty("storeWidth", hsProperties.get("storeWidth").toString()); + storeProperties.setProperty( + "storeAlgorithm", hsProperties.get("storeAlgorithm").toString() + ); storeProperties.setProperty( - "storeMetadataNamespace", (String) hsProperties.get("storeMetadataNamespace") + "storeMetadataNamespace", hsProperties.get("storeMetadataNamespace").toString() ); // Get HashStore From 5290d3de22b397277c0e15a98ba11ba9451f181d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 22 Aug 2023 12:32:22 -0700 Subject: [PATCH 051/553] Update README.md with instructions on how to use HashStore java client --- README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/README.md b/README.md index 78fe7ed2..7610e524 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,41 @@ and then install or build the package with `mvn install` or `mvn package`, respe We also maintain a parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). +## Usage Example + +How to use HashStore Java client (command line app) +``` +# Step 1: Get HashStore Client Jar file +> mvn clean package -Dmaven.test.skip=true + +# Step 2: +## Create a HashStore (long option) +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +## Create a HashStore (short option) +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 + +# Get the checksum of a data object +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 + +# Store a data object +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 + +# Store a metadata object +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 + +# Retrieve a data object +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrieveobject -pid testpid1 + +# Retrieve a metadata object +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 + +# Delete a data object +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deleteobject -pid testpid1 + +# Delete a metadata file +> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +``` + ## License ```txt From 1f32b7f8a530a7b842c18e51871835893a412c08 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 22 Aug 2023 13:55:47 -0700 Subject: [PATCH 052/553] Refactor testing with knbvm to use options value over hardcoded path for data or documents origin directory --- .../java/org/dataone/hashstore/Client.java | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index a1805f0b..a6e0a4d1 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -104,9 +104,14 @@ public static void main(String[] args) throws Exception { if (cmd.hasOption("dfs")) { action = "dfs"; } - String numObjects = cmd.getOptionValue("nobj"); + String objType = cmd.getOptionValue("stype"); - testWithKnbvm(action, objType, numObjects); + String originDirectory = cmd.getOptionValue("sdir"); + String numObjects = cmd.getOptionValue("nobj"); + ensureNotNull(objType, "-stype"); + ensureNotNull(originDirectory, "-sdir"); + ensureNotNull(action, "-sts, -rav, -dfs"); + testWithKnbvm(action, objType, originDirectory, numObjects); } else if (cmd.hasOption("getchecksum")) { String pid = cmd.getOptionValue("pid"); @@ -379,8 +384,9 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE * if null, will retrieve all rows. * @throws IOException Related to accessing config files or objects */ - private static void testWithKnbvm(String actionFlag, String objType, String numObjects) - throws IOException { + private static void testWithKnbvm( + String actionFlag, String objType, String originDir, String numObjects + ) throws IOException { // Load metacat db yaml System.out.println("Loading metacat db yaml."); Path pgdbYaml = storePath.resolve("pgdb.yaml"); @@ -429,11 +435,12 @@ private static void testWithKnbvm(String actionFlag, String objType, String numO throw new IllegalArgumentException(errMsg); } } - Path setItemFilePath = Paths.get( - "/var/metacat/" + objType + "/" + docid + "." + rev - ); + Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev); if (Files.exists(setItemFilePath)) { + System.out.println( + "File exists (" + setItemFilePath + ")! Adding to resultObjList." + ); Map resultObj = new HashMap<>(); resultObj.put("pid", guid); resultObj.put("algorithm", formattedChecksumAlgo); From 836017d5245bbceb9a7165b18370c9c3df566937 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 22 Aug 2023 14:00:11 -0700 Subject: [PATCH 053/553] Change objType options argument check to be consistent with python client --- .../java/org/dataone/hashstore/Client.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index a6e0a4d1..03a1dc9b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -429,9 +429,9 @@ private static void testWithKnbvm( String formattedChecksumAlgo = formatAlgo(checksumAlgorithm); String formatId = resultSet.getString("object_format"); - if (!objType.equals("data")) { - if (!objType.equals("documents")) { - String errMsg = "HashStoreClient - objType must be 'data' or 'documents'"; + if (!objType.equals("object")) { + if (!objType.equals("metadata")) { + String errMsg = "HashStoreClient - objType must be 'object' or 'metadata'"; throw new IllegalArgumentException(errMsg); } } @@ -452,22 +452,22 @@ private static void testWithKnbvm( } // Check options - if (actionFlag.equals("sts") && objType.equals("data")) { + if (actionFlag.equals("sts") && objType.equals("object")) { storeObjsWithChecksumFromDb(resultObjList); } - if (actionFlag.equals("sts") && objType.equals("documents")) { + if (actionFlag.equals("sts") && objType.equals("metadata")) { storeMetadataFromDb(resultObjList); } - if (actionFlag.equals("rav") && objType.equals("data")) { + if (actionFlag.equals("rav") && objType.equals("object")) { retrieveAndValidateObjs(resultObjList); } - if (actionFlag.equals("rav") && objType.equals("documents")) { + if (actionFlag.equals("rav") && objType.equals("metadata")) { retrieveAndValidateMetadata(resultObjList); } - if (actionFlag.equals("dfs") && objType.equals("data")) { + if (actionFlag.equals("dfs") && objType.equals("object")) { deleteObjectsFromStore(resultObjList); } - if (actionFlag.equals("dfs") && objType.equals("documents")) { + if (actionFlag.equals("dfs") && objType.equals("metadata")) { deleteMetadataFromStore(resultObjList); } From 255ce781be34838c3539bb7ff4d0d67abbebc6db Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 23 Aug 2023 11:22:33 -0700 Subject: [PATCH 054/553] Add missing check for 'pgdb.yaml' when calling methods to test with knbvm --- src/main/java/org/dataone/hashstore/Client.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 03a1dc9b..9c77c180 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -93,6 +93,13 @@ public static void main(String[] args) throws Exception { System.out.println( "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore.yaml." ); + Path pgdbYaml = storePath.resolve("pgdb.yaml"); + if (!Files.exists(pgdbYaml)) { + String errMsg = "HashStoreClient - Missing pgdb.yaml at storePath (" + + storePath + "), please manually create it with the following keys: " + + "db_user, db_password, db_host, db_port, db_name"; + throw new FileNotFoundException(errMsg); + } String action = null; if (cmd.hasOption("sts")) { @@ -420,7 +427,6 @@ private static void testWithKnbvm( // and create a List to loop over List> resultObjList = new ArrayList<>(); while (resultSet.next()) { - System.out.println("Calling resultSet.next()"); String guid = resultSet.getString("guid"); String docid = resultSet.getString("docid"); int rev = resultSet.getInt("rev"); From 63060c8b6c2e12885fa6aa71cd6ea3ccb17fb30a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 23 Aug 2023 12:47:58 -0700 Subject: [PATCH 055/553] Update test harness values for pids with metadata cid --- .../dataone/hashstore/testdata/TestDataHarness.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java index 391d10a2..e523d40a 100644 --- a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java +++ b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java @@ -46,6 +46,10 @@ public TestDataHarness() { values1.put( "sha512-224", "107f9facb268471de250625440b6c8b7ff8296fbe5d89bed4a61fd35" ); + values1.put( + "metadata_cid", + "323e0799524cec4c7e14d31289cefd884b563b5c052f154a066de5ec1e477da7" + ); values1.put( "metadata_sha256", "158d7e55c36a810d7c14479c952a4d0b370f2b844808f2ea2b20d7df66768b04" @@ -75,6 +79,10 @@ public TestDataHarness() { values2.put( "sha512-224", "7a2b22e36ced9e91cf8cdf6971897ec4ae21780e11d1c3903011af33" ); + values2.put( + "metadata_cid", + "ddf07952ef28efc099d10d8b682480f7d2da60015f5d8873b6e1ea75b4baf689" + ); values2.put( "metadata_sha256", "d87c386943ceaeba5644c52b23111e4f47972e6530df0e6f0f41964b25855b08" @@ -104,6 +112,10 @@ public TestDataHarness() { values3.put( "sha512-224", "e1789a91c9df334fdf6ee5d295932ad96028c426a18b17016a627099" ); + values3.put( + "metadata_cid", + "9a2e08c666b728e6cbd04d247b9e556df3de5b2ca49f7c5a24868eb27cddbff2" + ); values3.put( "metadata_sha256", "27003e07f2ab374020de73298dd24a1d8b1b57647b8fa3c49db00f8c342afa1d" From bf7b7b403136f1c959bdf56e08b103438081ce3e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 23 Aug 2023 13:18:23 -0700 Subject: [PATCH 056/553] Update 'HashStore' interface with storeMetadata override method and add decorator to respective method in 'FileHashStore' --- src/main/java/org/dataone/hashstore/Client.java | 2 +- src/main/java/org/dataone/hashstore/HashStore.java | 4 ++++ .../org/dataone/hashstore/filehashstore/FileHashStore.java | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 9c77c180..9b21a99d 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -158,7 +158,7 @@ public static void main(String[] args) throws Exception { ); pidObjStream.close(); System.out.println("Object Info for pid (" + pid + "):"); - System.out.println(objInfo); + System.out.println(objInfo.getHexDigests()); } else if (cmd.hasOption("storemetadata")) { String pid = cmd.getOptionValue("pid"); diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index b2e7f50d..e0a08c38 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -96,6 +96,10 @@ String storeMetadata(InputStream metadata, String pid, String formatId) throws I IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException; + String storeMetadata(InputStream metadata, String pid) throws IOException, + IllegalArgumentException, FileNotFoundException, InterruptedException, + NoSuchAlgorithmException; + /** * The `retrieveObject` method retrieves an object from HashStore using a given persistent * identifier (pid). diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 428da24b..99e5df22 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -670,6 +670,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF /** * Overload method for storeMetadata with default metadata namespace */ + @Override public String storeMetadata(InputStream metadata, String pid) throws IOException, IllegalArgumentException, InterruptedException, NoSuchAlgorithmException { logFileHashStore.debug( From 8b8cef1cd46af1f24254dcb7c221e2444c7b41f8 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 23 Aug 2023 13:32:32 -0700 Subject: [PATCH 057/553] Add new test class to confirm 'Client' calls public api successfully and remove redundant print statements --- .../java/org/dataone/hashstore/Client.java | 2 - .../org/dataone/hashstore/ClientTest.java | 432 ++++++++++++++++++ 2 files changed, 432 insertions(+), 2 deletions(-) create mode 100644 src/test/java/org/dataone/hashstore/ClientTest.java diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 9b21a99d..779fd4d8 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -52,7 +52,6 @@ public static void main(String[] args) throws Exception { HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; try { - System.out.println("Parsing options"); cmd = parser.parse(options, args); // First check if user is looking for help @@ -126,7 +125,6 @@ public static void main(String[] args) throws Exception { ensureNotNull(pid, "-pid"); ensureNotNull(algo, "-algo"); String hexDigest = hashStore.getHexDigest(pid, algo); - System.out.println("Hex Digest (pid: " + pid + ", algorithm: " + algo + "):"); System.out.println(hexDigest); } else if (cmd.hasOption("storeobject")) { diff --git a/src/test/java/org/dataone/hashstore/ClientTest.java b/src/test/java/org/dataone/hashstore/ClientTest.java new file mode 100644 index 00000000..23818615 --- /dev/null +++ b/src/test/java/org/dataone/hashstore/ClientTest.java @@ -0,0 +1,432 @@ +package org.dataone.hashstore; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + + +import org.dataone.hashstore.testdata.TestDataHarness; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class ClientTest { + private static HashStore hashStore; + private static final TestDataHarness testData = new TestDataHarness(); + private Properties hsProperties; + + /** + * Temporary folder for tests to run in + */ + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + + @Before + public void getHashStore() { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + Path rootDirectory = tempFolder.getRoot().toPath().resolve("metacat"); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + try { + hsProperties = storeProperties; + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + + } catch (Exception e) { + e.printStackTrace(); + fail("ClientTest - Exception encountered: " + e.getMessage()); + + } + } + + /** + * Generates a hierarchical path by dividing a given digest into tokens of fixed width, and + * concatenating them with '/' as the delimiter. + * + * @param dirDepth integer to represent number of directories + * @param dirWidth width of each directory + * @param digest value to shard + * @return String + */ + protected String getHierarchicalPathString(int dirDepth, int dirWidth, String digest) { + List tokens = new ArrayList<>(); + int digestLength = digest.length(); + for (int i = 0; i < dirDepth; i++) { + int start = i * dirWidth; + int end = Math.min((i + 1) * dirWidth, digestLength); + tokens.add(digest.substring(start, end)); + } + + if (dirDepth * dirWidth < digestLength) { + tokens.add(digest.substring(dirDepth * dirWidth)); + } + + List stringArray = new ArrayList<>(); + for (String str : tokens) { + if (!str.trim().isEmpty()) { + stringArray.add(str); + } + } + // stringShard + return String.join("/", stringArray); + } + + /** + * Utility method to get absolute path of a given object and objType + * ("objects" or "metadata"). + */ + public Path getObjectAbsPath(String id, String objType) { + int shardDepth = Integer.parseInt(hsProperties.getProperty("storeDepth")); + int shardWidth = Integer.parseInt(hsProperties.getProperty("storeWidth")); + // Get relative path + String objCidShardString = this.getHierarchicalPathString(shardDepth, shardWidth, id); + // Get absolute path + Path storePath = Paths.get(hsProperties.getProperty("storePath")); + Path absPath = null; + if (objType.equals("object")) { + absPath = storePath.resolve("objects/" + objCidShardString); + } + if (objType.equals("metadata")) { + absPath = storePath.resolve("metadata/" + objCidShardString); + } + return absPath; + } + + /** + * Test creating a HashStore through client. + */ + @Test + public void client_createHashStore() throws Exception { + String optCreateHashstore = "-chs"; + String optStore = "-store"; + String optStorePath = tempFolder.getRoot() + "/metacat"; + String optStoreDepth = "-dp"; + String optStoreDepthValue = "3"; + String optStoreWidth = "-wp"; + String optStoreWidthValue = "2"; + String optAlgo = "-ap"; + String optAlgoValue = "SHA-256"; + String optFormatId = "-nsp"; + String optFormatIdValue = "http://ns.dataone.org/service/types/v2.0"; + String[] args = {optCreateHashstore, optStore, optStorePath, optStoreDepth, + optStoreDepthValue, optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, + optFormatId, optFormatIdValue}; + Client.main(args); + + Path storePath = Paths.get(optStorePath); + Path hashStoreObjectsPath = storePath.resolve("objects"); + Path hashStoreMetadataPath = storePath.resolve("metadata"); + Path hashStoreYaml = storePath.resolve("hashstore.yaml"); + System.out.println(hashStoreYaml); + assertTrue(Files.exists(hashStoreObjectsPath)); + assertTrue(Files.exists(hashStoreMetadataPath)); + assertTrue(Files.exists(hashStoreYaml)); + } + + /** + * Test hashStore client stores objects. + */ + @Test + public void client_storeObjects() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + // Call client + String optStoreObject = "-storeobject"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPath = "-path"; + String optObjectPath = testDataFile.toString(); + String optPid = "-pid"; + String optPidValue = pid; + String[] args = {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, + optPidValue}; + Client.main(args); + + // Confirm object was stored + Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); + assertTrue(Files.exists(absPath)); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm client printed content + String pidStdOut = outputStream.toString(); + assertFalse(pidStdOut.isEmpty()); + } + } + + /** + * Test hashStore client stores metadata. + */ + @Test + public void client_storeMetadata() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + // Call client + String optStoreMetadata = "-storemetadata"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPath = "-path"; + String optObjectPath = testDataFile.toString(); + String optPid = "-pid"; + String optPidValue = pid; + String optFormatId = "-format_id"; + String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); + String[] args = {optStoreMetadata, optStore, optStorePath, optPath, optObjectPath, + optPid, optPidValue, optFormatId, optFormatIdValue}; + Client.main(args); + + // Confirm metadata was stored + Path absPath = getObjectAbsPath( + testData.pidData.get(pid).get("metadata_cid"), "metadata" + ); + assertTrue(Files.exists(absPath)); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm client printed content + String pidStdOut = outputStream.toString(); + assertFalse(pidStdOut.isEmpty()); + } + } + + /** + * Test hashStore client retrieves objects. + */ + @Test + public void client_retrieveObjects() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + InputStream dataStream = Files.newInputStream(testDataFile); + hashStore.storeObject(dataStream, pid, null, null, null, 0); + + // Call client + String optRetrieveObject = "-retrieveobject"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPid = "-pid"; + String optPidValue = pid; + String[] args = {optRetrieveObject, optStore, optStorePath, optPid, optPidValue}; + Client.main(args); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm client printed content + String pidStdOut = outputStream.toString(); + assertFalse(pidStdOut.isEmpty()); + } + } + + /** + * Test hashStore client retrieves objects. + */ + @Test + public void client_retrieveMetadata() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + hashStore.storeMetadata(metadataStream, pid); + + // Call client + String optRetrieveMetadata = "-retrievemetadata"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPid = "-pid"; + String optPidValue = pid; + String optFormatId = "-format_id"; + String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); + String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, optPidValue, + optFormatId, optFormatIdValue}; + Client.main(args); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm client printed content + String pidStdOut = outputStream.toString(); + assertFalse(pidStdOut.isEmpty()); + } + } + + /** + * Test hashStore client deletes objects. + */ + @Test + public void client_deleteObjects() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + InputStream dataStream = Files.newInputStream(testDataFile); + hashStore.storeObject(dataStream, pid, null, null, null, 0); + + // Call client + String optDeleteObject = "-deleteobject"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPid = "-pid"; + String optPidValue = pid; + String[] args = {optDeleteObject, optStore, optStorePath, optPid, optPidValue}; + Client.main(args); + + // Confirm object was deleted + Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); + assertFalse(Files.exists(absPath)); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm client printed content + String pidStdOut = outputStream.toString(); + assertFalse(pidStdOut.isEmpty()); + } + } + + /** + * Test hashStore client retrieves objects. + */ + @Test + public void client_deleteMetadata() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + hashStore.storeMetadata(metadataStream, pid); + + // Call client + String optDeleteMetadata = "-deletemetadata"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPid = "-pid"; + String optPidValue = pid; + String optFormatId = "-format_id"; + String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); + String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, optPidValue, + optFormatId, optFormatIdValue}; + Client.main(args); + + // Confirm metadata was deleted + Path absPath = getObjectAbsPath( + testData.pidData.get(pid).get("metadata_cid"), "metadata" + ); + assertFalse(Files.exists(absPath)); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm client printed content + String pidStdOut = outputStream.toString(); + assertFalse(pidStdOut.isEmpty()); + } + } + + /** + * Test hashStore client returns hex digest of object. + */ + @Test + public void client_getHexDigest() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + InputStream dataStream = Files.newInputStream(testDataFile); + hashStore.storeObject(dataStream, pid, null, null, null, 0); + + // Call client + String optGetChecksum = "-getchecksum"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPid = "-pid"; + String optPidValue = pid; + String optAlgo = "-algo"; + String optAlgoValue = "SHA-256"; + String[] args = {optGetChecksum, optStore, optStorePath, optPid, optPidValue, optAlgo, + optAlgoValue}; + Client.main(args); + + + String testDataChecksum = testData.pidData.get(pid).get("sha256"); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm hex digest matches + String pidStdOut = outputStream.toString(); + assertEquals(testDataChecksum, pidStdOut.trim()); + } + } +} From 8b437a6ff004bf36db6f2a647c0897692f947c38 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 7 Sep 2023 15:19:18 -0700 Subject: [PATCH 058/553] Update README.md formatting --- README.md | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 7610e524..af76b859 100644 --- a/README.md +++ b/README.md @@ -24,39 +24,43 @@ and then install or build the package with `mvn install` or `mvn package`, respe We also maintain a parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). -## Usage Example +## HashStore Client Usage -How to use HashStore Java client (command line app) -``` -# Step 1: Get HashStore Client Jar file -> mvn clean package -Dmaven.test.skip=true +```sh + +# Step 1: Get HashStore Jar file +$ mvn clean package -Dmaven.test.skip=true + +# Get help +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -h # Step 2: ## Create a HashStore (long option) -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 + ## Create a HashStore (short option) -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 # Get the checksum of a data object -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 # Store a data object -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 # Store a metadata object -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Retrieve a data object -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrieveobject -pid testpid1 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrieveobject -pid testpid1 # Retrieve a metadata object -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Delete a data object -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deleteobject -pid testpid1 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deleteobject -pid testpid1 # Delete a metadata file -> java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 ``` ## License From 906ff2c124971f72b050cc25cee869e81017a217 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Sep 2023 11:51:41 -0700 Subject: [PATCH 059/553] Add new class 'FileHashStoreUtility' and extract 'ensureNotNull' method from 'Client' and 'FileHashStore' --- .../java/org/dataone/hashstore/Client.java | 60 ++++++------ .../filehashstore/FileHashStore.java | 92 +++++++++---------- .../filehashstore/FileHashStoreUtility.java | 22 +++++ 3 files changed, 95 insertions(+), 79 deletions(-) create mode 100644 src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 779fd4d8..5893f351 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -24,6 +24,8 @@ import javax.xml.bind.DatatypeConverter; +import org.dataone.hashstore.filehashstore.FileHashStoreUtility; + import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -73,6 +75,7 @@ public static void main(String[] args) throws Exception { String storeWidth = cmd.getOptionValue("wp"); String storeAlgorithm = cmd.getOptionValue("ap"); String storeNameSpace = cmd.getOptionValue("nsp"); + createNewHashStore( storePath, storeDepth, storeWidth, storeAlgorithm, storeNameSpace ); @@ -114,16 +117,20 @@ public static void main(String[] args) throws Exception { String objType = cmd.getOptionValue("stype"); String originDirectory = cmd.getOptionValue("sdir"); String numObjects = cmd.getOptionValue("nobj"); - ensureNotNull(objType, "-stype"); - ensureNotNull(originDirectory, "-sdir"); - ensureNotNull(action, "-sts, -rav, -dfs"); + FileHashStoreUtility.ensureNotNull(objType, "-stype", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull( + action, "-sts, -rav, -dfs", "HashStoreClient" + ); + testWithKnbvm(action, objType, originDirectory, numObjects); } else if (cmd.hasOption("getchecksum")) { String pid = cmd.getOptionValue("pid"); String algo = cmd.getOptionValue("algo"); - ensureNotNull(pid, "-pid"); - ensureNotNull(algo, "-algo"); + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(algo, "-algo", "HashStoreClient"); + String hexDigest = hashStore.getHexDigest(pid, algo); System.out.println(hexDigest); @@ -131,8 +138,9 @@ public static void main(String[] args) throws Exception { System.out.println("Storing object"); String pid = cmd.getOptionValue("pid"); Path path = Paths.get(cmd.getOptionValue("path")); - ensureNotNull(pid, "-pid"); - ensureNotNull(path, "-path"); + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(path, "-path", "HashStoreClient"); + String additional_algo = null; if (cmd.hasOption("algo")) { additional_algo = cmd.getOptionValue("algo"); @@ -162,9 +170,9 @@ public static void main(String[] args) throws Exception { String pid = cmd.getOptionValue("pid"); Path path = Paths.get(cmd.getOptionValue("path")); String formatId = cmd.getOptionValue("format_id"); - ensureNotNull(pid, "-pid"); - ensureNotNull(path, "-path"); - ensureNotNull(formatId, "-format_id"); + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(path, "-path", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); InputStream pidObjStream = Files.newInputStream(path); String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); @@ -174,7 +182,7 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("retrieveobject")) { String pid = cmd.getOptionValue("pid"); - ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); InputStream objStream = hashStore.retrieveObject(pid); byte[] buffer = new byte[1000]; @@ -186,8 +194,8 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("retrievemetadata")) { String pid = cmd.getOptionValue("pid"); String formatId = cmd.getOptionValue("format_id"); - ensureNotNull(pid, "-pid"); - ensureNotNull(formatId, "-format_id"); + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); InputStream metadataStream = hashStore.retrieveMetadata(pid, formatId); byte[] buffer = new byte[1000]; @@ -200,16 +208,15 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("deleteobject")) { String pid = cmd.getOptionValue("pid"); - ensureNotNull(pid, "-pid"); - + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); hashStore.deleteObject(pid); System.out.println("Object for pid (" + pid + ") has been deleted."); } else if (cmd.hasOption("deletemetadata")) { String pid = cmd.getOptionValue("pid"); String formatId = cmd.getOptionValue("format_id"); - ensureNotNull(pid, "-pid"); - ensureNotNull(formatId, "-format_id"); + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); hashStore.deleteMetadata(pid, formatId); System.out.println( @@ -306,6 +313,12 @@ private static void createNewHashStore( String storePath, String storeDepth, String storeWidth, String storeAlgorithm, String storeNameSpace ) throws HashStoreFactoryException, IOException { + FileHashStoreUtility.ensureNotNull(storePath, "storePath", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(storeDepth, "storeDepth", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(storeWidth, "storeWidth", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(storeAlgorithm, "storeAlgorithm", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(storeNameSpace, "storeNameSpace", "HashStoreClient"); + Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", storePath); storeProperties.setProperty("storeDepth", storeDepth); @@ -801,19 +814,6 @@ private static void deleteMetadataFromStore(List> resultObjL // Utility methods - /** - * Checks whether a given object is null and throws an exception if so - * - * @param object Object to check - * @param argument Value that is being checked - */ - private static void ensureNotNull(Object object, String argument) { - if (object == null) { - String errMsg = "HashStoreClient - " + argument + " cannot be null."; - throw new NullPointerException(errMsg); - } - } - /** * Calculate the hex digest of a pid's respective object with the given algorithm * diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 99e5df22..ba5788de 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -94,7 +94,9 @@ enum HashStoreProperties { public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { logFileHashStore.info("FileHashStore - Call received to instantiate FileHashStore"); - ensureNotNull(hashstoreProperties, "hashstoreProperties", "FileHashStore - constructor"); + FileHashStoreUtility.ensureNotNull( + hashstoreProperties, "hashstoreProperties", "FileHashStore - constructor" + ); // Get properties // Note - Paths.get() throws NullPointerException if arg is null @@ -206,9 +208,10 @@ protected void verifyHashStoreProperties( // Ensure algorithm supplied is not empty, not null and supported validateAlgorithm(storeAlgorithm); // Review metadata format (formatId) - ensureNotNull( + FileHashStoreUtility.ensureNotNull( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); + checkForEmptyString( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); @@ -412,8 +415,10 @@ public ObjectInfo storeObject( ); // Begin input validation - ensureNotNull(object, "object", "storeObject"); - ensureNotNull(pid, "pid", "storeObject"); + FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); + // ensureNotNull(object, "object", "storeObject"); + // ensureNotNull(pid, "pid", "storeObject"); checkForEmptyString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { @@ -520,8 +525,8 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA ); // Begin input validation - ensureNotNull(object, "object", "storeObject"); - ensureNotNull(pid, "pid", "storeObject"); + FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); checkForEmptyString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { @@ -544,8 +549,8 @@ public ObjectInfo storeObject( ); // Begin input validation - ensureNotNull(object, "object", "storeObject"); - ensureNotNull(pid, "pid", "storeObject"); + FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); checkForEmptyString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (checksumAlgorithm != null) { @@ -567,8 +572,8 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) ); // Begin input validation - ensureNotNull(object, "object", "storeObject"); - ensureNotNull(pid, "pid", "storeObject"); + FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); checkForEmptyString(pid, "pid", "storeObject"); checkNotNegative(objSize, "storeObject"); @@ -584,8 +589,8 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) + ", with formatId: " + formatId ); // Validate input parameters - ensureNotNull(metadata, "metadata", "storeMetadata"); - ensureNotNull(pid, "pid", "storeMetadata"); + FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); checkForEmptyString(pid, "pid", "storeMetadata"); // Determine metadata namespace @@ -678,8 +683,8 @@ public String storeMetadata(InputStream metadata, String pid) throws IOException + ", with default namespace." ); // Validate input parameters - ensureNotNull(metadata, "metadata", "storeMetadata"); - ensureNotNull(pid, "pid", "storeMetadata"); + FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); checkForEmptyString(pid, "pid", "storeMetadata"); return syncPutMetadata(metadata, pid, METADATA_NAMESPACE); @@ -692,7 +697,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, "FileHashStore.retrieveObject - Called to retrieve object for pid: " + pid ); // Validate input parameters - ensureNotNull(pid, "pid", "retrieveObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); checkForEmptyString(pid, "pid", "retrieveObject"); // Get permanent address of the pid by calculating its sha-256 hex digest @@ -731,9 +736,9 @@ public InputStream retrieveMetadata(String pid, String formatId) throws Exceptio + " with formatId: " + formatId ); // Validate input parameters - ensureNotNull(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); checkForEmptyString(pid, "pid", "retrieveMetadata"); - ensureNotNull(formatId, "formatId", "retrieveMetadata"); + FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); checkForEmptyString(formatId, "formatId", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest @@ -775,7 +780,7 @@ public InputStream retrieveMetadata(String pid) throws Exception { + " with default metadata namespace: " + METADATA_NAMESPACE ); // Validate input parameters - ensureNotNull(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); checkForEmptyString(pid, "pid", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest @@ -817,7 +822,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou "FileHashStore.deleteObject - Called to delete object for pid: " + pid ); // Validate input parameters - ensureNotNull(pid, "pid", "deleteObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteObject"); checkForEmptyString(pid, "pid", "deleteObject"); // Get permanent address of the pid by calculating its sha-256 hex digest @@ -846,9 +851,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid ); // Validate input parameters - ensureNotNull(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); checkForEmptyString(pid, "pid", "deleteMetadata"); - ensureNotNull(formatId, "formatId", "deleteMetadata"); + FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); checkForEmptyString(formatId, "formatId", "deleteMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest @@ -879,7 +884,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotF "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid ); // Validate input parameters - ensureNotNull(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); checkForEmptyString(pid, "pid", "deleteMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest @@ -908,7 +913,7 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE "FileHashStore.getHexDigest - Called to calculate hex digest for pid: " + pid ); - ensureNotNull(pid, "pid", "getHexDigest"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); checkForEmptyString(pid, "pid", "getHexDigest"); validateAlgorithm(algorithm); @@ -973,8 +978,8 @@ protected ObjectInfo putObject( logFileHashStore.debug("FileHashStore.putObject - Called to put object for pid: " + pid); // Begin input validation - ensureNotNull(object, "object", "putObject"); - ensureNotNull(pid, "pid", "putObject"); + FileHashStoreUtility.ensureNotNull(object, "object", "putObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "putObject"); checkForEmptyString(pid, "pid", "putObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { @@ -1133,7 +1138,7 @@ private void validateTmpObject( */ protected boolean validateAlgorithm(String algorithm) throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { - ensureNotNull(algorithm, "algorithm", "validateAlgorithm"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "putObject"); checkForEmptyString(algorithm, "algorithm", "validateAlgorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); @@ -1159,7 +1164,9 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor throws NoSuchAlgorithmException { // If checksum is supplied, checksumAlgorithm cannot be empty if (checksum != null && !checksum.trim().isEmpty()) { - ensureNotNull(checksumAlgorithm, "algorithm", "verifyChecksumParameters"); + FileHashStoreUtility.ensureNotNull( + checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters" + ); checkForEmptyString(checksumAlgorithm, "algorithm", "verifyChecksumParameters"); } // Ensure algorithm is supported, not null and not empty @@ -1168,7 +1175,9 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor requestValidation = validateAlgorithm(checksumAlgorithm); // Ensure checksum is not null or empty if checksumAlgorithm is supplied in if (requestValidation) { - ensureNotNull(checksum, "checksum", "verifyChecksumParameters"); + FileHashStoreUtility.ensureNotNull( + checksum, "checksum", "verifyChecksumParameters" + ); assert checksum != null; checkForEmptyString(checksum, "checksum", "verifyChecksumParameters"); } @@ -1187,9 +1196,9 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor */ protected String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, IllegalArgumentException { - ensureNotNull(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); checkForEmptyString(pid, "pid", "getPidHexDigest"); - ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); checkForEmptyString(algorithm, "algorithm", "getPidHexDigest"); validateAlgorithm(algorithm); @@ -1409,7 +1418,8 @@ protected void move(File source, File target, String entity) throws IOException, + source + ", to target: " + target ); // Validate input parameters - ensureNotNull(entity, "entity", "move"); + FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); + checkForEmptyString(entity, "entity", "move"); // Entity is only used when checking for an existence of an object if (entity.equals("object") && target.exists()) { @@ -1474,8 +1484,8 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) ); // Validate input parameters - ensureNotNull(metadata, "metadata", "putMetadata"); - ensureNotNull(pid, "pid", "putMetadata"); + FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); checkForEmptyString(pid, "pid", "putMetadata"); // Determine metadata namespace @@ -1634,22 +1644,6 @@ private boolean isDirectoryEmpty(Path directory) throws IOException { } } - /** - * Checks whether a given object is null and throws an exception if so - * - * @param object Object to check - * @param argument Value that is being checked - * @param method Calling method - */ - private void ensureNotNull(Object object, String argument, String method) { - if (object == null) { - String errMsg = "FileHashStore.isStringNullOrEmpty - Calling Method: " + method + "(): " - + argument + " cannot be null."; - logFileHashStore.error(errMsg); - throw new NullPointerException(errMsg); - } - } - /** * Checks whether a given string is empty and throws an exception if so * diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java new file mode 100644 index 00000000..4b140aca --- /dev/null +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -0,0 +1,22 @@ +package org.dataone.hashstore.filehashstore; + +/** + * FileHashStoreUtility is a utility class that provides shared functionality between FileHashStore + * and related classes. + */ +public class FileHashStoreUtility { + /** + * Checks whether a given object is null and throws an exception if so + * + * @param object Object to check + * @param argument Value that is being checked + * @param method Calling method or class + */ + public static void ensureNotNull(Object object, String argument, String method) { + if (object == null) { + String errMsg = "FileHashStoreUtility.isStringNullOrEmpty - Calling Method: " + method + + "(): " + argument + " cannot be null."; + throw new NullPointerException(errMsg); + } + } +} From 7a3e6fbcf13b746005d57d9654a07969b44fdd4e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Sep 2023 12:03:27 -0700 Subject: [PATCH 060/553] Extract 'calculateHexDigest' method to 'FileHashStoreUtility' class and refactor 'Client' and 'FileHashStore' --- .../java/org/dataone/hashstore/Client.java | 42 +++---------------- .../filehashstore/FileHashStore.java | 38 +---------------- .../filehashstore/FileHashStoreUtility.java | 42 +++++++++++++++++++ 3 files changed, 49 insertions(+), 73 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 5893f351..2bfcf970 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -10,8 +10,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -22,8 +20,6 @@ import java.sql.ResultSet; import java.sql.Statement; -import javax.xml.bind.DatatypeConverter; - import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import org.apache.commons.cli.CommandLine; @@ -573,7 +569,7 @@ private static void retrieveAndValidateObjs(List> resultObjL // Get hex digest System.out.println("Calculating hex digest with algorithm: " + algorithm); - String streamDigest = calculateHexDigest(objStream, algorithm); + String streamDigest = FileHashStoreUtility.calculateHexDigest(objStream, algorithm); objStream.close(); // If checksums don't match, write a .txt file @@ -724,7 +720,9 @@ private static void retrieveAndValidateMetadata(List> result // Get hex digest System.out.println("Calculating hex digest with algorithm: " + algorithm); - String streamDigest = calculateHexDigest(metadataStream, algorithm); + String streamDigest = FileHashStoreUtility.calculateHexDigest( + metadataStream, algorithm + ); metadataStream.close(); // If checksums don't match, write a .txt file @@ -812,37 +810,7 @@ private static void deleteMetadataFromStore(List> resultObjL } - // Utility methods - - /** - * Calculate the hex digest of a pid's respective object with the given algorithm - * - * @param stream Path to object - * @param algorithm Hash algorithm to use - * @return Hex digest of the pid's respective object - * @throws IOException Error when calculating hex digest - * @throws NoSuchAlgorithmException Algorithm not supported - */ - private static String calculateHexDigest(InputStream stream, String algorithm) - throws IOException, NoSuchAlgorithmException { - MessageDigest mdObject = MessageDigest.getInstance(algorithm); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = stream.read(buffer)) != -1) { - mdObject.update(buffer, 0, bytesRead); - - } - // Close stream - stream.close(); - - } catch (IOException ioe) { - ioe.printStackTrace(); - - } - // mdObjectHexDigest - return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); - } + // Utility methods specific to Client /** * Format an algorithm string value to be compatible with MessageDigest class diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index ba5788de..f16ce9e5 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -928,7 +928,8 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE throw new FileNotFoundException(errMsg); } - String mdObjectHexDigest = calculateHexDigest(objRealPath, algorithm); + InputStream dataStream = Files.newInputStream(objRealPath); + String mdObjectHexDigest = FileHashStoreUtility.calculateHexDigest(dataStream, algorithm); logFileHashStore.info( "FileHashStore.getHexDigest - Hex digest calculated for pid: " + pid + ", with hex digest value: " + mdObjectHexDigest @@ -1675,39 +1676,4 @@ private void checkNotNegative(long object, String method) { } } - /** - * Calculate the hex digest of a pid's respective object with the given algorithm - * - * @param objectPath Path to object - * @param algorithm Hash algorithm to use - * @return Hex digest of the pid's respective object - * @throws IOException Error when calculating hex digest - * @throws NoSuchAlgorithmException Algorithm not supported - */ - private String calculateHexDigest(Path objectPath, String algorithm) throws IOException, - NoSuchAlgorithmException { - MessageDigest mdObject = MessageDigest.getInstance(algorithm); - try { - InputStream dataStream = Files.newInputStream(objectPath); - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = dataStream.read(buffer)) != -1) { - mdObject.update(buffer, 0, bytesRead); - - } - // Close dataStream - dataStream.close(); - - } catch (IOException ioe) { - String errMsg = "FileHashStore.getHexDigest - Unexpected IOException encountered: " - + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw ioe; - - } - // mdObjectHexDigest - return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); - - } - } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 4b140aca..3b074e1e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -1,10 +1,18 @@ package org.dataone.hashstore.filehashstore; +import java.io.IOException; +import java.io.InputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import javax.xml.bind.DatatypeConverter; + /** * FileHashStoreUtility is a utility class that provides shared functionality between FileHashStore * and related classes. */ public class FileHashStoreUtility { + /** * Checks whether a given object is null and throws an exception if so * @@ -19,4 +27,38 @@ public static void ensureNotNull(Object object, String argument, String method) throw new NullPointerException(errMsg); } } + + /** + * Calculate the hex digest of a pid's respective object with the given algorithm + * + * @param dataStream InputStream to object + * @param algorithm Hash algorithm to use + * @return Hex digest of the pid's respective object + * @throws IOException Error when calculating hex digest + * @throws NoSuchAlgorithmException Algorithm not supported + */ + public static String calculateHexDigest(InputStream dataStream, String algorithm) + throws IOException, NoSuchAlgorithmException { + MessageDigest mdObject = MessageDigest.getInstance(algorithm); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = dataStream.read(buffer)) != -1) { + mdObject.update(buffer, 0, bytesRead); + + } + // Close dataStream + dataStream.close(); + + } catch (IOException ioe) { + String errMsg = + "FileHashStoreUtility.calculateHexDigest - Unexpected IOException encountered: " + + ioe.getMessage(); + throw new IOException(errMsg); + + } + // mdObjectHexDigest + return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); + + } } From 7ef3826a13695bef7c6d9c394792fc1ff4306006 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Sep 2023 12:20:51 -0700 Subject: [PATCH 061/553] Extract generic methods from 'FileHashStore' to 'FileHashStoreUtility' class --- .../filehashstore/FileHashStore.java | 151 +++++++----------- .../filehashstore/FileHashStoreUtility.java | 57 ++++++- 2 files changed, 110 insertions(+), 98 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f16ce9e5..d0962416 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -24,7 +24,6 @@ import java.util.Objects; import java.util.Properties; import java.util.Random; -import java.util.stream.Stream; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -211,8 +210,7 @@ protected void verifyHashStoreProperties( FileHashStoreUtility.ensureNotNull( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); - - checkForEmptyString( + FileHashStoreUtility.checkForEmptyString( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); @@ -249,7 +247,7 @@ protected void verifyHashStoreProperties( ); if (Files.isDirectory(storePath)) { - if (!isDirectoryEmpty(storePath)) { + if (!FileHashStoreUtility.isDirectoryEmpty(storePath)) { String errMsg = "FileHashStore - Missing 'hashstore.yaml' but directories" + " and/or objects found."; logFileHashStore.fatal(errMsg); @@ -417,19 +415,21 @@ public ObjectInfo storeObject( // Begin input validation FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - // ensureNotNull(object, "object", "storeObject"); - // ensureNotNull(pid, "pid", "storeObject"); - checkForEmptyString(pid, "pid", "storeObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - checkForEmptyString(additionalAlgorithm, "additionalAlgorithm", "storeObject"); + FileHashStoreUtility.checkForEmptyString( + additionalAlgorithm, "additionalAlgorithm", "storeObject" + ); validateAlgorithm(additionalAlgorithm); } if (checksumAlgorithm != null) { - checkForEmptyString(checksumAlgorithm, "checksumAlgorithm", "storeObject"); + FileHashStoreUtility.checkForEmptyString( + checksumAlgorithm, "checksumAlgorithm", "storeObject" + ); validateAlgorithm(checksumAlgorithm); } - checkNotNegative(objSize, "storeObject"); + FileHashStoreUtility.checkNotNegative(objSize, "storeObject"); return syncPubObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize @@ -527,10 +527,12 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA // Begin input validation FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - checkForEmptyString(pid, "pid", "storeObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - checkForEmptyString(additionalAlgorithm, "additionalAlgorithm", "storeObject"); + FileHashStoreUtility.checkForEmptyString( + additionalAlgorithm, "additionalAlgorithm", "storeObject" + ); validateAlgorithm(additionalAlgorithm); } @@ -551,10 +553,12 @@ public ObjectInfo storeObject( // Begin input validation FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - checkForEmptyString(pid, "pid", "storeObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (checksumAlgorithm != null) { - checkForEmptyString(checksumAlgorithm, "checksumAlgorithm", "storeObject"); + FileHashStoreUtility.checkForEmptyString( + checksumAlgorithm, "checksumAlgorithm", "storeObject" + ); validateAlgorithm(checksumAlgorithm); } @@ -574,8 +578,8 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) // Begin input validation FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - checkForEmptyString(pid, "pid", "storeObject"); - checkNotNegative(objSize, "storeObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); + FileHashStoreUtility.checkNotNegative(objSize, "storeObject"); return syncPubObject(object, pid, null, null, null, objSize); } @@ -591,7 +595,7 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); - checkForEmptyString(pid, "pid", "storeMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeMetadata"); // Determine metadata namespace // If no formatId is supplied, use the default namespace to store metadata @@ -599,7 +603,7 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) if (formatId == null) { checkedFormatId = METADATA_NAMESPACE; } else { - checkForEmptyString(formatId, "formatId", "storeMetadata"); + FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "storeMetadata"); checkedFormatId = formatId; } @@ -685,7 +689,7 @@ public String storeMetadata(InputStream metadata, String pid) throws IOException // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); - checkForEmptyString(pid, "pid", "storeMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeMetadata"); return syncPutMetadata(metadata, pid, METADATA_NAMESPACE); } @@ -698,7 +702,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); - checkForEmptyString(pid, "pid", "retrieveObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveObject"); // Get permanent address of the pid by calculating its sha-256 hex digest Path objRealPath = getRealPath(pid, "object", null); @@ -737,9 +741,9 @@ public InputStream retrieveMetadata(String pid, String formatId) throws Exceptio ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - checkForEmptyString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); - checkForEmptyString(formatId, "formatId", "retrieveMetadata"); + FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest Path metadataCidPath = getRealPath(pid, "metadata", formatId); @@ -781,7 +785,7 @@ public InputStream retrieveMetadata(String pid) throws Exception { ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - checkForEmptyString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest Path metadataCidPath = getRealPath(pid, "metadata", METADATA_NAMESPACE); @@ -823,7 +827,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteObject"); - checkForEmptyString(pid, "pid", "deleteObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteObject"); // Get permanent address of the pid by calculating its sha-256 hex digest Path objRealPath = getRealPath(pid, "object", null); @@ -852,9 +856,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - checkForEmptyString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); - checkForEmptyString(formatId, "formatId", "deleteMetadata"); + FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest Path metadataCidPath = getRealPath(pid, "metadata", formatId); @@ -885,7 +889,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotF ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - checkForEmptyString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest Path metadataCidPath = getRealPath(pid, "metadata", METADATA_NAMESPACE); @@ -914,7 +918,7 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE ); FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); - checkForEmptyString(pid, "pid", "getHexDigest"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "getHexDigest"); validateAlgorithm(algorithm); // Get permanent address of the pid by calculating its sha-256 hex digest @@ -981,17 +985,21 @@ protected ObjectInfo putObject( // Begin input validation FileHashStoreUtility.ensureNotNull(object, "object", "putObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "putObject"); - checkForEmptyString(pid, "pid", "putObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "putObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - checkForEmptyString(additionalAlgorithm, "additionalAlgorithm", "putObject"); + FileHashStoreUtility.checkForEmptyString( + additionalAlgorithm, "additionalAlgorithm", "putObject" + ); validateAlgorithm(additionalAlgorithm); } if (checksumAlgorithm != null) { - checkForEmptyString(checksumAlgorithm, "checksumAlgorithm", "putObject"); + FileHashStoreUtility.checkForEmptyString( + checksumAlgorithm, "checksumAlgorithm", "putObject" + ); validateAlgorithm(checksumAlgorithm); } - checkNotNegative(objSize, "putObject"); + FileHashStoreUtility.checkNotNegative(objSize, "putObject"); // If validation is desired, checksumAlgorithm and checksum must both be present boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); @@ -1140,7 +1148,7 @@ private void validateTmpObject( protected boolean validateAlgorithm(String algorithm) throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "putObject"); - checkForEmptyString(algorithm, "algorithm", "validateAlgorithm"); + FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "validateAlgorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); if (!algorithmSupported) { @@ -1168,7 +1176,9 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor FileHashStoreUtility.ensureNotNull( checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters" ); - checkForEmptyString(checksumAlgorithm, "algorithm", "verifyChecksumParameters"); + FileHashStoreUtility.checkForEmptyString( + checksumAlgorithm, "algorithm", "verifyChecksumParameters" + ); } // Ensure algorithm is supported, not null and not empty boolean requestValidation = false; @@ -1180,7 +1190,9 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor checksum, "checksum", "verifyChecksumParameters" ); assert checksum != null; - checkForEmptyString(checksum, "checksum", "verifyChecksumParameters"); + FileHashStoreUtility.checkForEmptyString( + checksum, "checksum", "verifyChecksumParameters" + ); } } return requestValidation; @@ -1198,9 +1210,9 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor protected String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, IllegalArgumentException { FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); - checkForEmptyString(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "getPidHexDigest"); FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); - checkForEmptyString(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "getPidHexDigest"); validateAlgorithm(algorithm); MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); @@ -1302,13 +1314,13 @@ protected Map writeToTmpFileAndGenerateChecksums( File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { if (additionalAlgorithm != null) { - checkForEmptyString( + FileHashStoreUtility.checkForEmptyString( additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(additionalAlgorithm); } if (checksumAlgorithm != null) { - checkForEmptyString( + FileHashStoreUtility.checkForEmptyString( checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(checksumAlgorithm); @@ -1421,7 +1433,7 @@ protected void move(File source, File target, String entity) throws IOException, // Validate input parameters FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); - checkForEmptyString(entity, "entity", "move"); + FileHashStoreUtility.checkForEmptyString(entity, "entity", "move"); // Entity is only used when checking for an existence of an object if (entity.equals("object") && target.exists()) { String errMsg = "FileHashStore.move - File already exists for target: " + target; @@ -1487,7 +1499,7 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); - checkForEmptyString(pid, "pid", "putMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "putMetadata"); // Determine metadata namespace // If no formatId is supplied, use the default namespace to store metadata @@ -1495,7 +1507,7 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) if (formatId == null) { checkedFormatId = METADATA_NAMESPACE; } else { - checkForEmptyString(formatId, "formatId", "putMetadata"); + FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "putMetadata"); checkedFormatId = formatId; } @@ -1606,7 +1618,7 @@ private void deleteObjectAndParentDirectories(Path objectAbsPath, String pid, St // Then delete any empty directories Path parent = objectAbsPath.getParent(); - while (parent != null && isDirectoryEmpty(parent)) { + while (parent != null && FileHashStoreUtility.isDirectoryEmpty(parent)) { if (parent.equals(METADATA_STORE_DIRECTORY)) { // Do not delete the metadata store directory break; @@ -1623,57 +1635,4 @@ private void deleteObjectAndParentDirectories(Path objectAbsPath, String pid, St } } } - - /** - * Checks whether a directory is empty or contains files. If a file is found, it returns true. - * - * @param directory Directory to check - * @return True if a file is found or the directory is empty, False otherwise - * @throws IOException If I/O occurs when accessing directory - */ - private boolean isDirectoryEmpty(Path directory) throws IOException { - try (Stream stream = Files.list(directory)) { - // The findFirst() method is called on the stream created from the given - // directory to retrieve the first element. If the stream is empty (i.e., the - // directory is empty), findFirst() will return an empty Optional. - // - // The isPresent() method is called on the Optional returned by - // findFirst(). If the Optional contains a value (i.e., an element was found), - // isPresent() returns true. If the Optional is empty (i.e., the stream is - // empty), isPresent() returns false. - return !stream.findFirst().isPresent(); - } - } - - /** - * Checks whether a given string is empty and throws an exception if so - * - * @param string String to check - * @param argument Value that is being checked - * @param method Calling method - */ - private void checkForEmptyString(String string, String argument, String method) { - if (string.trim().isEmpty()) { - String errMsg = "FileHashStore.isStringNullOrEmpty - Calling Method: " + method + "(): " - + argument + " cannot be empty."; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - } - } - - /** - * Checks whether a given long integer is greater than 0 - * - * @param object Object to check - * @param method Calling method - */ - private void checkNotNegative(long object, String method) { - if (object < 0) { - String errMsg = "FileHashStore.isObjectGreaterThanZero - Calling Method: " + method - + "(): objSize cannot be less than 0."; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - } - } - } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 3b074e1e..8c0da52e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -2,14 +2,17 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.stream.Stream; import javax.xml.bind.DatatypeConverter; /** - * FileHashStoreUtility is a utility class that provides shared functionality between FileHashStore - * and related classes. + * FileHashStoreUtility is a utility class that encapsulates generic or shared functionality + * in FileHashStore and/or related classes. */ public class FileHashStoreUtility { @@ -59,6 +62,56 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm } // mdObjectHexDigest return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); + } + + /** + * Checks whether a directory is empty or contains files. If a file is found, it returns true. + * + * @param directory Directory to check + * @return True if a file is found or the directory is empty, False otherwise + * @throws IOException If I/O occurs when accessing directory + */ + public static boolean isDirectoryEmpty(Path directory) throws IOException { + try (Stream stream = Files.list(directory)) { + // The findFirst() method is called on the stream created from the given + // directory to retrieve the first element. If the stream is empty (i.e., the + // directory is empty), findFirst() will return an empty Optional. + // + // The isPresent() method is called on the Optional returned by + // findFirst(). If the Optional contains a value (i.e., an element was found), + // isPresent() returns true. If the Optional is empty (i.e., the stream is + // empty), isPresent() returns false. + return !stream.findFirst().isPresent(); + } + } + + /** + * Checks whether a given string is empty and throws an exception if so + * + * @param string String to check + * @param argument Value that is being checked + * @param method Calling method + */ + public static void checkForEmptyString(String string, String argument, String method) { + if (string.trim().isEmpty()) { + String errMsg = "FileHashStoreUtility.isStringNullOrEmpty - Calling Method: " + method + + "(): " + argument + " cannot be empty."; + throw new IllegalArgumentException(errMsg); + } + } + /** + * Checks whether a given long integer is greater than 0 + * + * @param object Object to check + * @param method Calling method + */ + public static void checkNotNegative(long object, String method) { + if (object < 0) { + String errMsg = "FileHashStoreUtility.isObjectGreaterThanZero - Calling Method: " + + method + "(): objSize cannot be less than 0."; + throw new IllegalArgumentException(errMsg); + } } + } From 55867d167d2f27c18ab7896e3d093d70857d975e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Sep 2023 13:02:29 -0700 Subject: [PATCH 062/553] Update pom.xml for maven-jar-plugin and jackson-dataformat-yaml version --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 445792e3..6b4a8a57 100644 --- a/pom.xml +++ b/pom.xml @@ -44,7 +44,7 @@ com.fasterxml.jackson.dataformat jackson-dataformat-yaml - 2.12.2 + 2.15.2 com.fasterxml.jackson.core @@ -115,7 +115,7 @@ maven-jar-plugin - 2.4 + 3.3.0 default-jar From 070dbdf90f212eaaee40ffd1983b69f36671010e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Sep 2023 13:03:52 -0700 Subject: [PATCH 063/553] Remove redundant javadoc return statements from HashStore interface --- src/main/java/org/dataone/hashstore/HashStore.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index e0a08c38..20738895 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -135,7 +135,6 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, * from HashStore using a given persistent identifier. * * @param pid Authority-based identifier - * @return True if successful * @throws IllegalArgumentException When pid is null or empty * @throws FileNotFoundException When requested pid has no associated object * @throws IOException I/O error when deleting empty directories @@ -150,7 +149,6 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, * * @param pid Authority-based identifier * @param formatId Metadata namespace/format - * @return True if successfulÏ * @throws IllegalArgumentException When pid or formatId is null or empty * @throws FileNotFoundException When requested pid has no metadata * @throws IOException I/O error when deleting empty directories From d31f4a003456cae3fa8b7e383bd463c310a7f4b0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Sep 2023 14:38:13 -0700 Subject: [PATCH 064/553] Update 'pom.xml' url with link to HashStore-java repo --- pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 6b4a8a57..b30eb3ef 100644 --- a/pom.xml +++ b/pom.xml @@ -10,8 +10,7 @@ 1.0-SNAPSHOT hashstore - - http://www.example.com + https://github.com/DataONEorg/hashstore-java UTF-8 From d9abd3559cfe8b0067c354dedb39d6119486a718 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Sep 2023 14:46:45 -0700 Subject: [PATCH 065/553] Remove redundant assert statement in 'verifyChecksumParameters' --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d0962416..dd8e3044 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1189,7 +1189,6 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor FileHashStoreUtility.ensureNotNull( checksum, "checksum", "verifyChecksumParameters" ); - assert checksum != null; FileHashStoreUtility.checkForEmptyString( checksum, "checksum", "verifyChecksumParameters" ); From ad1f98b78e40cd2a14041e2bd789b96019a5920c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 12:35:23 -0700 Subject: [PATCH 066/553] Update README.md client usage examples --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index af76b859..e7fe736b 100644 --- a/README.md +++ b/README.md @@ -32,35 +32,35 @@ We also maintain a parallel [Python-based version of HashStore](https://github.c $ mvn clean package -Dmaven.test.skip=true # Get help -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -h +$ java -cp ./target/.../hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -h # Step 2: ## Create a HashStore (long option) -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp ./target/.../hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 ## Create a HashStore (short option) -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 # Get the checksum of a data object -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 # Store a data object -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 # Store a metadata object -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Retrieve a data object -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrieveobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrieveobject -pid testpid1 # Retrieve a metadata object -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Delete a data object -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deleteobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deleteobject -pid testpid1 # Delete a metadata file -$ java -cp /path/to/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 ``` ## License From e6caacf054eeea0edc4a8f88b6eaacfcb1a77275 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 13:24:24 -0700 Subject: [PATCH 067/553] Update pom.xml with version upgrade from JUnit4 to JUnit5 --- pom.xml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index b30eb3ef..29193388 100644 --- a/pom.xml +++ b/pom.xml @@ -30,9 +30,15 @@ 1.5.0 - junit - junit - 4.13.2 + org.junit.jupiter + junit-jupiter-api + 5.8.2 + test + + + org.junit.jupiter + junit-jupiter-engine + 5.8.2 test From eb09e814ec2f02725bd6c79e8552968123e60951 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 13:24:49 -0700 Subject: [PATCH 068/553] Refactor 'FileHashStoreInterfaceTest' test class to use JUnit5 syntax --- .../FileHashStoreInterfaceTest.java | 422 +++++++++++------- 1 file changed, 249 insertions(+), 173 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 13d737d9..e7c91aaa 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -24,12 +24,11 @@ import org.dataone.hashstore.ObjectInfo; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.testdata.TestDataHarness; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; /** * Test class for FileHashStore HashStoreInterface override methods @@ -42,9 +41,9 @@ public class FileHashStoreInterfaceTest { /** * Initialize FileHashStore before each test to creates tmp directories */ - @Before + @BeforeEach public void initializeFileHashStore() { - Path rootDirectory = tempFolder.getRoot().toPath().resolve("metacat"); + Path rootDirectory = tempFolder.resolve("metacat"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -71,8 +70,8 @@ public void initializeFileHashStore() { /** * Temporary folder for tests to run in */ - @Rule - public TemporaryFolder tempFolder = new TemporaryFolder(); + @TempDir + public Path tempFolder; /** * Utility method to get absolute path of a given object @@ -157,37 +156,43 @@ public void storeObject_hexDigests() throws Exception { /** * Check that store object throws exception when object is null */ - @Test(expected = NullPointerException.class) + @Test public void storeObject_null() throws Exception { - String pid = "j.tao.1700.1"; - fileHashStore.storeObject(null, pid, null, null, null, 0); + assertThrows(NullPointerException.class, () -> { + String pid = "j.tao.1700.1"; + fileHashStore.storeObject(null, pid, null, null, null, 0); + }); } /** * Check that store object throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void storeObject_nullPid() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); + assertThrows(NullPointerException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, null, null, null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, null, null, null, null, 0); + }); } } /** * Check that store object throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void storeObject_emptyPid() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, "", null, null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, "", null, null, null, 0); + }); } } @@ -290,45 +295,51 @@ public void storeObject_correctChecksumValue() throws Exception { /** * Verify exception thrown when checksum provided does not match */ - @Test(expected = IllegalArgumentException.class) + @Test public void storeObject_incorrectChecksumValue() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(IllegalArgumentException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - String checksumIncorrect = - "aaf9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; + String checksumIncorrect = + "aaf9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", 0); + }); } /** * Verify exception thrown when checksum is empty and algorithm supported */ - @Test(expected = IllegalArgumentException.class) + @Test public void storeObject_emptyChecksumValue() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(IllegalArgumentException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - String checksumEmpty = ""; + String checksumEmpty = ""; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", 0); + }); } /** * Verify exception thrown when checksum is null and algorithm supported */ - @Test(expected = NullPointerException.class) + @Test public void storeObject_nullChecksumValue() throws Exception { - // Get single test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(NullPointerException.class, () -> { + // Get single test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", 0); + }); } /** @@ -354,48 +365,56 @@ public void storeObject_objSizeCorrect() throws Exception { /** * Check that store object throws exception when incorrect file size provided */ - @Test(expected = IllegalArgumentException.class) + @Test public void storeObject_objSizeIncorrect() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, 1000); + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectInfo objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, 1000 + ); - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.getSize()); + }); } } /** * Verify exception thrown when unsupported additional algorithm provided */ - @Test(expected = NoSuchAlgorithmException.class) + @Test public void storeObject_invalidAlgorithm() throws Exception { - // Get single test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(NoSuchAlgorithmException.class, () -> { + // Get single test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, "SM2", null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, "SM2", null, null, 0); + }); } /** * Check that store object throws FileAlreadyExists error when storing duplicate object */ - @Test(expected = PidObjectExistsException.class) + @Test public void storeObject_duplicate() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); + assertThrows(PidObjectExistsException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null, 0); - InputStream dataStreamDup = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStreamDup, pid, null, null, null, 0); + InputStream dataStreamDup = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStreamDup, pid, null, null, null, 0); + }); } } @@ -707,61 +726,69 @@ public void storeMetadata_fileSize() throws Exception { /** * Test storeMetadata throws exception when metadata is null */ - @Test(expected = NullPointerException.class) + @Test public void storeMetadata_metadataNull() throws Exception { for (String pid : testData.pidList) { - fileHashStore.storeMetadata(null, pid, null); + assertThrows(NullPointerException.class, () -> { + fileHashStore.storeMetadata(null, pid, null); + }); } } /** * Test storeMetadata throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void storeMetadata_pidNull() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); + assertThrows(NullPointerException.class, () -> { + String pidFormatted = pid.replace("/", "_"); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, null, null); + fileHashStore.storeMetadata(metadataStream, null, null); + }); } } /** * Test storeMetadata throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void storeMetadata_pidEmpty() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, "", null); + fileHashStore.storeMetadata(metadataStream, "", null); + }); } } /** * Test storeMetadata throws exception when pid is empty with spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void storeMetadata_pidEmptySpaces() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, " ", null); + fileHashStore.storeMetadata(metadataStream, " ", null); + }); } } @@ -862,37 +889,45 @@ public void retrieveObject() throws Exception { /** * Check that retrieveObject throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void retrieveObject_pidNull() throws Exception { - InputStream pidInputStream = fileHashStore.retrieveObject(null); - pidInputStream.close(); + assertThrows(NullPointerException.class, () -> { + InputStream pidInputStream = fileHashStore.retrieveObject(null); + pidInputStream.close(); + }); } /** * Check that retrieveObject throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void retrieveObject_pidEmpty() throws Exception { - InputStream pidInputStream = fileHashStore.retrieveObject(""); - pidInputStream.close(); + assertThrows(IllegalArgumentException.class, () -> { + InputStream pidInputStream = fileHashStore.retrieveObject(""); + pidInputStream.close(); + }); } /** * Check that retrieveObject throws exception when pid is empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void retrieveObject_pidEmptySpaces() throws Exception { - InputStream pidInputStream = fileHashStore.retrieveObject(" "); - pidInputStream.close(); + assertThrows(IllegalArgumentException.class, () -> { + InputStream pidInputStream = fileHashStore.retrieveObject(" "); + pidInputStream.close(); + }); } /** * Check that retrieveObject throws exception when file is not found */ - @Test(expected = FileNotFoundException.class) + @Test public void retrieveObject_pidNotFound() throws Exception { - InputStream pidInputStream = fileHashStore.retrieveObject("dou.2023.hs.1"); - pidInputStream.close(); + assertThrows(FileNotFoundException.class, () -> { + InputStream pidInputStream = fileHashStore.retrieveObject("dou.2023.hs.1"); + pidInputStream.close(); + }); } /** @@ -986,75 +1021,84 @@ public void retrieveMetadata_overload() throws Exception { /** * Check that retrieveMetadata throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void retrieveMetadata_pidNull() throws Exception { - String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata(null, storeFormatId); - pidInputStream.close(); - + assertThrows(NullPointerException.class, () -> { + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata(null, storeFormatId); + pidInputStream.close(); + }); } /** * Check that retrieveMetadata throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void retrieveMetadata_pidEmpty() throws Exception { - String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata("", storeFormatId); - pidInputStream.close(); - + assertThrows(IllegalArgumentException.class, () -> { + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata("", storeFormatId); + pidInputStream.close(); + }); } /** * Check that retrieveMetadata throws exception when pid is empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void retrieveMetadata_pidEmptySpaces() throws Exception { - String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata(" ", storeFormatId); - pidInputStream.close(); - + assertThrows(IllegalArgumentException.class, () -> { + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata(" ", storeFormatId); + pidInputStream.close(); + }); } /** * Check that retrieveMetadata throws exception when format is null */ - @Test(expected = NullPointerException.class) + @Test public void retrieveMetadata_formatNull() throws Exception { - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", null); - pidInputStream.close(); - + assertThrows(NullPointerException.class, () -> { + InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", null); + pidInputStream.close(); + }); } /** * Check that retrieveMetadata throws exception when format is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void retrieveMetadata_formatEmpty() throws Exception { - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", ""); - pidInputStream.close(); - + assertThrows(IllegalArgumentException.class, () -> { + InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", ""); + pidInputStream.close(); + }); } /** * Check that retrieveMetadata throws exception when format is empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void retrieveMetadata_formatEmptySpaces() throws Exception { - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", " "); - pidInputStream.close(); - + assertThrows(IllegalArgumentException.class, () -> { + InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", " "); + pidInputStream.close(); + }); } /** * Check that retrieveMetadata throws exception when file is not found */ - @Test(expected = FileNotFoundException.class) + @Test public void retrieveMetadata_pidNotFound() throws Exception { - String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", storeFormatId); - pidInputStream.close(); - + assertThrows(FileNotFoundException.class, () -> { + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata( + "dou.2023.hs.1", storeFormatId + ); + pidInputStream.close(); + }); } /** @@ -1141,33 +1185,41 @@ public void deleteObject() throws Exception { /** * Confirm that deleteObject throws exception when associated pid obj not found */ - @Test(expected = FileNotFoundException.class) + @Test public void deleteObject_pidNotFound() throws Exception { - fileHashStore.deleteObject("dou.2023.hashstore.1"); + assertThrows(FileNotFoundException.class, () -> { + fileHashStore.deleteObject("dou.2023.hashstore.1"); + }); } /** * Confirm that deleteObject throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void deleteObject_pidNull() throws Exception { - fileHashStore.deleteObject(null); + assertThrows(NullPointerException.class, () -> { + fileHashStore.deleteObject(null); + }); } /** * Confirm that deleteObject throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void deleteObject_pidEmpty() throws Exception { - fileHashStore.deleteObject(""); + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.deleteObject(""); + }); } /** * Confirm that deleteObject throws exception when pid is empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void deleteObject_pidEmptySpaces() throws Exception { - fileHashStore.deleteObject(" "); + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.deleteObject(" "); + }); } /** @@ -1229,64 +1281,78 @@ public void deleteMetadata_overload() throws Exception { /** * Confirm that deleteMetadata throws exception when associated pid obj not found */ - @Test(expected = FileNotFoundException.class) + @Test public void deleteMetadata_pidNotFound() throws Exception { - String formatId = "http://hashstore.tests/types/v1.0"; - fileHashStore.deleteMetadata("dou.2023.hashstore.1", formatId); + assertThrows(FileNotFoundException.class, () -> { + String formatId = "http://hashstore.tests/types/v1.0"; + fileHashStore.deleteMetadata("dou.2023.hashstore.1", formatId); + }); } /** * Confirm that deleteMetadata throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void deleteMetadata_pidNull() throws Exception { - String formatId = "http://hashstore.tests/types/v1.0"; - fileHashStore.deleteMetadata(null, formatId); + assertThrows(NullPointerException.class, () -> { + String formatId = "http://hashstore.tests/types/v1.0"; + fileHashStore.deleteMetadata(null, formatId); + }); } /** * Confirm that deleteMetadata throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void deleteMetadata_pidEmpty() throws Exception { - String formatId = "http://hashstore.tests/types/v1.0"; - fileHashStore.deleteMetadata("", formatId); + assertThrows(IllegalArgumentException.class, () -> { + String formatId = "http://hashstore.tests/types/v1.0"; + fileHashStore.deleteMetadata("", formatId); + }); } /** * Confirm that deleteMetadata throws exception when pid is empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void deleteMetadata_pidEmptySpaces() throws Exception { - String formatId = "http://hashstore.tests/types/v1.0"; - fileHashStore.deleteMetadata(" ", formatId); + assertThrows(IllegalArgumentException.class, () -> { + String formatId = "http://hashstore.tests/types/v1.0"; + fileHashStore.deleteMetadata(" ", formatId); + }); } /** * Confirm that deleteMetadata throws exception when formatId is null */ - @Test(expected = NullPointerException.class) + @Test public void deleteMetadata_formatIdNull() throws Exception { - String pid = "dou.2023.hashstore.1"; - fileHashStore.deleteMetadata(pid, null); + assertThrows(NullPointerException.class, () -> { + String pid = "dou.2023.hashstore.1"; + fileHashStore.deleteMetadata(pid, null); + }); } /** * Confirm that deleteMetadata throws exception when formatId is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void deleteMetadata_formatIdEmpty() throws Exception { - String pid = "dou.2023.hashstore.1"; - fileHashStore.deleteMetadata(pid, ""); + assertThrows(IllegalArgumentException.class, () -> { + String pid = "dou.2023.hashstore.1"; + fileHashStore.deleteMetadata(pid, ""); + }); } /** * Confirm that deleteMetadata throws exception when formatId is empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void deleteMetadata_formatIdEmptySpaces() throws Exception { - String pid = "dou.2023.hashstore.1"; - fileHashStore.deleteMetadata(pid, " "); + assertThrows(IllegalArgumentException.class, () -> { + String pid = "dou.2023.hashstore.1"; + fileHashStore.deleteMetadata(pid, " "); + }); } /** @@ -1314,52 +1380,62 @@ public void getHexDigest() throws Exception { /** * Confirm getHexDigest throws exception when file is not found */ - @Test(expected = FileNotFoundException.class) + @Test public void getHexDigest_pidNotFound() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - fileHashStore.getHexDigest(pidFormatted, "SHA-256"); + assertThrows(FileNotFoundException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + fileHashStore.getHexDigest(pidFormatted, "SHA-256"); + }); } } /** * Confirm getHexDigest throws exception when file is not found */ - @Test(expected = NullPointerException.class) + @Test public void getHexDigest_pidNull() throws Exception { - fileHashStore.getHexDigest(null, "SHA-256"); + assertThrows(NullPointerException.class, () -> { + fileHashStore.getHexDigest(null, "SHA-256"); + }); } /** * Confirm getHexDigest throws exception when file is not found */ - @Test(expected = IllegalArgumentException.class) + @Test public void getHexDigest_pidEmpty() throws Exception { - fileHashStore.getHexDigest("", "SHA-256"); + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.getHexDigest("", "SHA-256"); + }); } /** * Confirm getHexDigest throws exception when file is not found */ - @Test(expected = IllegalArgumentException.class) + @Test public void getHexDigest_pidEmptySpaces() throws Exception { - fileHashStore.getHexDigest(" ", "SHA-256"); + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.getHexDigest(" ", "SHA-256"); + }); } /** * Confirm getHexDigest throws exception when unsupported algorithm supplied */ - @Test(expected = NoSuchAlgorithmException.class) + @Test public void getHexDigest_badAlgo() throws Exception { for (String pid : testData.pidList) { - // Store object first - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); + assertThrows(NoSuchAlgorithmException.class, () -> { + // Store object first + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null, 0); - fileHashStore.getHexDigest(pid, "BLAKE2S"); + fileHashStore.getHexDigest(pid, "BLAKE2S"); + }); } } } From 78a34b6b58e5aa3b25d34f20742bfa41432a6ae2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 13:45:58 -0700 Subject: [PATCH 069/553] Refactor 'FileHashStoreProtectedTest' test class to use JUnit5 syntax --- .../FileHashStoreProtectedTest.java | 401 ++++++++++-------- 1 file changed, 226 insertions(+), 175 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 1488cb1d..6670bbda 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -14,18 +14,14 @@ import javax.xml.bind.DatatypeConverter; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import org.dataone.hashstore.ObjectInfo; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.testdata.TestDataHarness; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.*; /** * Test class for FileHashStore protected members @@ -38,9 +34,9 @@ public class FileHashStoreProtectedTest { /** * Initialize each FileHashStore test with a new root temporary folder */ - @Before + @BeforeEach public void initializeFileHashStore() { - Path rootDirectory = tempFolder.getRoot().toPath().resolve("metacat"); + Path rootDirectory = tempFolder.resolve("metacat"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -68,7 +64,8 @@ public void initializeFileHashStore() { * Non-test method using to generate a temp file */ public File generateTemporaryFile() throws Exception { - Path directory = tempFolder.getRoot().toPath(); + Path directory = tempFolder.resolve("metacat"); + System.out.println(directory); // newFile return fileHashStore.generateTmpFile("testfile", directory); } @@ -76,8 +73,8 @@ public File generateTemporaryFile() throws Exception { /** * Temporary folder for tests to run in */ - @Rule - public TemporaryFolder tempFolder = new TemporaryFolder(); + @TempDir + public Path tempFolder; /** * Check algorithm support for supported algorithm @@ -98,52 +95,58 @@ public void isValidAlgorithm_supported() { /** * Check algorithm support for unsupported algorithm */ - @Test(expected = NoSuchAlgorithmException.class) + @Test public void isValidAlgorithm_notSupported() throws NoSuchAlgorithmException { - try { - String sm3 = "SM3"; - boolean not_supported = fileHashStore.validateAlgorithm(sm3); - assertFalse(not_supported); + assertThrows(NoSuchAlgorithmException.class, () -> { + try { + String sm3 = "SM3"; + boolean not_supported = fileHashStore.validateAlgorithm(sm3); + assertFalse(not_supported); - } catch (NoSuchAlgorithmException nsae) { - throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage() - ); + } catch (NoSuchAlgorithmException nsae) { + throw new NoSuchAlgorithmException( + "NoSuchAlgorithmException encountered: " + nsae.getMessage() + ); - } + } + }); } /** * Check algorithm support for unsupported algorithm with lower cases */ - @Test(expected = NoSuchAlgorithmException.class) + @Test public void isValidAlgorithm_notSupportedLowerCase() throws NoSuchAlgorithmException { - try { - // Must match string to reduce complexity, no string formatting - String md2_lowercase = "md2"; - boolean lowercase_not_supported = fileHashStore.validateAlgorithm(md2_lowercase); - assertFalse(lowercase_not_supported); + assertThrows(NoSuchAlgorithmException.class, () -> { + try { + // Must match string to reduce complexity, no string formatting + String md2_lowercase = "md2"; + boolean lowercase_not_supported = fileHashStore.validateAlgorithm(md2_lowercase); + assertFalse(lowercase_not_supported); - } catch (NoSuchAlgorithmException nsae) { - throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage() - ); + } catch (NoSuchAlgorithmException nsae) { + throw new NoSuchAlgorithmException( + "NoSuchAlgorithmException encountered: " + nsae.getMessage() + ); - } + } + }); } /** * Check algorithm support for null algorithm value */ - @Test(expected = NullPointerException.class) + @Test public void isValidAlgorithm_algorithmNull() { - try { - fileHashStore.validateAlgorithm(null); + assertThrows(NullPointerException.class, () -> { + try { + fileHashStore.validateAlgorithm(null); - } catch (NoSuchAlgorithmException nsae) { - fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); - } + } + }); } /** @@ -183,10 +186,13 @@ public void getPidHexDigest() throws Exception { /** * Check that getPidHexDigest throws NoSuchAlgorithmException */ - @Test(expected = NoSuchAlgorithmException.class) + @Test public void getPidHexDigest_badAlgorithm() throws Exception { for (String pid : testData.pidList) { - fileHashStore.getPidHexDigest(pid, "SM2"); + assertThrows(NoSuchAlgorithmException.class, () -> { + fileHashStore.getPidHexDigest(pid, "SM2"); + }); + } } @@ -301,67 +307,77 @@ public void putObject_additionalAlgo_correctChecksumValue() throws Exception { /** * Verify putObject throws exception when checksum provided does not match */ - @Test(expected = IllegalArgumentException.class) + @Test public void putObject_incorrectChecksumValue() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(IllegalArgumentException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - String checksumIncorrect = "1c25df1c8ba1d2e57bb3fd4785878b85"; + String checksumIncorrect = "1c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", 0); + }); } /** * Verify putObject throws exception when checksum is empty and algorithm supported */ - @Test(expected = IllegalArgumentException.class) + @Test public void putObject_emptyChecksumValue() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(IllegalArgumentException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, " ", "MD2", 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, null, " ", "MD2", 0); + }); } /** * Verify putObject throws exception when checksum is null and algorithm supported */ - @Test(expected = NullPointerException.class) + @Test public void putObject_nullChecksumValue() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(NullPointerException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, "MD2", 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, null, null, "MD2", 0); + }); } /** * Verify putObject throws exception when checksumAlgorithm is empty and checksum is supplied */ - @Test(expected = IllegalArgumentException.class) + @Test public void putObject_emptyChecksumAlgorithmValue() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(IllegalArgumentException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", " ", 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, null, "abc", " ", 0); + }); } /** * Verify putObject throws exception when checksumAlgorithm is null and checksum supplied */ - @Test(expected = NullPointerException.class) + @Test public void putObject_nullChecksumAlgorithmValue() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", null, 0); + assertThrows(NullPointerException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, null, "abc", null, 0); + }); } @@ -388,100 +404,115 @@ public void putObject_objSizeCorrect() throws Exception { /** * Check that store object throws exception when incorrect file size provided */ - @Test(expected = IllegalArgumentException.class) + @Test public void putObject_objSizeIncorrect() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, 1000); - - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectInfo objInfo = fileHashStore.putObject( + dataStream, pid, null, null, null, 1000 + ); + + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.getSize()); + }); } } /** * Verify putObject throws exception when storing a duplicate object */ - @Test(expected = PidObjectExistsException.class) + @Test public void putObject_duplicateObject() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(PidObjectExistsException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, null, null, null, 0); - // Try duplicate upload - InputStream dataStreamTwo = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStreamTwo, pid, null, null, null, 0); + // Try duplicate upload + InputStream dataStreamTwo = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStreamTwo, pid, null, null, null, 0); + }); } /** * Verify putObject throws exception when unsupported additional algorithm provided */ - @Test(expected = NoSuchAlgorithmException.class) + @Test public void putObject_invalidAlgorithm() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(NoSuchAlgorithmException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, "SM2", null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, "SM2", null, null, 0); + }); } /** * Verify putObject throws exception when empty algorithm is supplied */ - @Test(expected = IllegalArgumentException.class) + @Test public void putObject_emptyAlgorithm() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(IllegalArgumentException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, " ", null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, " ", null, null, 0); + }); } /** * Verify putObject throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void putObject_emptyPid() throws Exception { - // Get test file to "upload" - String pidEmpty = ""; - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(IllegalArgumentException.class, () -> { + // Get test file to "upload" + String pidEmpty = ""; + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pidEmpty, null, null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pidEmpty, null, null, null, 0); + }); } /** * Verify putObject throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void putObject_nullPid() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + assertThrows(NullPointerException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, null, "MD2", null, null, 0); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, null, "MD2", null, null, 0); + }); } /** * Verify putObject throws exception object is null */ - @Test(expected = NullPointerException.class) + @Test public void putObject_nullObject() throws Exception { - // Get test file to "upload" - String pid = "jtao.1700.1"; - - fileHashStore.putObject(null, pid, "MD2", null, null, 0); + assertThrows(NullPointerException.class, () -> { + // Get test file to "upload" + String pid = "jtao.1700.1"; + fileHashStore.putObject(null, pid, "MD2", null, null, 0); + }); } /** @@ -623,20 +654,24 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce /** * Check that exception is thrown when unsupported algorithm supplied */ - @Test(expected = NoSuchAlgorithmException.class) + @Test public void writeToTmpFileAndGenerateChecksums_invalidAlgo() throws Exception { for (String pid : testData.pidList) { - File newTmpFile = generateTemporaryFile(); - String pidFormatted = pid.replace("/", "_"); + assertThrows(NoSuchAlgorithmException.class, () -> { + File newTmpFile = generateTemporaryFile(); + String pidFormatted = pid.replace("/", "_"); - // Get test file - Path testDataFile = testData.getTestFile(pidFormatted); + // Get test file + Path testDataFile = testData.getTestFile(pidFormatted); - // Extra algo to calculate - MD2 - String addAlgo = "SM2"; + // Extra algo to calculate - MD2 + String addAlgo = "SM2"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, null); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.writeToTmpFileAndGenerateChecksums( + newTmpFile, dataStream, addAlgo, null + ); + }); } } @@ -646,7 +681,7 @@ public void writeToTmpFileAndGenerateChecksums_invalidAlgo() throws Exception { @Test public void testMove() throws Exception { File newTmpFile = generateTemporaryFile(); - String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; + String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; File targetFile = new File(targetString); fileHashStore.move(newTmpFile, targetFile, "object"); @@ -656,48 +691,56 @@ public void testMove() throws Exception { /** * Confirm that FileAlreadyExistsException is thrown when target already exists */ - @Test(expected = FileAlreadyExistsException.class) + @Test public void testMove_targetExists() throws Exception { - File newTmpFile = generateTemporaryFile(); - String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; - File targetFile = new File(targetString); - fileHashStore.move(newTmpFile, targetFile, "object"); + assertThrows(FileAlreadyExistsException.class, () -> { + File newTmpFile = generateTemporaryFile(); + String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; + File targetFile = new File(targetString); + fileHashStore.move(newTmpFile, targetFile, "object"); - File newTmpFileTwo = generateTemporaryFile(); - fileHashStore.move(newTmpFileTwo, targetFile, "object"); + File newTmpFileTwo = generateTemporaryFile(); + fileHashStore.move(newTmpFileTwo, targetFile, "object"); + }); } /** * Confirm that NullPointerException is thrown when entity is null */ - @Test(expected = NullPointerException.class) + @Test public void testMove_entityNull() throws Exception { - File newTmpFile = generateTemporaryFile(); - String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; - File targetFile = new File(targetString); - fileHashStore.move(newTmpFile, targetFile, null); + assertThrows(NullPointerException.class, () -> { + File newTmpFile = generateTemporaryFile(); + String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; + File targetFile = new File(targetString); + fileHashStore.move(newTmpFile, targetFile, null); + }); } /** * Confirm that FileAlreadyExistsException is thrown entity is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void testMove_entityEmpty() throws Exception { - File newTmpFile = generateTemporaryFile(); - String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; - File targetFile = new File(targetString); - fileHashStore.move(newTmpFile, targetFile, ""); + assertThrows(IllegalArgumentException.class, () -> { + File newTmpFile = generateTemporaryFile(); + String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; + File targetFile = new File(targetString); + fileHashStore.move(newTmpFile, targetFile, ""); + }); } /** * Confirm that FileAlreadyExistsException is thrown when entity is empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void testMove_entityEmptySpaces() throws Exception { - File newTmpFile = generateTemporaryFile(); - String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; - File targetFile = new File(targetString); - fileHashStore.move(newTmpFile, targetFile, " "); + assertThrows(IllegalArgumentException.class, () -> { + File newTmpFile = generateTemporaryFile(); + String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; + File targetFile = new File(targetString); + fileHashStore.move(newTmpFile, targetFile, " "); + }); } /** @@ -729,61 +772,69 @@ public void putMetadata() throws Exception { /** * Test putMetadata throws exception when metadata is null */ - @Test(expected = NullPointerException.class) + @Test public void putMetadata_metadataNull() throws Exception { for (String pid : testData.pidList) { - fileHashStore.putMetadata(null, pid, null); + assertThrows(NullPointerException.class, () -> { + fileHashStore.putMetadata(null, pid, null); + }); } } /** * Test putMetadata throws exception when pid is null */ - @Test(expected = NullPointerException.class) + @Test public void putMetadata_pidNull() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); + assertThrows(NullPointerException.class, () -> { + String pidFormatted = pid.replace("/", "_"); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.putMetadata(metadataStream, null, null); + fileHashStore.putMetadata(metadataStream, null, null); + }); } } /** * Test putMetadata throws exception when pid is empty */ - @Test(expected = IllegalArgumentException.class) + @Test public void putMetadata_pidEmpty() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.putMetadata(metadataStream, "", null); + fileHashStore.putMetadata(metadataStream, "", null); + }); } } /** * Test putMetadata throws exception when pid is empty with spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void putMetadata_pidEmptySpaces() throws Exception { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.putMetadata(metadataStream, " ", null); + fileHashStore.putMetadata(metadataStream, " ", null); + }); } } From ecd44dd9172ba5f65b698e57aaec77c0ffd44ddd Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 14:11:59 -0700 Subject: [PATCH 070/553] Refactor 'FileHashStorePublicTest' test class to use JUnit5 syntax --- .../FileHashStorePublicTest.java | 397 ++++++++++-------- 1 file changed, 212 insertions(+), 185 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 0b312204..9b9f6750 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -9,15 +9,12 @@ import java.util.HashMap; import java.util.Properties; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import org.dataone.hashstore.testdata.TestDataHarness; -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; /** * Test class for FileHashStore constructor @@ -34,9 +31,9 @@ public class FileHashStorePublicTest { /** * Initialize FileHashStore */ - @BeforeClass - public static void initializeFileHashStore() { - Path root = tempFolder.getRoot().toPath(); + @BeforeEach + public void initializeFileHashStore() { + Path root = tempFolder; rootDirectory = root.resolve("metacat"); objStringFull = rootDirectory.resolve("objects"); objTmpStringFull = rootDirectory.resolve("objects/tmp"); @@ -68,164 +65,184 @@ public static void initializeFileHashStore() { /** * Temporary folder for tests to run in */ - @ClassRule - public static TemporaryFolder tempFolder = new TemporaryFolder(); + @TempDir + public Path tempFolder; /** * Test constructor invalid depth value */ - @Test(expected = NullPointerException.class) + @Test public void constructor_nullProperties() throws Exception { - new FileHashStore(null); + assertThrows(NullPointerException.class, () -> { + new FileHashStore(null); + }); } /** * Test constructor null store path */ - @Test(expected = NullPointerException.class) + @Test public void constructor_nullStorePath() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", null); - storeProperties.setProperty("storeDepth", "0"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(NullPointerException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", null); + storeProperties.setProperty("storeDepth", "0"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test constructor invalid depth property value */ - @Test(expected = IllegalArgumentException.class) + @Test public void constructor_illegalDepthArg() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "0"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "0"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test constructor invalid width property value */ - @Test(expected = IllegalArgumentException.class) + @Test public void constructor_illegalWidthArg() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "0"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "0"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test constructor unsupported algorithm property value */ - @Test(expected = IllegalArgumentException.class) + @Test public void constructor_illegalAlgorithmArg() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "MD5"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "MD5"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test constructor empty algorithm property value throws exception */ - @Test(expected = IllegalArgumentException.class) + @Test public void constructor_emptyAlgorithmArg() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", ""); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", ""); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test constructor algorithm property value with empty spaces throws exception */ - @Test(expected = IllegalArgumentException.class) + @Test public void constructor_emptySpacesAlgorithmArg() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", " "); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", " "); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test constructor empty metadata namespace property value throws exception */ - @Test(expected = IllegalArgumentException.class) + @Test public void constructor_emptyMetadataNameSpaceArg() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "MD5"); - storeProperties.setProperty("storeMetadataNamespace", ""); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "MD5"); + storeProperties.setProperty("storeMetadataNamespace", ""); + + new FileHashStore(storeProperties); + }); } /** * Test constructor metadata namespace property value with empty spaces */ - @Test(expected = IllegalArgumentException.class) + @Test public void constructor_emptySpacesMetadataNameSpaceArg() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "MD5"); - storeProperties.setProperty("storeMetadataNamespace", " "); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "MD5"); + storeProperties.setProperty("storeMetadataNamespace", " "); + + new FileHashStore(storeProperties); + }); } /** * Confirm that exception is thrown when storeDirectory property value is null */ - @Test(expected = NullPointerException.class) + @Test public void initDefaultStore_directoryNull() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", null); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(NullPointerException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", null); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** @@ -309,110 +326,120 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { * Test existing configuration file will raise exception when algorithm is different when * instantiating FileHashStore */ - @Test(expected = IllegalArgumentException.class) + @Test public void testExistingHashStoreConfiguration_diffAlgorithm() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "MD5"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "MD5"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test existing configuration file will raise exception when depth is different when * instantiating FileHashStore */ - @Test(expected = IllegalArgumentException.class) + @Test public void testExistingHashStoreConfiguration_diffDepth() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "2"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "2"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test existing configuration file will raise exception when width is different when * instantiating FileHashStore */ - @Test(expected = IllegalArgumentException.class) + @Test public void testExistingHashStoreConfiguration_diffWidth() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "1"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "1"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Test existing configuration file will raise exception when metadata formatId is different * when instantiating FileHashStore */ - @Test(expected = IllegalArgumentException.class) + @Test public void testExistingHashStoreConfiguration_diffMetadataNamespace() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.test.org/service/types/v2.0" - ); - - new FileHashStore(storeProperties); + assertThrows(IllegalArgumentException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.test.org/service/types/v2.0" + ); + + new FileHashStore(storeProperties); + }); } /** * Check that exception is raised when HashStore present but missing configuration file * 'hashstore.yaml' */ - @Test(expected = IllegalStateException.class) + @Test public void testExistingHashStoreConfiguration_missingYaml() throws Exception { - // Create separate store - Path newStoreDirectory = rootDirectory.resolve("test"); - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", newStoreDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - FileHashStore secondHashStore = new FileHashStore(storeProperties); - - // Confirm config present - Path newStoreHashStoreYaml = newStoreDirectory.resolve("hashstore.yaml"); - assertTrue(Files.exists(newStoreHashStoreYaml)); - - // Store objects - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - secondHashStore.storeObject(dataStream, pid, null, null, null, 0); - } - - // Delete configuration - Files.delete(newStoreHashStoreYaml); - - // Instantiate second HashStore - new FileHashStore(storeProperties); + assertThrows(IllegalStateException.class, () -> { + // Create separate store + Path newStoreDirectory = rootDirectory.resolve("test"); + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", newStoreDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + FileHashStore secondHashStore = new FileHashStore(storeProperties); + + // Confirm config present + Path newStoreHashStoreYaml = newStoreDirectory.resolve("hashstore.yaml"); + assertTrue(Files.exists(newStoreHashStoreYaml)); + + // Store objects + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + secondHashStore.storeObject(dataStream, pid, null, null, null, 0); + } + + // Delete configuration + Files.delete(newStoreHashStoreYaml); + + // Instantiate second HashStore + new FileHashStore(storeProperties); + }); } } From d1e0aaa130c066249c7dd555f4b2e0653d82ca84 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 14:14:33 -0700 Subject: [PATCH 071/553] Refactor 'ClientTest' test class to use JUnit5 syntax --- .../org/dataone/hashstore/ClientTest.java | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/ClientTest.java b/src/test/java/org/dataone/hashstore/ClientTest.java index 23818615..9f30f900 100644 --- a/src/test/java/org/dataone/hashstore/ClientTest.java +++ b/src/test/java/org/dataone/hashstore/ClientTest.java @@ -1,10 +1,5 @@ package org.dataone.hashstore; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.io.PrintStream; @@ -15,12 +10,12 @@ import java.util.List; import java.util.Properties; +import static org.junit.jupiter.api.Assertions.*; import org.dataone.hashstore.testdata.TestDataHarness; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class ClientTest { private static HashStore hashStore; @@ -30,13 +25,13 @@ public class ClientTest { /** * Temporary folder for tests to run in */ - @Rule - public TemporaryFolder tempFolder = new TemporaryFolder(); + @TempDir + public Path tempFolder; - @Before + @BeforeEach public void getHashStore() { String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - Path rootDirectory = tempFolder.getRoot().toPath().resolve("metacat"); + Path rootDirectory = tempFolder.resolve("metacat"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -118,7 +113,7 @@ public Path getObjectAbsPath(String id, String objType) { public void client_createHashStore() throws Exception { String optCreateHashstore = "-chs"; String optStore = "-store"; - String optStorePath = tempFolder.getRoot() + "/metacat"; + String optStorePath = tempFolder + "/metacat"; String optStoreDepth = "-dp"; String optStoreDepthValue = "3"; String optStoreWidth = "-wp"; From 31a74b99017af948b1188b6e8d2b36b5b10073ec Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 14:15:46 -0700 Subject: [PATCH 072/553] Refactor 'ObjectInfoTest' test class to use JUnit5 syntax --- .../java/org/dataone/hashstore/ObjectInfoTest.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/ObjectInfoTest.java b/src/test/java/org/dataone/hashstore/ObjectInfoTest.java index 40cdd0f6..35fb4957 100644 --- a/src/test/java/org/dataone/hashstore/ObjectInfoTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectInfoTest.java @@ -3,11 +3,10 @@ import java.util.HashMap; import java.util.Map; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.BeforeEach; /** * Test class for ObjectInfo @@ -20,8 +19,8 @@ public class ObjectInfoTest { /** * Initialize ObjectInfo variables for test efficiency purposes */ - @BeforeClass - public static void initializeInstanceVariables() { + @BeforeEach + public void initializeInstanceVariables() { id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; size = 1999999; hexDigests = new HashMap<>(); From eba2c4ca46e527beb8148908ac96d36a09e218d7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 14:18:05 -0700 Subject: [PATCH 073/553] Refactor 'HashStoreTest' test class to use JUnit5 syntax --- .../org/dataone/hashstore/HashStoreTest.java | 80 ++++++++++--------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 87487d9a..557cc214 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -1,22 +1,18 @@ package org.dataone.hashstore; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.Properties; +import static org.junit.jupiter.api.Assertions.*; + import org.dataone.hashstore.exceptions.HashStoreFactoryException; import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.testdata.TestDataHarness; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; /** * Test class for HashStoreFactory @@ -25,10 +21,10 @@ public class HashStoreTest { private static HashStore hashStore; private static final TestDataHarness testData = new TestDataHarness(); - @Before + @BeforeEach public void getHashStore() { String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - Path rootDirectory = tempFolder.getRoot().toPath().resolve("metacat"); + Path rootDirectory = tempFolder.resolve("metacat"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -52,8 +48,9 @@ public void getHashStore() { /** * Temporary folder for tests to run in */ - @Rule - public TemporaryFolder tempFolder = new TemporaryFolder(); + @TempDir + public Path tempFolder; + /** * Check that mystore is an instance of "filehashstore" @@ -67,46 +64,53 @@ public void isHashStore() { /** * Check that getHashStore throws exception when classPackage is null */ - @Test(expected = HashStoreFactoryException.class) + @Test public void hashStore_classPackageNull() throws Exception { - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", "/test"); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - hashStore = HashStoreFactory.getHashStore(null, storeProperties); + assertThrows(HashStoreFactoryException.class, () -> { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", "/test"); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + hashStore = HashStoreFactory.getHashStore(null, storeProperties); + }); } /** * Check that getHashStore throws exception when classPackage is not found */ - @Test(expected = HashStoreFactoryException.class) + @Test public void hashStore_classPackageNotFound() throws Exception { - String classPackage = "org.dataone.hashstore.filehashstore.AnotherHashStore"; + assertThrows(HashStoreFactoryException.class, () -> { + String classPackage = "org.dataone.hashstore.filehashstore.AnotherHashStore"; + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", "/test"); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", "/test"); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); - hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + }); } /** * Check that getHashStore throws exception when storeProperties is null */ - @Test(expected = HashStoreFactoryException.class) + @Test public void hashStore_nullStoreProperties() throws Exception { - String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - hashStore = HashStoreFactory.getHashStore(classPackage, null); + assertThrows(HashStoreFactoryException.class, () -> { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + hashStore = HashStoreFactory.getHashStore(classPackage, null); + }); } /** From fc0dfe99b6ecfc9cb108dfcf0272a326fd85881f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 14:41:45 -0700 Subject: [PATCH 074/553] Improve clarity in HashStore interface documentation and add missing javadocs --- .../java/org/dataone/hashstore/HashStore.java | 63 ++++++++++++------- .../filehashstore/FileHashStore.java | 7 ++- 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 20738895..0693d1b3 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -16,12 +16,12 @@ */ public interface HashStore { /** - * The `storeObject` method is responsible for the atomic storage of objects to HashStore - * using a given InputStream and a persistent identifier (pid). Upon successful storage, the - * method returns an 'ObjectInfo' object containing the object's file information, such - * as the id, file size, and hex digest map of algorithms and hex digests/checksums. An - * object is stored once and only once - and `storeObject` also enforces this rule by - * synchronizing multiple calls and rejecting calls to store duplicate objects. + * Atomically stores objects to HashStore using a given InputStream and a persistent + * identifier (pid). Upon successful storage, the method returns an 'ObjectInfo' object + * containing the object's file information, such as the id, file size, and hex digest map + * of algorithms and hex digests/checksums. An object is stored once and only once - and + * `storeObject` also enforces this rule by synchronizing multiple calls and rejecting calls + * to store duplicate objects. * * The file's id is determined by calculating the SHA-256 hex digest of the provided pid, * which is also used as the permanent address of the file. The file's identifier is then @@ -59,23 +59,32 @@ ObjectInfo storeObject( String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + /** + * @see #storeObject(InputStream, String, String, String, String, long) + */ ObjectInfo storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + /** + * @see #storeObject(InputStream, String, String, String, String, long) + */ ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + /** + * @see #storeObject(InputStream, String, String, String, String, long) + */ ObjectInfo storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; /** - * The `storeMetadata` method is responsible for adding/updating metadata (ex. `sysmeta`) to - * the HashStore by using a given InputStream, a persistent identifier (`pid`) and metadata - * format (`formatId`). The permanent address of the stored metadata document is determined - * by calculating the SHA-256 hex digest of the provided `pid` + `formatId`. + * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a + * persistent identifier (`pid`) and metadata format (`formatId`). The permanent address of + * the stored metadata document is determined by calculating the SHA-256 hex digest of the + * provided `pid` + `formatId`. * * Note, multiple calls to store the same metadata content will all be accepted, but is not * guaranteed to execute sequentially. @@ -96,13 +105,15 @@ String storeMetadata(InputStream metadata, String pid, String formatId) throws I IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException; + /** + * @see #storeMetadata(InputStream, String, String) + */ String storeMetadata(InputStream metadata, String pid) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException; /** - * The `retrieveObject` method retrieves an object from HashStore using a given persistent - * identifier (pid). + * Returns an InputStream to an object from HashStore using a given persistent identifier. * * @param pid Authority-based identifier * @return Object InputStream @@ -116,8 +127,8 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** - * The 'retrieveMetadata' method retrieves the metadata content of a given pid and metadata - * namespace from HashStore. + * Returns an InputStream to the metadata content of a given pid and metadata namespace from + * HashStore. * * @param pid Authority-based identifier * @param formatId Metadata namespace/format @@ -128,11 +139,12 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not * supported */ - InputStream retrieveMetadata(String pid, String formatId) throws Exception; + InputStream retrieveMetadata(String pid, String formatId) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; /** - * The 'deleteObject' method deletes an object (and its empty subdirectories) permanently - * from HashStore using a given persistent identifier. + * Deletes an object (and its empty subdirectories) permanently from HashStore using a given + * persistent identifier. * * @param pid Authority-based identifier * @throws IllegalArgumentException When pid is null or empty @@ -141,11 +153,12 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - void deleteObject(String pid) throws Exception; + void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, + IOException, NoSuchAlgorithmException; /** - * The 'deleteMetadata' method deletes a metadata document (ex. `sysmeta`) permanently from - * HashStore using a given persistent identifier and its respective metadata namespace. + * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given + * persistent identifier and its respective metadata namespace. * * @param pid Authority-based identifier * @param formatId Metadata namespace/format @@ -155,11 +168,12 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - void deleteMetadata(String pid, String formatId) throws Exception; + void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; /** - * The 'getHexDigest' method calculates the hex digest of an object that exists in HashStore - * using a given persistent identifier and hash algorithm. + * Calculates the hex digest of an object that exists in HashStore using a given persistent + * identifier and hash algorithm. * * @param pid Authority-based identifier * @param algorithm Algorithm of desired hex digest @@ -170,5 +184,6 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - String getHexDigest(String pid, String algorithm) throws Exception; + String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index dd8e3044..bd21e3fb 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -734,7 +734,9 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, } @Override - public InputStream retrieveMetadata(String pid, String formatId) throws Exception { + public InputStream retrieveMetadata(String pid, String formatId) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.retrieveMetadata - Called to retrieve metadata for pid: " + pid + " with formatId: " + formatId @@ -778,7 +780,8 @@ public InputStream retrieveMetadata(String pid, String formatId) throws Exceptio /** * Overload method for retrieveMetadata with default metadata namespace */ - public InputStream retrieveMetadata(String pid) throws Exception { + public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.retrieveMetadata - Called to retrieve metadata for pid: " + pid + " with default metadata namespace: " + METADATA_NAMESPACE From 0384dd302d84b66d0224adcff95e73e6effe576c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 15:05:46 -0700 Subject: [PATCH 075/553] Update 'Client' class with missing javadocs and added additional print statement to improve clarity for 'retrieveObject' --- src/main/java/org/dataone/hashstore/Client.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 2bfcf970..2bb7746d 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -34,10 +34,21 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +/** + * HashStore's 'Client' class is a command line interface that allows a developer to create a new + * HashStore or interact directly with an existing HashStore. See 'README.md' for usage examples. + */ public class Client { private static HashStore hashStore; private static Path storePath; + /** + * Entry point to the HashStore Client interface. + * + * @param args Command line arguments + * @throws Exception General exception class to catch all exceptions. See the HashStore + * interface for details. + */ public static void main(String[] args) throws Exception { if (args.length == 0) { System.out.println("HashStoreClient - No arguments provided. Use flag '-h' for help."); @@ -186,6 +197,8 @@ public static void main(String[] args) throws Exception { String objPreview = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); objStream.close(); System.out.println(objPreview); + String retrieveObjectMsg = "...\n<-- Truncated for Display Purposes -->"; + System.out.println(retrieveObjectMsg); } else if (cmd.hasOption("retrievemetadata")) { String pid = cmd.getOptionValue("pid"); From ab5292357b585983c44ba973de770c7ed4cf84ee Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 15:25:44 -0700 Subject: [PATCH 076/553] Clean up 'FileHashStoreUtility' class for missing and inaccurated javadocs, revised exceptions and updated affected test classes --- .../filehashstore/FileHashStoreUtility.java | 25 ++++++++++++------- .../FileHashStoreInterfaceTest.java | 24 +++++++++--------- .../FileHashStoreProtectedTest.java | 18 ++++++------- .../FileHashStorePublicTest.java | 2 +- 4 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 8c0da52e..7efdea6d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -22,12 +22,14 @@ public class FileHashStoreUtility { * @param object Object to check * @param argument Value that is being checked * @param method Calling method or class + * @throws IllegalArgumentException If the object is null */ - public static void ensureNotNull(Object object, String argument, String method) { + public static void ensureNotNull(Object object, String argument, String method) + throws IllegalArgumentException { if (object == null) { - String errMsg = "FileHashStoreUtility.isStringNullOrEmpty - Calling Method: " + method + String errMsg = "FileHashStoreUtility.ensureNotNull - Calling Method: " + method + "(): " + argument + " cannot be null."; - throw new NullPointerException(errMsg); + throw new IllegalArgumentException(errMsg); } } @@ -91,10 +93,13 @@ public static boolean isDirectoryEmpty(Path directory) throws IOException { * @param string String to check * @param argument Value that is being checked * @param method Calling method + * @throws IllegalArgumentException If the string is empty or null */ - public static void checkForEmptyString(String string, String argument, String method) { + public static void checkForEmptyString(String string, String argument, String method) + throws IllegalArgumentException { + ensureNotNull(string, "string", "checkForEmptyString"); if (string.trim().isEmpty()) { - String errMsg = "FileHashStoreUtility.isStringNullOrEmpty - Calling Method: " + method + String errMsg = "FileHashStoreUtility.checkForEmptyString - Calling Method: " + method + "(): " + argument + " cannot be empty."; throw new IllegalArgumentException(errMsg); } @@ -105,11 +110,13 @@ public static void checkForEmptyString(String string, String argument, String me * * @param object Object to check * @param method Calling method + * @throws IllegalArgumentException If longInt is less than 0 */ - public static void checkNotNegative(long object, String method) { - if (object < 0) { - String errMsg = "FileHashStoreUtility.isObjectGreaterThanZero - Calling Method: " - + method + "(): objSize cannot be less than 0."; + public static void checkNotNegative(long longInt, String method) + throws IllegalArgumentException { + if (longInt < 0) { + String errMsg = "FileHashStoreUtility.checkNotNegative - Calling Method: " + method + + "(): objSize cannot be less than 0."; throw new IllegalArgumentException(errMsg); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index e7c91aaa..649117ee 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -158,7 +158,7 @@ public void storeObject_hexDigests() throws Exception { */ @Test public void storeObject_null() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { String pid = "j.tao.1700.1"; fileHashStore.storeObject(null, pid, null, null, null, 0); }); @@ -170,7 +170,7 @@ public void storeObject_null() throws Exception { @Test public void storeObject_nullPid() throws Exception { for (String pid : testData.pidList) { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -332,7 +332,7 @@ public void storeObject_emptyChecksumValue() throws Exception { */ @Test public void storeObject_nullChecksumValue() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { // Get single test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); @@ -729,7 +729,7 @@ public void storeMetadata_fileSize() throws Exception { @Test public void storeMetadata_metadataNull() throws Exception { for (String pid : testData.pidList) { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { fileHashStore.storeMetadata(null, pid, null); }); } @@ -741,7 +741,7 @@ public void storeMetadata_metadataNull() throws Exception { @Test public void storeMetadata_pidNull() throws Exception { for (String pid : testData.pidList) { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); // Get test metadata file @@ -891,7 +891,7 @@ public void retrieveObject() throws Exception { */ @Test public void retrieveObject_pidNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveObject(null); pidInputStream.close(); }); @@ -1023,7 +1023,7 @@ public void retrieveMetadata_overload() throws Exception { */ @Test public void retrieveMetadata_pidNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); InputStream pidInputStream = fileHashStore.retrieveMetadata(null, storeFormatId); pidInputStream.close(); @@ -1059,7 +1059,7 @@ public void retrieveMetadata_pidEmptySpaces() throws Exception { */ @Test public void retrieveMetadata_formatNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", null); pidInputStream.close(); }); @@ -1197,7 +1197,7 @@ public void deleteObject_pidNotFound() throws Exception { */ @Test public void deleteObject_pidNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { fileHashStore.deleteObject(null); }); } @@ -1294,7 +1294,7 @@ public void deleteMetadata_pidNotFound() throws Exception { */ @Test public void deleteMetadata_pidNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata(null, formatId); }); @@ -1327,7 +1327,7 @@ public void deleteMetadata_pidEmptySpaces() throws Exception { */ @Test public void deleteMetadata_formatIdNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { String pid = "dou.2023.hashstore.1"; fileHashStore.deleteMetadata(pid, null); }); @@ -1395,7 +1395,7 @@ public void getHexDigest_pidNotFound() throws Exception { */ @Test public void getHexDigest_pidNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { fileHashStore.getHexDigest(null, "SHA-256"); }); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 6670bbda..1102fd6e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -134,11 +134,11 @@ public void isValidAlgorithm_notSupportedLowerCase() throws NoSuchAlgorithmExcep } /** - * Check algorithm support for null algorithm value + * Check algorithm support for null algorithm value throws exception */ @Test public void isValidAlgorithm_algorithmNull() { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { try { fileHashStore.validateAlgorithm(null); @@ -341,7 +341,7 @@ public void putObject_emptyChecksumValue() throws Exception { */ @Test public void putObject_nullChecksumValue() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); @@ -371,7 +371,7 @@ public void putObject_emptyChecksumAlgorithmValue() throws Exception { */ @Test public void putObject_nullChecksumAlgorithmValue() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); @@ -493,7 +493,7 @@ public void putObject_emptyPid() throws Exception { */ @Test public void putObject_nullPid() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); @@ -508,7 +508,7 @@ public void putObject_nullPid() throws Exception { */ @Test public void putObject_nullObject() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; fileHashStore.putObject(null, pid, "MD2", null, null, 0); @@ -709,7 +709,7 @@ public void testMove_targetExists() throws Exception { */ @Test public void testMove_entityNull() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; File targetFile = new File(targetString); @@ -775,7 +775,7 @@ public void putMetadata() throws Exception { @Test public void putMetadata_metadataNull() throws Exception { for (String pid : testData.pidList) { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { fileHashStore.putMetadata(null, pid, null); }); } @@ -787,7 +787,7 @@ public void putMetadata_metadataNull() throws Exception { @Test public void putMetadata_pidNull() throws Exception { for (String pid : testData.pidList) { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); // Get test metadata file diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 9b9f6750..4d5377b7 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -73,7 +73,7 @@ public void initializeFileHashStore() { */ @Test public void constructor_nullProperties() throws Exception { - assertThrows(NullPointerException.class, () -> { + assertThrows(IllegalArgumentException.class, () -> { new FileHashStore(null); }); } From 12a622657edb9574aafb94fd7ad48d22fc001222 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 25 Sep 2023 16:03:42 -0700 Subject: [PATCH 077/553] Update 'Client' javadoc descriptions and add comments --- .../java/org/dataone/hashstore/Client.java | 83 +++++++++++++------ 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 2bb7746d..2416e005 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -65,7 +65,7 @@ public static void main(String[] args) throws Exception { // First check if user is looking for help if (cmd.hasOption("h")) { - formatter.printHelp("CommandLineApp", options); + formatter.printHelp("HashStore Client", options); return; } @@ -256,55 +256,82 @@ private static Options addHashStoreClientOptions() { // Mandatory option options.addOption("store", "storepath", true, "Path to HashStore."); // HashStore creation options - options.addOption("chs", "createhashstore", false, "Create a HashStore."); - options.addOption("dp", "storedepth", true, "Depth of HashStore."); - options.addOption("wp", "storewidth", true, "Width of HashStore."); - options.addOption("ap", "storealgo", true, "Algorithm of HashStore."); - options.addOption("nsp", "storenamespace", true, "Default metadata namespace"); + options.addOption("chs", "createhashstore", false, "Flag to create a HashStore."); + options.addOption("dp", "storedepth", true, "Depth of HashStore to create."); + options.addOption("wp", "storewidth", true, "Width of HashStore to create."); + options.addOption( + "ap", "storealgo", true, "Algorithm used for calculating file addresses in a HashStore." + ); + options.addOption( + "nsp", "storenamespace", true, "Default metadata namespace in a HashStore." + ); // Public API options options.addOption( "getchecksum", "client_getchecksum", false, - "Get the hex digest of a data object in a HashStore" + "Flag to get the hex digest of a data object in a HashStore." ); options.addOption( - "storeobject", "client_storeobject", false, "Store object to a HashStore." + "storeobject", "client_storeobject", false, "Flag to store objs to a HashStore." ); options.addOption( - "storemetadata", "client_storemetadata", false, "Store metadata to a HashStore" + "storemetadata", "client_storemetadata", false, "Flag to store metadata to a HashStore" ); options.addOption( - "retrieveobject", "client_retrieveobject", false, "Retrieve an object from a HashStore." + "retrieveobject", "client_retrieveobject", false, + "Flag to retrieve objs from a HashStore." ); options.addOption( "retrievemetadata", "client_retrievemetadata", false, - "Retrieve a metadata obj from a HashStore." + "Flag to retrieve metadata objs from a HashStore." ); options.addOption( - "deleteobject", "client_deleteobject", false, "Delete an object from a HashStore." + "deleteobject", "client_deleteobject", false, "Flag to delete objs from a HashStore." ); options.addOption( "deletemetadata", "client_deletemetadata", false, - "Delete a metadata obj from a HashStore." + "Flag to delete metadata objs from a HashStore." + ); + options.addOption("pid", "pidguid", true, "PID or GUID of object/metadata."); + options.addOption("path", "filepath", true, "Path to object/metadata."); + options.addOption( + "algo", "objectalgo", true, + "Algorithm to use when calling '-getchecksum' or '-storeobject' flag." ); - options.addOption("pid", "pidguid", true, "PID or GUID of object."); - options.addOption("path", "filepath", true, "Path to object."); - options.addOption("algo", "objectalgo", true, "Algorithm to use in calculations."); - options.addOption("checksum", "obj_checksum", true, "Checksum of object."); + options.addOption("checksum", "obj_checksum", true, "Checksum of object to store."); options.addOption( "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied." ); - options.addOption("size", "obj_size", true, "Size of object"); - options.addOption("format_id", "metadata_format", true, "Metadata format_id/namespace"); - // knbvm (test.arcticdata.io) options - options.addOption("knbvm", "knbvmtestadc", false, "Specify testing with knbvm."); - options.addOption("nobj", "numberofobj", false, "Number of objects to work with."); - options.addOption("sdir", "storedirectory", true, "Location of objects to convert."); - options.addOption("stype", "storetype", true, "Type of store 'objects' or 'metadata'"); - options.addOption("sts", "storetohs", false, "Flag to store objs to a HashStore"); + options.addOption("size", "obj_size", true, "Size of object to store/validate."); + options.addOption( + "format_id", "metadata_format", true, + "Format_id/namespace of metadata to store, retrieve or delete." + ); + // knbvm (test.arcticdata.io) options. Note: In order to test with knbvm, you must manually create + // a `pgdb.yaml` file with the respective JDBC values to access a Metacat db. + options.addOption( + "knbvm", "knbvmtestadc", false, "(knbvm) Flag to specify testing with knbvm." + ); + options.addOption( + "nobj", "numberofobj", false, + "(knbvm) Option to specify number of objects to retrieve from a Metacat db." + ); + options.addOption( + "sdir", "storedirectory", true, + "(knbvm) Option to specify the directory of objects to convert." + ); + options.addOption( + "stype", "storetype", true, "(knbvm) Option to specify 'objects' or 'metadata'" + ); + options.addOption( + "sts", "storetohs", false, "(knbvm) Test flag to store objs to a HashStore" + ); + options.addOption( + "rav", "retandval", false, + "(knbvm) Test flag to retrieve and validate objs from a HashStore." + ); options.addOption( - "rav", "retandval", false, "Retrieve and validate objs from a HashStore." + "dfs", "delfromhs", false, "(knbvm) Test flag to delete objs from a HashStore" ); - options.addOption("dfs", "delfromhs", false, "Delete objs from a HashStore."); return options; } @@ -415,6 +442,8 @@ private static void testWithKnbvm( String actionFlag, String objType, String originDir, String numObjects ) throws IOException { // Load metacat db yaml + // Note: In order to test with knbvm, you must manually create a `pgdb.yaml` file with the + // respective JDBC values to access a Metacat db. System.out.println("Loading metacat db yaml."); Path pgdbYaml = storePath.resolve("pgdb.yaml"); File pgdbYamlFile = pgdbYaml.toFile(); From 7363f1b56103fafec3d5022dc36ba333c9a363d1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 26 Sep 2023 13:51:16 -0700 Subject: [PATCH 078/553] Explicitly declare JUnit5 static Assertion imports instead of using * --- src/test/java/org/dataone/hashstore/ClientTest.java | 7 +++++-- src/test/java/org/dataone/hashstore/HashStoreTest.java | 8 ++++++-- .../java/org/dataone/hashstore/ObjectInfoTest.java | 5 +++-- .../filehashstore/FileHashStoreInterfaceTest.java | 10 +++++++++- .../filehashstore/FileHashStoreProtectedTest.java | 8 ++++++-- .../filehashstore/FileHashStorePublicTest.java | 7 +++++-- 6 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/ClientTest.java b/src/test/java/org/dataone/hashstore/ClientTest.java index 9f30f900..633312d8 100644 --- a/src/test/java/org/dataone/hashstore/ClientTest.java +++ b/src/test/java/org/dataone/hashstore/ClientTest.java @@ -1,5 +1,10 @@ package org.dataone.hashstore; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.io.PrintStream; @@ -10,8 +15,6 @@ import java.util.List; import java.util.Properties; -import static org.junit.jupiter.api.Assertions.*; - import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 557cc214..603ebd8c 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -1,12 +1,16 @@ package org.dataone.hashstore; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.Properties; -import static org.junit.jupiter.api.Assertions.*; - import org.dataone.hashstore.exceptions.HashStoreFactoryException; import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.testdata.TestDataHarness; diff --git a/src/test/java/org/dataone/hashstore/ObjectInfoTest.java b/src/test/java/org/dataone/hashstore/ObjectInfoTest.java index 35fb4957..f3f44c88 100644 --- a/src/test/java/org/dataone/hashstore/ObjectInfoTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectInfoTest.java @@ -1,10 +1,11 @@ package org.dataone.hashstore; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + import java.util.HashMap; import java.util.Map; -import static org.junit.jupiter.api.Assertions.*; - import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeEach; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 649117ee..8b8f0063 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1,5 +1,12 @@ package org.dataone.hashstore.filehashstore; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; @@ -28,7 +35,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import static org.junit.jupiter.api.Assertions.*; +// import static org.junit.jupiter.api.Assertions.*; + /** * Test class for FileHashStore HashStoreInterface override methods diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 1102fd6e..63450a49 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1,5 +1,11 @@ package org.dataone.hashstore.filehashstore; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -21,8 +27,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import static org.junit.jupiter.api.Assertions.*; - /** * Test class for FileHashStore protected members */ diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 4d5377b7..99c737f7 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -1,5 +1,10 @@ package org.dataone.hashstore.filehashstore; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -9,8 +14,6 @@ import java.util.HashMap; import java.util.Properties; -import static org.junit.jupiter.api.Assertions.*; - import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; From 563f36fa337f61aa9afe08b184c9646f9945b8e8 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 26 Sep 2023 13:52:10 -0700 Subject: [PATCH 079/553] Fix typos in README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e7fe736b..d59a5ef4 100644 --- a/README.md +++ b/README.md @@ -32,11 +32,11 @@ We also maintain a parallel [Python-based version of HashStore](https://github.c $ mvn clean package -Dmaven.test.skip=true # Get help -$ java -cp ./target/.../hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -h +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -h # Step 2: ## Create a HashStore (long option) -$ java -cp ./target/.../hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 ## Create a HashStore (short option) $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 From 2bd5fea7aac3bdac02fae2d08deb61d16db2503b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 26 Sep 2023 14:36:19 -0700 Subject: [PATCH 080/553] Remove commented out redundant code --- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 8b8f0063..9b2f8507 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -35,8 +35,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -// import static org.junit.jupiter.api.Assertions.*; - /** * Test class for FileHashStore HashStoreInterface override methods From 4f05df6fc1f3877f4428e52054fb935d92c6fc5a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 26 Sep 2023 14:58:33 -0700 Subject: [PATCH 081/553] Add JVM shut-down hook '.deleteOnExit()' to tmpFile generated in 'generateTmpFile' method --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index bd21e3fb..93eaa331 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1277,6 +1277,7 @@ protected File generateTmpFile(String prefix, Path directory) throws IOException logFileHashStore.trace( "FileHashStore.generateTmpFile - tmpFile generated: " + newFile.getAbsolutePath() ); + newFile.deleteOnExit(); return newFile; } catch (IOException ioe) { From 3a07d2fde843dcd3fe873329fb90a34a971f62c2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 28 Sep 2023 08:38:29 -0700 Subject: [PATCH 082/553] Bump up 'maven-surefire-plugin' version --- pom.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 29193388..796c5bae 100644 --- a/pom.xml +++ b/pom.xml @@ -145,7 +145,12 @@ maven-surefire-plugin - 2.22.1 + 3.1.2 + + maven-install-plugin From 5792a23176a94ad0907581480392d347dcc546e3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 29 Sep 2023 09:28:41 -0700 Subject: [PATCH 083/553] Add try-catch block to delete incomplete tmpFile when 'storeObject' is interrupted plus new junit test --- .../filehashstore/FileHashStore.java | 23 ++++++++-- .../FileHashStoreInterfaceTest.java | 46 +++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 93eaa331..b3eabd42 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1025,9 +1025,26 @@ protected ObjectInfo putObject( // Generate tmp file and write to it logFileHashStore.debug("FileHashStore.putObject - Generating tmpFile"); File tmpFile = generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); - Map hexDigests = writeToTmpFileAndGenerateChecksums( - tmpFile, object, additionalAlgorithm, checksumAlgorithm - ); + Map hexDigests; + try { + hexDigests = writeToTmpFileAndGenerateChecksums( + tmpFile, object, additionalAlgorithm, checksumAlgorithm + ); + } catch (Exception ge) { + // If the process to write to the tmpFile is interrupted for any reason, + // we will delete the tmpFile. + boolean deleteStatus = tmpFile.delete(); + String errMsg = + "FileHashStore.putObject - Unexpected Exception while storing object for: " + pid; + if (deleteStatus) { + errMsg = errMsg + ". Deleting temp file: " + tmpFile + ". Aborting request."; + } else { + errMsg = errMsg + ". Failed to delete temp file: " + tmpFile + + ". Aborting request."; + } + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } long storedObjFileSize = Files.size(Paths.get(tmpFile.toString())); // Validate object if checksum and checksum algorithm is passed diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 9b2f8507..c630eb78 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; +import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; @@ -460,6 +461,51 @@ public void storeObject_largeSparseFile() throws Exception { } + /** + * Tests that temporary objects that are being worked on while storeObject is in + * progress and gets interrupted are deleted. + */ + @Test + public void storeObject_interruptProcess() throws Exception { + long fileSize = 1L * 1024L * 1024L * 1024L; // 1GB + // Get tmp directory to initially store test file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + Path testFilePath = storePath.resolve("random_file.bin"); + + // Generate a random file with the specified size + try (FileOutputStream fileOutputStream = new FileOutputStream(testFilePath.toString())) { + FileChannel fileChannel = fileOutputStream.getChannel(); + FileLock lock = fileChannel.lock(); + fileChannel.position(fileSize - 1); + fileChannel.write(java.nio.ByteBuffer.wrap(new byte[]{0})); + lock.release(); + } catch (IOException ioe) { + ioe.printStackTrace(); + throw ioe; + } + + Thread toInterrupt = new Thread(() -> { + try { + InputStream dataStream = Files.newInputStream(testFilePath); + String pid = "dou.sparsefile.1"; + fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + } catch (IOException ioe) { + ioe.printStackTrace(); + } catch (NoSuchAlgorithmException nsae) { + nsae.printStackTrace(); + } + }); + + toInterrupt.start(); + Thread.sleep(5000); + toInterrupt.interrupt(); + toInterrupt.join(); + + // Confirm there are no files in 'objects/tmp' directory + File[] files = storePath.resolve("objects/tmp").toFile().listFiles(); + assertTrue(files.length == 0); + } + /** * Tests that the `storeObject` method can store an object successfully with multiple threads * (5). This test uses five futures (threads) that run concurrently, all except one of which From 6acc9b2857412fca496fffadfc293f8c655fb03c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 29 Sep 2023 09:29:29 -0700 Subject: [PATCH 084/553] Update inaccurate javadoc info for 'checkNotNegative' method --- .../dataone/hashstore/filehashstore/FileHashStoreUtility.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 7efdea6d..7a660ce0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -108,7 +108,7 @@ public static void checkForEmptyString(String string, String argument, String me /** * Checks whether a given long integer is greater than 0 * - * @param object Object to check + * @param longInt Object to check * @param method Calling method * @throws IllegalArgumentException If longInt is less than 0 */ From b4066af2677d07bb65522d884101ba2f881aaaa3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 29 Sep 2023 09:40:16 -0700 Subject: [PATCH 085/553] Cleanup redundant code in test classes after refactoring to JUnit5 syntax --- .../org/dataone/hashstore/HashStoreTest.java | 6 +- .../FileHashStoreInterfaceTest.java | 129 +++++++++--------- .../FileHashStoreProtectedTest.java | 60 ++++---- .../FileHashStorePublicTest.java | 34 +++-- 4 files changed, 110 insertions(+), 119 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 603ebd8c..7a822df0 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -69,7 +69,7 @@ public void isHashStore() { * Check that getHashStore throws exception when classPackage is null */ @Test - public void hashStore_classPackageNull() throws Exception { + public void hashStore_classPackageNull() { assertThrows(HashStoreFactoryException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", "/test"); @@ -88,7 +88,7 @@ public void hashStore_classPackageNull() throws Exception { * Check that getHashStore throws exception when classPackage is not found */ @Test - public void hashStore_classPackageNotFound() throws Exception { + public void hashStore_classPackageNotFound() { assertThrows(HashStoreFactoryException.class, () -> { String classPackage = "org.dataone.hashstore.filehashstore.AnotherHashStore"; @@ -110,7 +110,7 @@ public void hashStore_classPackageNotFound() throws Exception { * Check that getHashStore throws exception when storeProperties is null */ @Test - public void hashStore_nullStoreProperties() throws Exception { + public void hashStore_nullStoreProperties() { assertThrows(HashStoreFactoryException.class, () -> { String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; hashStore = HashStoreFactory.getHashStore(classPackage, null); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index c630eb78..ede61f30 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -164,7 +164,7 @@ public void storeObject_hexDigests() throws Exception { * Check that store object throws exception when object is null */ @Test - public void storeObject_null() throws Exception { + public void storeObject_null() { assertThrows(IllegalArgumentException.class, () -> { String pid = "j.tao.1700.1"; fileHashStore.storeObject(null, pid, null, null, null, 0); @@ -175,7 +175,7 @@ public void storeObject_null() throws Exception { * Check that store object throws exception when pid is null */ @Test - public void storeObject_nullPid() throws Exception { + public void storeObject_nullPid() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -191,7 +191,7 @@ public void storeObject_nullPid() throws Exception { * Check that store object throws exception when pid is empty */ @Test - public void storeObject_emptyPid() throws Exception { + public void storeObject_emptyPid() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -303,7 +303,7 @@ public void storeObject_correctChecksumValue() throws Exception { * Verify exception thrown when checksum provided does not match */ @Test - public void storeObject_incorrectChecksumValue() throws Exception { + public void storeObject_incorrectChecksumValue() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -321,7 +321,7 @@ public void storeObject_incorrectChecksumValue() throws Exception { * Verify exception thrown when checksum is empty and algorithm supported */ @Test - public void storeObject_emptyChecksumValue() throws Exception { + public void storeObject_emptyChecksumValue() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -338,7 +338,7 @@ public void storeObject_emptyChecksumValue() throws Exception { * Verify exception thrown when checksum is null and algorithm supported */ @Test - public void storeObject_nullChecksumValue() throws Exception { + public void storeObject_nullChecksumValue() { assertThrows(IllegalArgumentException.class, () -> { // Get single test file to "upload" String pid = "jtao.1700.1"; @@ -373,7 +373,7 @@ public void storeObject_objSizeCorrect() throws Exception { * Check that store object throws exception when incorrect file size provided */ @Test - public void storeObject_objSizeIncorrect() throws Exception { + public void storeObject_objSizeIncorrect() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -395,7 +395,7 @@ public void storeObject_objSizeIncorrect() throws Exception { * Verify exception thrown when unsupported additional algorithm provided */ @Test - public void storeObject_invalidAlgorithm() throws Exception { + public void storeObject_invalidAlgorithm() { assertThrows(NoSuchAlgorithmException.class, () -> { // Get single test file to "upload" String pid = "jtao.1700.1"; @@ -410,7 +410,7 @@ public void storeObject_invalidAlgorithm() throws Exception { * Check that store object throws FileAlreadyExists error when storing duplicate object */ @Test - public void storeObject_duplicate() throws Exception { + public void storeObject_duplicate() { for (String pid : testData.pidList) { assertThrows(PidObjectExistsException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -489,10 +489,8 @@ public void storeObject_interruptProcess() throws Exception { InputStream dataStream = Files.newInputStream(testFilePath); String pid = "dou.sparsefile.1"; fileHashStore.storeObject(dataStream, pid, null, null, null, 0); - } catch (IOException ioe) { + } catch (IOException | NoSuchAlgorithmException ioe) { ioe.printStackTrace(); - } catch (NoSuchAlgorithmException nsae) { - nsae.printStackTrace(); } }); @@ -503,7 +501,8 @@ public void storeObject_interruptProcess() throws Exception { // Confirm there are no files in 'objects/tmp' directory File[] files = storePath.resolve("objects/tmp").toFile().listFiles(); - assertTrue(files.length == 0); + assert files != null; + assertEquals(0, files.length); } /** @@ -779,11 +778,11 @@ public void storeMetadata_fileSize() throws Exception { * Test storeMetadata throws exception when metadata is null */ @Test - public void storeMetadata_metadataNull() throws Exception { + public void storeMetadata_metadataNull() { for (String pid : testData.pidList) { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.storeMetadata(null, pid, null); - }); + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.storeMetadata(null, pid, null) + ); } } @@ -791,7 +790,7 @@ public void storeMetadata_metadataNull() throws Exception { * Test storeMetadata throws exception when pid is null */ @Test - public void storeMetadata_pidNull() throws Exception { + public void storeMetadata_pidNull() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -810,7 +809,7 @@ public void storeMetadata_pidNull() throws Exception { * Test storeMetadata throws exception when pid is empty */ @Test - public void storeMetadata_pidEmpty() throws Exception { + public void storeMetadata_pidEmpty() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -829,7 +828,7 @@ public void storeMetadata_pidEmpty() throws Exception { * Test storeMetadata throws exception when pid is empty with spaces */ @Test - public void storeMetadata_pidEmptySpaces() throws Exception { + public void storeMetadata_pidEmptySpaces() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -942,7 +941,7 @@ public void retrieveObject() throws Exception { * Check that retrieveObject throws exception when pid is null */ @Test - public void retrieveObject_pidNull() throws Exception { + public void retrieveObject_pidNull() { assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveObject(null); pidInputStream.close(); @@ -953,7 +952,7 @@ public void retrieveObject_pidNull() throws Exception { * Check that retrieveObject throws exception when pid is empty */ @Test - public void retrieveObject_pidEmpty() throws Exception { + public void retrieveObject_pidEmpty() { assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveObject(""); pidInputStream.close(); @@ -964,7 +963,7 @@ public void retrieveObject_pidEmpty() throws Exception { * Check that retrieveObject throws exception when pid is empty spaces */ @Test - public void retrieveObject_pidEmptySpaces() throws Exception { + public void retrieveObject_pidEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveObject(" "); pidInputStream.close(); @@ -975,7 +974,7 @@ public void retrieveObject_pidEmptySpaces() throws Exception { * Check that retrieveObject throws exception when file is not found */ @Test - public void retrieveObject_pidNotFound() throws Exception { + public void retrieveObject_pidNotFound() { assertThrows(FileNotFoundException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveObject("dou.2023.hs.1"); pidInputStream.close(); @@ -1074,7 +1073,7 @@ public void retrieveMetadata_overload() throws Exception { * Check that retrieveMetadata throws exception when pid is null */ @Test - public void retrieveMetadata_pidNull() throws Exception { + public void retrieveMetadata_pidNull() { assertThrows(IllegalArgumentException.class, () -> { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); InputStream pidInputStream = fileHashStore.retrieveMetadata(null, storeFormatId); @@ -1086,7 +1085,7 @@ public void retrieveMetadata_pidNull() throws Exception { * Check that retrieveMetadata throws exception when pid is empty */ @Test - public void retrieveMetadata_pidEmpty() throws Exception { + public void retrieveMetadata_pidEmpty() { assertThrows(IllegalArgumentException.class, () -> { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); InputStream pidInputStream = fileHashStore.retrieveMetadata("", storeFormatId); @@ -1098,7 +1097,7 @@ public void retrieveMetadata_pidEmpty() throws Exception { * Check that retrieveMetadata throws exception when pid is empty spaces */ @Test - public void retrieveMetadata_pidEmptySpaces() throws Exception { + public void retrieveMetadata_pidEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); InputStream pidInputStream = fileHashStore.retrieveMetadata(" ", storeFormatId); @@ -1110,7 +1109,7 @@ public void retrieveMetadata_pidEmptySpaces() throws Exception { * Check that retrieveMetadata throws exception when format is null */ @Test - public void retrieveMetadata_formatNull() throws Exception { + public void retrieveMetadata_formatNull() { assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", null); pidInputStream.close(); @@ -1121,7 +1120,7 @@ public void retrieveMetadata_formatNull() throws Exception { * Check that retrieveMetadata throws exception when format is empty */ @Test - public void retrieveMetadata_formatEmpty() throws Exception { + public void retrieveMetadata_formatEmpty() { assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", ""); pidInputStream.close(); @@ -1132,7 +1131,7 @@ public void retrieveMetadata_formatEmpty() throws Exception { * Check that retrieveMetadata throws exception when format is empty spaces */ @Test - public void retrieveMetadata_formatEmptySpaces() throws Exception { + public void retrieveMetadata_formatEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", " "); pidInputStream.close(); @@ -1143,7 +1142,7 @@ public void retrieveMetadata_formatEmptySpaces() throws Exception { * Check that retrieveMetadata throws exception when file is not found */ @Test - public void retrieveMetadata_pidNotFound() throws Exception { + public void retrieveMetadata_pidNotFound() { assertThrows(FileNotFoundException.class, () -> { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); InputStream pidInputStream = fileHashStore.retrieveMetadata( @@ -1238,40 +1237,34 @@ public void deleteObject() throws Exception { * Confirm that deleteObject throws exception when associated pid obj not found */ @Test - public void deleteObject_pidNotFound() throws Exception { - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.deleteObject("dou.2023.hashstore.1"); - }); + public void deleteObject_pidNotFound() { + assertThrows( + FileNotFoundException.class, () -> fileHashStore.deleteObject("dou.2023.hashstore.1") + ); } /** * Confirm that deleteObject throws exception when pid is null */ @Test - public void deleteObject_pidNull() throws Exception { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.deleteObject(null); - }); + public void deleteObject_pidNull() { + assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(null)); } /** * Confirm that deleteObject throws exception when pid is empty */ @Test - public void deleteObject_pidEmpty() throws Exception { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.deleteObject(""); - }); + public void deleteObject_pidEmpty() { + assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject("")); } /** * Confirm that deleteObject throws exception when pid is empty spaces */ @Test - public void deleteObject_pidEmptySpaces() throws Exception { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.deleteObject(" "); - }); + public void deleteObject_pidEmptySpaces() { + assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(" ")); } /** @@ -1334,7 +1327,7 @@ public void deleteMetadata_overload() throws Exception { * Confirm that deleteMetadata throws exception when associated pid obj not found */ @Test - public void deleteMetadata_pidNotFound() throws Exception { + public void deleteMetadata_pidNotFound() { assertThrows(FileNotFoundException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata("dou.2023.hashstore.1", formatId); @@ -1345,7 +1338,7 @@ public void deleteMetadata_pidNotFound() throws Exception { * Confirm that deleteMetadata throws exception when pid is null */ @Test - public void deleteMetadata_pidNull() throws Exception { + public void deleteMetadata_pidNull() { assertThrows(IllegalArgumentException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata(null, formatId); @@ -1356,7 +1349,7 @@ public void deleteMetadata_pidNull() throws Exception { * Confirm that deleteMetadata throws exception when pid is empty */ @Test - public void deleteMetadata_pidEmpty() throws Exception { + public void deleteMetadata_pidEmpty() { assertThrows(IllegalArgumentException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata("", formatId); @@ -1367,7 +1360,7 @@ public void deleteMetadata_pidEmpty() throws Exception { * Confirm that deleteMetadata throws exception when pid is empty spaces */ @Test - public void deleteMetadata_pidEmptySpaces() throws Exception { + public void deleteMetadata_pidEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata(" ", formatId); @@ -1378,7 +1371,7 @@ public void deleteMetadata_pidEmptySpaces() throws Exception { * Confirm that deleteMetadata throws exception when formatId is null */ @Test - public void deleteMetadata_formatIdNull() throws Exception { + public void deleteMetadata_formatIdNull() { assertThrows(IllegalArgumentException.class, () -> { String pid = "dou.2023.hashstore.1"; fileHashStore.deleteMetadata(pid, null); @@ -1389,7 +1382,7 @@ public void deleteMetadata_formatIdNull() throws Exception { * Confirm that deleteMetadata throws exception when formatId is empty */ @Test - public void deleteMetadata_formatIdEmpty() throws Exception { + public void deleteMetadata_formatIdEmpty() { assertThrows(IllegalArgumentException.class, () -> { String pid = "dou.2023.hashstore.1"; fileHashStore.deleteMetadata(pid, ""); @@ -1400,7 +1393,7 @@ public void deleteMetadata_formatIdEmpty() throws Exception { * Confirm that deleteMetadata throws exception when formatId is empty spaces */ @Test - public void deleteMetadata_formatIdEmptySpaces() throws Exception { + public void deleteMetadata_formatIdEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { String pid = "dou.2023.hashstore.1"; fileHashStore.deleteMetadata(pid, " "); @@ -1433,7 +1426,7 @@ public void getHexDigest() throws Exception { * Confirm getHexDigest throws exception when file is not found */ @Test - public void getHexDigest_pidNotFound() throws Exception { + public void getHexDigest_pidNotFound() { for (String pid : testData.pidList) { assertThrows(FileNotFoundException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -1446,37 +1439,37 @@ public void getHexDigest_pidNotFound() throws Exception { * Confirm getHexDigest throws exception when file is not found */ @Test - public void getHexDigest_pidNull() throws Exception { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.getHexDigest(null, "SHA-256"); - }); + public void getHexDigest_pidNull() { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.getHexDigest(null, "SHA-256") + ); } /** * Confirm getHexDigest throws exception when file is not found */ @Test - public void getHexDigest_pidEmpty() throws Exception { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.getHexDigest("", "SHA-256"); - }); + public void getHexDigest_pidEmpty() { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.getHexDigest("", "SHA-256") + ); } /** * Confirm getHexDigest throws exception when file is not found */ @Test - public void getHexDigest_pidEmptySpaces() throws Exception { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.getHexDigest(" ", "SHA-256"); - }); + public void getHexDigest_pidEmptySpaces() { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.getHexDigest(" ", "SHA-256") + ); } /** * Confirm getHexDigest throws exception when unsupported algorithm supplied */ @Test - public void getHexDigest_badAlgo() throws Exception { + public void getHexDigest_badAlgo() { for (String pid : testData.pidList) { assertThrows(NoSuchAlgorithmException.class, () -> { // Store object first diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 63450a49..b2243c60 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -100,7 +100,7 @@ public void isValidAlgorithm_supported() { * Check algorithm support for unsupported algorithm */ @Test - public void isValidAlgorithm_notSupported() throws NoSuchAlgorithmException { + public void isValidAlgorithm_notSupported() { assertThrows(NoSuchAlgorithmException.class, () -> { try { String sm3 = "SM3"; @@ -120,7 +120,7 @@ public void isValidAlgorithm_notSupported() throws NoSuchAlgorithmException { * Check algorithm support for unsupported algorithm with lower cases */ @Test - public void isValidAlgorithm_notSupportedLowerCase() throws NoSuchAlgorithmException { + public void isValidAlgorithm_notSupportedLowerCase() { assertThrows(NoSuchAlgorithmException.class, () -> { try { // Must match string to reduce complexity, no string formatting @@ -191,11 +191,11 @@ public void getPidHexDigest() throws Exception { * Check that getPidHexDigest throws NoSuchAlgorithmException */ @Test - public void getPidHexDigest_badAlgorithm() throws Exception { + public void getPidHexDigest_badAlgorithm() { for (String pid : testData.pidList) { - assertThrows(NoSuchAlgorithmException.class, () -> { - fileHashStore.getPidHexDigest(pid, "SM2"); - }); + assertThrows( + NoSuchAlgorithmException.class, () -> fileHashStore.getPidHexDigest(pid, "SM2") + ); } } @@ -312,7 +312,7 @@ public void putObject_additionalAlgo_correctChecksumValue() throws Exception { * Verify putObject throws exception when checksum provided does not match */ @Test - public void putObject_incorrectChecksumValue() throws Exception { + public void putObject_incorrectChecksumValue() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -329,7 +329,7 @@ public void putObject_incorrectChecksumValue() throws Exception { * Verify putObject throws exception when checksum is empty and algorithm supported */ @Test - public void putObject_emptyChecksumValue() throws Exception { + public void putObject_emptyChecksumValue() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -344,7 +344,7 @@ public void putObject_emptyChecksumValue() throws Exception { * Verify putObject throws exception when checksum is null and algorithm supported */ @Test - public void putObject_nullChecksumValue() throws Exception { + public void putObject_nullChecksumValue() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -359,7 +359,7 @@ public void putObject_nullChecksumValue() throws Exception { * Verify putObject throws exception when checksumAlgorithm is empty and checksum is supplied */ @Test - public void putObject_emptyChecksumAlgorithmValue() throws Exception { + public void putObject_emptyChecksumAlgorithmValue() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -374,7 +374,7 @@ public void putObject_emptyChecksumAlgorithmValue() throws Exception { * Verify putObject throws exception when checksumAlgorithm is null and checksum supplied */ @Test - public void putObject_nullChecksumAlgorithmValue() throws Exception { + public void putObject_nullChecksumAlgorithmValue() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -409,7 +409,7 @@ public void putObject_objSizeCorrect() throws Exception { * Check that store object throws exception when incorrect file size provided */ @Test - public void putObject_objSizeIncorrect() throws Exception { + public void putObject_objSizeIncorrect() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -431,7 +431,7 @@ public void putObject_objSizeIncorrect() throws Exception { * Verify putObject throws exception when storing a duplicate object */ @Test - public void putObject_duplicateObject() throws Exception { + public void putObject_duplicateObject() { assertThrows(PidObjectExistsException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -450,7 +450,7 @@ public void putObject_duplicateObject() throws Exception { * Verify putObject throws exception when unsupported additional algorithm provided */ @Test - public void putObject_invalidAlgorithm() throws Exception { + public void putObject_invalidAlgorithm() { assertThrows(NoSuchAlgorithmException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -465,7 +465,7 @@ public void putObject_invalidAlgorithm() throws Exception { * Verify putObject throws exception when empty algorithm is supplied */ @Test - public void putObject_emptyAlgorithm() throws Exception { + public void putObject_emptyAlgorithm() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -480,7 +480,7 @@ public void putObject_emptyAlgorithm() throws Exception { * Verify putObject throws exception when pid is empty */ @Test - public void putObject_emptyPid() throws Exception { + public void putObject_emptyPid() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pidEmpty = ""; @@ -496,7 +496,7 @@ public void putObject_emptyPid() throws Exception { * Verify putObject throws exception when pid is null */ @Test - public void putObject_nullPid() throws Exception { + public void putObject_nullPid() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -511,7 +511,7 @@ public void putObject_nullPid() throws Exception { * Verify putObject throws exception object is null */ @Test - public void putObject_nullObject() throws Exception { + public void putObject_nullObject() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; @@ -659,7 +659,7 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce * Check that exception is thrown when unsupported algorithm supplied */ @Test - public void writeToTmpFileAndGenerateChecksums_invalidAlgo() throws Exception { + public void writeToTmpFileAndGenerateChecksums_invalidAlgo() { for (String pid : testData.pidList) { assertThrows(NoSuchAlgorithmException.class, () -> { File newTmpFile = generateTemporaryFile(); @@ -696,7 +696,7 @@ public void testMove() throws Exception { * Confirm that FileAlreadyExistsException is thrown when target already exists */ @Test - public void testMove_targetExists() throws Exception { + public void testMove_targetExists() { assertThrows(FileAlreadyExistsException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; @@ -712,7 +712,7 @@ public void testMove_targetExists() throws Exception { * Confirm that NullPointerException is thrown when entity is null */ @Test - public void testMove_entityNull() throws Exception { + public void testMove_entityNull() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -725,7 +725,7 @@ public void testMove_entityNull() throws Exception { * Confirm that FileAlreadyExistsException is thrown entity is empty */ @Test - public void testMove_entityEmpty() throws Exception { + public void testMove_entityEmpty() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -738,7 +738,7 @@ public void testMove_entityEmpty() throws Exception { * Confirm that FileAlreadyExistsException is thrown when entity is empty spaces */ @Test - public void testMove_entityEmptySpaces() throws Exception { + public void testMove_entityEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -777,11 +777,11 @@ public void putMetadata() throws Exception { * Test putMetadata throws exception when metadata is null */ @Test - public void putMetadata_metadataNull() throws Exception { + public void putMetadata_metadataNull() { for (String pid : testData.pidList) { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.putMetadata(null, pid, null); - }); + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.putMetadata(null, pid, null) + ); } } @@ -789,7 +789,7 @@ public void putMetadata_metadataNull() throws Exception { * Test putMetadata throws exception when pid is null */ @Test - public void putMetadata_pidNull() throws Exception { + public void putMetadata_pidNull() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -808,7 +808,7 @@ public void putMetadata_pidNull() throws Exception { * Test putMetadata throws exception when pid is empty */ @Test - public void putMetadata_pidEmpty() throws Exception { + public void putMetadata_pidEmpty() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -827,7 +827,7 @@ public void putMetadata_pidEmpty() throws Exception { * Test putMetadata throws exception when pid is empty with spaces */ @Test - public void putMetadata_pidEmptySpaces() throws Exception { + public void putMetadata_pidEmptySpaces() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 99c737f7..9859ad1b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -75,17 +75,15 @@ public void initializeFileHashStore() { * Test constructor invalid depth value */ @Test - public void constructor_nullProperties() throws Exception { - assertThrows(IllegalArgumentException.class, () -> { - new FileHashStore(null); - }); + public void constructor_nullProperties() { + assertThrows(IllegalArgumentException.class, () -> new FileHashStore(null)); } /** * Test constructor null store path */ @Test - public void constructor_nullStorePath() throws Exception { + public void constructor_nullStorePath() { assertThrows(NullPointerException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", null); @@ -104,7 +102,7 @@ public void constructor_nullStorePath() throws Exception { * Test constructor invalid depth property value */ @Test - public void constructor_illegalDepthArg() throws Exception { + public void constructor_illegalDepthArg() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -123,7 +121,7 @@ public void constructor_illegalDepthArg() throws Exception { * Test constructor invalid width property value */ @Test - public void constructor_illegalWidthArg() throws Exception { + public void constructor_illegalWidthArg() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -142,7 +140,7 @@ public void constructor_illegalWidthArg() throws Exception { * Test constructor unsupported algorithm property value */ @Test - public void constructor_illegalAlgorithmArg() throws Exception { + public void constructor_illegalAlgorithmArg() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -161,7 +159,7 @@ public void constructor_illegalAlgorithmArg() throws Exception { * Test constructor empty algorithm property value throws exception */ @Test - public void constructor_emptyAlgorithmArg() throws Exception { + public void constructor_emptyAlgorithmArg() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -180,7 +178,7 @@ public void constructor_emptyAlgorithmArg() throws Exception { * Test constructor algorithm property value with empty spaces throws exception */ @Test - public void constructor_emptySpacesAlgorithmArg() throws Exception { + public void constructor_emptySpacesAlgorithmArg() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -199,7 +197,7 @@ public void constructor_emptySpacesAlgorithmArg() throws Exception { * Test constructor empty metadata namespace property value throws exception */ @Test - public void constructor_emptyMetadataNameSpaceArg() throws Exception { + public void constructor_emptyMetadataNameSpaceArg() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -216,7 +214,7 @@ public void constructor_emptyMetadataNameSpaceArg() throws Exception { * Test constructor metadata namespace property value with empty spaces */ @Test - public void constructor_emptySpacesMetadataNameSpaceArg() throws Exception { + public void constructor_emptySpacesMetadataNameSpaceArg() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -233,7 +231,7 @@ public void constructor_emptySpacesMetadataNameSpaceArg() throws Exception { * Confirm that exception is thrown when storeDirectory property value is null */ @Test - public void initDefaultStore_directoryNull() throws Exception { + public void initDefaultStore_directoryNull() { assertThrows(NullPointerException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", null); @@ -330,7 +328,7 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { * instantiating FileHashStore */ @Test - public void testExistingHashStoreConfiguration_diffAlgorithm() throws Exception { + public void testExistingHashStoreConfiguration_diffAlgorithm() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -350,7 +348,7 @@ public void testExistingHashStoreConfiguration_diffAlgorithm() throws Exception * instantiating FileHashStore */ @Test - public void testExistingHashStoreConfiguration_diffDepth() throws Exception { + public void testExistingHashStoreConfiguration_diffDepth() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -370,7 +368,7 @@ public void testExistingHashStoreConfiguration_diffDepth() throws Exception { * instantiating FileHashStore */ @Test - public void testExistingHashStoreConfiguration_diffWidth() throws Exception { + public void testExistingHashStoreConfiguration_diffWidth() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -390,7 +388,7 @@ public void testExistingHashStoreConfiguration_diffWidth() throws Exception { * when instantiating FileHashStore */ @Test - public void testExistingHashStoreConfiguration_diffMetadataNamespace() throws Exception { + public void testExistingHashStoreConfiguration_diffMetadataNamespace() { assertThrows(IllegalArgumentException.class, () -> { Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -410,7 +408,7 @@ public void testExistingHashStoreConfiguration_diffMetadataNamespace() throws Ex * 'hashstore.yaml' */ @Test - public void testExistingHashStoreConfiguration_missingYaml() throws Exception { + public void testExistingHashStoreConfiguration_missingYaml() { assertThrows(IllegalStateException.class, () -> { // Create separate store Path newStoreDirectory = rootDirectory.resolve("test"); From 1a91d494a98b44f4d5244316f0b2126b5dd3e1dc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 2 Oct 2023 14:09:24 -0700 Subject: [PATCH 086/553] Add missing print statement when retrieving metadata with client app --- src/main/java/org/dataone/hashstore/Client.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 2416e005..4a4c107a 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -214,6 +214,8 @@ public static void main(String[] args) throws Exception { ); metadataStream.close(); System.out.println(metadataPreview); + String retrieveMetadataMsg = "...\n<-- Truncated for Display Purposes -->"; + System.out.println(retrieveMetadataMsg); } else if (cmd.hasOption("deleteobject")) { String pid = cmd.getOptionValue("pid"); From 4c8dbdce38659e0605ac016521e019e01862964e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 13 Oct 2023 09:17:27 -0700 Subject: [PATCH 087/553] Remove 'store_path' from being written into config and refactor init process --- .../filehashstore/FileHashStore.java | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b3eabd42..8eb32fcc 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -158,8 +158,7 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep Path hashstoreYaml = STORE_ROOT.resolve("hashstore.yaml"); if (!Files.exists(hashstoreYaml)) { String hashstoreYamlContent = buildHashStoreYamlString( - STORE_ROOT, DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, - METADATA_NAMESPACE + DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, METADATA_NAMESPACE ); writeHashStoreYaml(hashstoreYamlContent); logFileHashStore.info( @@ -220,7 +219,6 @@ protected void verifyHashStoreProperties( logFileHashStore.debug("FileHashStore - 'hashstore.yaml' found, verifying properties."); HashMap hsProperties = loadHashStoreYaml(storePath); - Path existingStorePath = (Path) hsProperties.get(HashStoreProperties.storePath.name()); int existingStoreDepth = (int) hsProperties.get(HashStoreProperties.storeDepth.name()); int existingStoreWidth = (int) hsProperties.get(HashStoreProperties.storeWidth.name()); String existingStoreAlgorithm = (String) hsProperties.get( @@ -231,7 +229,6 @@ protected void verifyHashStoreProperties( ); // Verify properties when 'hashstore.yaml' found - checkConfigurationEquality("store path", storePath, existingStorePath); checkConfigurationEquality("store depth", storeDepth, existingStoreDepth); checkConfigurationEquality("store width", storeWidth, existingStoreWidth); checkConfigurationEquality("store algorithm", storeAlgorithm, existingStoreAlgorithm); @@ -277,8 +274,6 @@ protected HashMap loadHashStoreYaml(Path storePath) throws IOExc try { HashMap hashStoreYamlProperties = om.readValue(hashStoreYamlFile, HashMap.class); - String yamlStorePath = (String) hashStoreYamlProperties.get("store_path"); - hsProperties.put(HashStoreProperties.storePath.name(), Paths.get(yamlStorePath)); hsProperties.put( HashStoreProperties.storeDepth.name(), hashStoreYamlProperties.get("store_depth") ); @@ -354,7 +349,6 @@ protected void checkConfigurationEquality( /** * Build the string content of the configuration file for HashStore - 'hashstore.yaml' * - * @param storePath Root path of store * @param storeDepth Depth of store * @param storeWidth Width of store * @param storeAlgorithm Algorithm to use to calculate the hex digest for the permanent @@ -363,15 +357,12 @@ protected void checkConfigurationEquality( * @return String that representing the contents of 'hashstore.yaml' */ protected String buildHashStoreYamlString( - Path storePath, int storeDepth, int storeWidth, String storeAlgorithm, - String storeMetadataNamespace + int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace ) { return String.format( "# Default configuration variables for HashStore\n\n" - + "############### Store Path ###############\n" - + "# Default path for `FileHashStore` if no path is provided\n" - + "store_path: \"%s\"\n\n" + "############### Directory Structure ###############\n" + + "############### Directory Structure ###############\n" + "# Desired amount of directories when sharding an object to " + "form the permanent address\n" + "store_depth: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP " + "NEW HASHSTORE\n" @@ -396,8 +387,8 @@ protected String buildHashStoreYamlString( + "calculated when storing an\n" + "# object to disk and returned to the caller after successful " + "storage.\n" + "store_default_algo_list:\n" + "- \"MD5\"\n" + "- \"SHA-1\"\n" + "- \"SHA-256\"\n" - + "- \"SHA-384\"\n" + "- \"SHA-512\"\n", storePath, storeDepth, storeWidth, - storeAlgorithm, storeMetadataNamespace + + "- \"SHA-384\"\n" + "- \"SHA-512\"\n", storeDepth, storeWidth, storeAlgorithm, + storeMetadataNamespace ); } From 1e4cfbcd71f86f11b849c0da1725278d35219a05 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 13 Oct 2023 09:18:11 -0700 Subject: [PATCH 088/553] Update junit tests affected by 'store_path' removal --- .../dataone/hashstore/filehashstore/FileHashStorePublicTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 9859ad1b..0ba44ccb 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -297,7 +297,6 @@ public void testPutHashStoreYaml() { @Test public void testGetHashStoreYaml() throws IOException { HashMap hsProperties = fileHashStore.loadHashStoreYaml(rootDirectory); - assertEquals(hsProperties.get("storePath"), rootDirectory); assertEquals(hsProperties.get("storeDepth"), 3); assertEquals(hsProperties.get("storeWidth"), 2); assertEquals(hsProperties.get("storeAlgorithm"), "SHA-256"); From 92e978d4973d9a954121e44ffcd4b023faa3e12f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 13 Oct 2023 09:18:40 -0700 Subject: [PATCH 089/553] Refactor client class for 'store_path' removal --- src/main/java/org/dataone/hashstore/Client.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/Client.java index 4a4c107a..d8458752 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/Client.java @@ -383,8 +383,7 @@ private static HashMap loadHashStoreYaml(Path storePath) { try { HashMap hashStoreYamlProperties = om.readValue(hashStoreYamlFile, HashMap.class); - String yamlStorePath = (String) hashStoreYamlProperties.get("store_path"); - hsProperties.put("storePath", Paths.get(yamlStorePath)); + hsProperties.put("storePath", storePath.toString()); hsProperties.put("storeDepth", hashStoreYamlProperties.get("store_depth")); hsProperties.put("storeWidth", hashStoreYamlProperties.get("store_width")); hsProperties.put("storeAlgorithm", hashStoreYamlProperties.get("store_algorithm")); @@ -413,7 +412,7 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE // Load properties and get HashStore HashMap hsProperties = loadHashStoreYaml(storePath); Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", hsProperties.get("storePath").toString()); + storeProperties.setProperty("storePath", storePath.toString()); storeProperties.setProperty("storeDepth", hsProperties.get("storeDepth").toString()); storeProperties.setProperty("storeWidth", hsProperties.get("storeWidth").toString()); storeProperties.setProperty( From addfaf9e39fb101051861ad6485130f291c9daca Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 11:55:22 -0700 Subject: [PATCH 090/553] Refactor FileHashStore for declaration of 'hashstore.yaml' as a public static final string --- .../dataone/hashstore/filehashstore/FileHashStore.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8eb32fcc..16dc2d2a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -56,6 +56,8 @@ public class FileHashStore implements HashStore { private final Path METADATA_STORE_DIRECTORY; private final Path METADATA_TMP_FILE_DIRECTORY; + public static final String HASHSTORE_YAML = "hashstore.yaml"; + public static final String[] SUPPORTED_HASH_ALGORITHMS = {"MD2", "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; @@ -155,7 +157,7 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep ); // Write configuration file 'hashstore.yaml' to store HashStore properties - Path hashstoreYaml = STORE_ROOT.resolve("hashstore.yaml"); + Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); if (!Files.exists(hashstoreYaml)) { String hashstoreYamlContent = buildHashStoreYamlString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, METADATA_NAMESPACE @@ -267,7 +269,7 @@ protected void verifyHashStoreProperties( * @throws IOException If `hashstore.yaml` doesn't exist */ protected HashMap loadHashStoreYaml(Path storePath) throws IOException { - Path hashStoreYamlPath = storePath.resolve("hashstore.yaml"); + Path hashStoreYamlPath = storePath.resolve(HASHSTORE_YAML); File hashStoreYamlFile = hashStoreYamlPath.toFile(); ObjectMapper om = new ObjectMapper(new YAMLFactory()); HashMap hsProperties = new HashMap<>(); @@ -309,7 +311,7 @@ protected HashMap loadHashStoreYaml(Path storePath) throws IOExc * @throws IOException If unable to write `hashstore.yaml` */ protected void writeHashStoreYaml(String yamlString) throws IOException { - Path hashstoreYaml = STORE_ROOT.resolve("hashstore.yaml"); + Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); try (BufferedWriter writer = new BufferedWriter( new OutputStreamWriter(Files.newOutputStream(hashstoreYaml), StandardCharsets.UTF_8) From 2ed6d1360c8fabf458c31ba564261eba2322da9a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 12:30:34 -0700 Subject: [PATCH 091/553] Rename 'Client' class to 'CommandLineTool' and update README --- README.md | 22 +++++++++---------- .../{Client.java => CommandLineTool.java} | 7 +++--- ...ientTest.java => CommandLineToolTest.java} | 18 +++++++-------- 3 files changed, 24 insertions(+), 23 deletions(-) rename src/main/java/org/dataone/hashstore/{Client.java => CommandLineTool.java} (99%) rename src/test/java/org/dataone/hashstore/{ClientTest.java => CommandLineToolTest.java} (97%) diff --git a/README.md b/README.md index d59a5ef4..8be4e8ab 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ and then install or build the package with `mvn install` or `mvn package`, respe We also maintain a parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). -## HashStore Client Usage +## HashStore CommandLineTool Usage ```sh @@ -32,35 +32,35 @@ We also maintain a parallel [Python-based version of HashStore](https://github.c $ mvn clean package -Dmaven.test.skip=true # Get help -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -h +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -h # Step 2: ## Create a HashStore (long option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 ## Create a HashStore (short option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 # Get the checksum of a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 # Store a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 # Store a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Retrieve a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrieveobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -retrieveobject -pid testpid1 # Retrieve a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Delete a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deleteobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -deleteobject -pid testpid1 # Delete a metadata file -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.Client -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 ``` ## License diff --git a/src/main/java/org/dataone/hashstore/Client.java b/src/main/java/org/dataone/hashstore/CommandLineTool.java similarity index 99% rename from src/main/java/org/dataone/hashstore/Client.java rename to src/main/java/org/dataone/hashstore/CommandLineTool.java index d8458752..70ff573b 100644 --- a/src/main/java/org/dataone/hashstore/Client.java +++ b/src/main/java/org/dataone/hashstore/CommandLineTool.java @@ -35,10 +35,11 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; /** - * HashStore's 'Client' class is a command line interface that allows a developer to create a new - * HashStore or interact directly with an existing HashStore. See 'README.md' for usage examples. + * HashStore's 'CommandLineTool' class is a command line interface that allows a developer to create + * a new HashStore or interact directly with an existing HashStore. See 'README.md' for usage + * examples. */ -public class Client { +public class CommandLineTool { private static HashStore hashStore; private static Path storePath; diff --git a/src/test/java/org/dataone/hashstore/ClientTest.java b/src/test/java/org/dataone/hashstore/CommandLineToolTest.java similarity index 97% rename from src/test/java/org/dataone/hashstore/ClientTest.java rename to src/test/java/org/dataone/hashstore/CommandLineToolTest.java index 633312d8..109172e6 100644 --- a/src/test/java/org/dataone/hashstore/ClientTest.java +++ b/src/test/java/org/dataone/hashstore/CommandLineToolTest.java @@ -20,7 +20,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -public class ClientTest { +public class CommandLineToolTest { private static HashStore hashStore; private static final TestDataHarness testData = new TestDataHarness(); private Properties hsProperties; @@ -128,7 +128,7 @@ public void client_createHashStore() throws Exception { String[] args = {optCreateHashstore, optStore, optStorePath, optStoreDepth, optStoreDepthValue, optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, optFormatId, optFormatIdValue}; - Client.main(args); + CommandLineTool.main(args); Path storePath = Paths.get(optStorePath); Path hashStoreObjectsPath = storePath.resolve("objects"); @@ -165,7 +165,7 @@ public void client_storeObjects() throws Exception { String optPidValue = pid; String[] args = {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, optPidValue}; - Client.main(args); + CommandLineTool.main(args); // Confirm object was stored Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); @@ -208,7 +208,7 @@ public void client_storeMetadata() throws Exception { String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); String[] args = {optStoreMetadata, optStore, optStorePath, optPath, optObjectPath, optPid, optPidValue, optFormatId, optFormatIdValue}; - Client.main(args); + CommandLineTool.main(args); // Confirm metadata was stored Path absPath = getObjectAbsPath( @@ -250,7 +250,7 @@ public void client_retrieveObjects() throws Exception { String optPid = "-pid"; String optPidValue = pid; String[] args = {optRetrieveObject, optStore, optStorePath, optPid, optPidValue}; - Client.main(args); + CommandLineTool.main(args); // Put things back System.out.flush(); @@ -289,7 +289,7 @@ public void client_retrieveMetadata() throws Exception { String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, optPidValue, optFormatId, optFormatIdValue}; - Client.main(args); + CommandLineTool.main(args); // Put things back System.out.flush(); @@ -325,7 +325,7 @@ public void client_deleteObjects() throws Exception { String optPid = "-pid"; String optPidValue = pid; String[] args = {optDeleteObject, optStore, optStorePath, optPid, optPidValue}; - Client.main(args); + CommandLineTool.main(args); // Confirm object was deleted Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); @@ -368,7 +368,7 @@ public void client_deleteMetadata() throws Exception { String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, optPidValue, optFormatId, optFormatIdValue}; - Client.main(args); + CommandLineTool.main(args); // Confirm metadata was deleted Path absPath = getObjectAbsPath( @@ -413,7 +413,7 @@ public void client_getHexDigest() throws Exception { String optAlgoValue = "SHA-256"; String[] args = {optGetChecksum, optStore, optStorePath, optPid, optPidValue, optAlgo, optAlgoValue}; - Client.main(args); + CommandLineTool.main(args); String testDataChecksum = testData.pidData.get(pid).get("sha256"); From 9d3f8f89b8f45a563773ed6a807827266a59c46c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 16:23:35 -0700 Subject: [PATCH 092/553] Rename 'METADATA_NAMESPACE' variable to 'DEFAULT_METADATA_NAMESPACE' for clarity --- .../filehashstore/FileHashStore.java | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 16dc2d2a..7f0cf1c4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -52,7 +52,7 @@ public class FileHashStore implements HashStore { private final String OBJECT_STORE_ALGORITHM; private final Path OBJECT_STORE_DIRECTORY; private final Path OBJECT_TMP_FILE_DIRECTORY; - private final String METADATA_NAMESPACE; + private final String DEFAULT_METADATA_NAMESPACE; private final Path METADATA_STORE_DIRECTORY; private final Path METADATA_TMP_FILE_DIRECTORY; @@ -127,7 +127,7 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep DIRECTORY_DEPTH = storeDepth; DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; - METADATA_NAMESPACE = storeMetadataNamespace; + DEFAULT_METADATA_NAMESPACE = storeMetadataNamespace; // Resolve object/metadata directories OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); METADATA_STORE_DIRECTORY = storePath.resolve("metadata"); @@ -153,14 +153,15 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep logFileHashStore.debug( "FileHashStore - HashStore initialized. Store Depth: " + DIRECTORY_DEPTH + ". Store Width: " + DIRECTORY_WIDTH + ". Store Algorithm: " - + OBJECT_STORE_ALGORITHM + ". Store Metadata Namespace: " + METADATA_NAMESPACE + + OBJECT_STORE_ALGORITHM + ". Store Metadata Namespace: " + + DEFAULT_METADATA_NAMESPACE ); // Write configuration file 'hashstore.yaml' to store HashStore properties Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); if (!Files.exists(hashstoreYaml)) { String hashstoreYamlContent = buildHashStoreYamlString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, METADATA_NAMESPACE + DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, DEFAULT_METADATA_NAMESPACE ); writeHashStoreYaml(hashstoreYamlContent); logFileHashStore.info( @@ -594,7 +595,7 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { - checkedFormatId = METADATA_NAMESPACE; + checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "storeMetadata"); checkedFormatId = formatId; @@ -684,7 +685,7 @@ public String storeMetadata(InputStream metadata, String pid) throws IOException FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeMetadata"); - return syncPutMetadata(metadata, pid, METADATA_NAMESPACE); + return syncPutMetadata(metadata, pid, DEFAULT_METADATA_NAMESPACE); } @Override @@ -777,19 +778,19 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.retrieveMetadata - Called to retrieve metadata for pid: " + pid - + " with default metadata namespace: " + METADATA_NAMESPACE + + " with default metadata namespace: " + DEFAULT_METADATA_NAMESPACE ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", METADATA_NAMESPACE); + Path metadataCidPath = getRealPath(pid, "metadata", DEFAULT_METADATA_NAMESPACE); // Check to see if metadata exists if (!Files.exists(metadataCidPath)) { String errMsg = "FileHashStore.retrieveMetadata - Metadata does not exist for pid: " - + pid + " with formatId: " + METADATA_NAMESPACE + ". Metadata address: " + + pid + " with formatId: " + DEFAULT_METADATA_NAMESPACE + ". Metadata address: " + metadataCidPath; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); @@ -801,12 +802,12 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, metadataCidInputStream = Files.newInputStream(metadataCidPath); logFileHashStore.info( "FileHashStore.retrieveMetadata - Retrieved metadata for pid: " + pid - + " with formatId: " + METADATA_NAMESPACE + + " with formatId: " + DEFAULT_METADATA_NAMESPACE ); } catch (IOException ioe) { String errMsg = "FileHashStore.retrieveMetadata - Unexpected error when creating InputStream" - + " for pid: " + pid + " with formatId: " + METADATA_NAMESPACE + + " for pid: " + pid + " with formatId: " + DEFAULT_METADATA_NAMESPACE + ". IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); @@ -888,7 +889,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotF FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", METADATA_NAMESPACE); + Path metadataCidPath = getRealPath(pid, "metadata", DEFAULT_METADATA_NAMESPACE); // Check to see if object exists if (!Files.exists(metadataCidPath)) { @@ -1518,7 +1519,7 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { - checkedFormatId = METADATA_NAMESPACE; + checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "putMetadata"); checkedFormatId = formatId; From af679ad93877136dd6aa738809b4c1f3ca833ae9 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 16:31:07 -0700 Subject: [PATCH 093/553] Rename method 'syncPubObject' to 'syncPutObject' to fix typo in method name --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7f0cf1c4..f5a85631 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -425,7 +425,7 @@ public ObjectInfo storeObject( } FileHashStoreUtility.checkNotNegative(objSize, "storeObject"); - return syncPubObject( + return syncPutObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize ); } @@ -433,7 +433,7 @@ public ObjectInfo storeObject( /** * Method to synchronize storing objects with FileHashStore */ - private ObjectInfo syncPubObject( + private ObjectInfo syncPutObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, PidObjectExistsException, IOException, RuntimeException { @@ -530,7 +530,7 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA validateAlgorithm(additionalAlgorithm); } - return syncPubObject(object, pid, additionalAlgorithm, null, null, 0); + return syncPutObject(object, pid, additionalAlgorithm, null, null, 0); } /** @@ -556,7 +556,7 @@ public ObjectInfo storeObject( validateAlgorithm(checksumAlgorithm); } - return syncPubObject(object, pid, null, checksum, checksumAlgorithm, 0); + return syncPutObject(object, pid, null, checksum, checksumAlgorithm, 0); } /** @@ -575,7 +575,7 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); FileHashStoreUtility.checkNotNegative(objSize, "storeObject"); - return syncPubObject(object, pid, null, null, null, objSize); + return syncPutObject(object, pid, null, null, null, objSize); } @Override From 5d4b0d255a93c07ded715c11c5c206506e1c6c41 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 16:35:30 -0700 Subject: [PATCH 094/553] Fix typo in string when calling 'checkConfigurationEquality' for store metadata namespace --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f5a85631..ec70b7fd 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -236,7 +236,7 @@ protected void verifyHashStoreProperties( checkConfigurationEquality("store width", storeWidth, existingStoreWidth); checkConfigurationEquality("store algorithm", storeAlgorithm, existingStoreAlgorithm); checkConfigurationEquality( - "store algorithm", storeMetadataNamespace, existingStoreMetadataNs + "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs ); } else { From 37e801b895ff79717106981fc2975353a66ad55b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 16:40:03 -0700 Subject: [PATCH 095/553] Add missing .close() call on writer in 'writeHashStoreYaml' --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index ec70b7fd..6ed32771 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -318,6 +318,7 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { new OutputStreamWriter(Files.newOutputStream(hashstoreYaml), StandardCharsets.UTF_8) )) { writer.write(yamlString); + writer.close(); } catch (IOException ioe) { logFileHashStore.fatal( From eec23714aa61d9dc34dc044519ba0b3a7217c83f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 16:41:14 -0700 Subject: [PATCH 096/553] Move .close() call to finally block in 'calculateHexDigest' in FileHashStoreUtility class --- .../hashstore/filehashstore/FileHashStoreUtility.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 7a660ce0..d8c38c1a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -52,8 +52,6 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm mdObject.update(buffer, 0, bytesRead); } - // Close dataStream - dataStream.close(); } catch (IOException ioe) { String errMsg = @@ -61,6 +59,9 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm + ioe.getMessage(); throw new IOException(errMsg); + } finally { + // Close dataStream + dataStream.close(); } // mdObjectHexDigest return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); @@ -109,7 +110,7 @@ public static void checkForEmptyString(String string, String argument, String me * Checks whether a given long integer is greater than 0 * * @param longInt Object to check - * @param method Calling method + * @param method Calling method * @throws IllegalArgumentException If longInt is less than 0 */ public static void checkNotNegative(long longInt, String method) From 2d51099be34c7f7d9742e2136de8cf34a23f7443 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 19 Oct 2023 16:48:54 -0700 Subject: [PATCH 097/553] Call '.equalsIgnoreCase()' instead of '.equals()' when comparing checksums in 'validateTmpObject' method --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6ed32771..de3223dd 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1130,7 +1130,7 @@ private void validateTmpObject( throw new NoSuchAlgorithmException(errMsg); } - if (!checksum.equals(digestFromHexDigests)) { + if (!checksum.equalsIgnoreCase(digestFromHexDigests)) { // Delete tmp File boolean deleteStatus = tmpFile.delete(); if (!deleteStatus) { From b747fe94e45677f08fa0b68c8888357041822f65 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 20 Oct 2023 03:56:21 -0700 Subject: [PATCH 098/553] Rename 'CommandLineTool' class to 'HashStoreClient' --- ...mmandLineTool.java => HashStoreClient.java} | 7 +++---- ...eToolTest.java => HashStoreClientTest.java} | 18 +++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) rename src/main/java/org/dataone/hashstore/{CommandLineTool.java => HashStoreClient.java} (99%) rename src/test/java/org/dataone/hashstore/{CommandLineToolTest.java => HashStoreClientTest.java} (97%) diff --git a/src/main/java/org/dataone/hashstore/CommandLineTool.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java similarity index 99% rename from src/main/java/org/dataone/hashstore/CommandLineTool.java rename to src/main/java/org/dataone/hashstore/HashStoreClient.java index 70ff573b..c6dcb28f 100644 --- a/src/main/java/org/dataone/hashstore/CommandLineTool.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -35,11 +35,10 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; /** - * HashStore's 'CommandLineTool' class is a command line interface that allows a developer to create - * a new HashStore or interact directly with an existing HashStore. See 'README.md' for usage - * examples. + * HashStoreClient is a command line interface that allows a developer to create a new HashStore or + * interact directly with an existing HashStore. See 'README.md' for usage examples. */ -public class CommandLineTool { +public class HashStoreClient { private static HashStore hashStore; private static Path storePath; diff --git a/src/test/java/org/dataone/hashstore/CommandLineToolTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java similarity index 97% rename from src/test/java/org/dataone/hashstore/CommandLineToolTest.java rename to src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 109172e6..b88e69b6 100644 --- a/src/test/java/org/dataone/hashstore/CommandLineToolTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -20,7 +20,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -public class CommandLineToolTest { +public class HashStoreClientTest { private static HashStore hashStore; private static final TestDataHarness testData = new TestDataHarness(); private Properties hsProperties; @@ -128,7 +128,7 @@ public void client_createHashStore() throws Exception { String[] args = {optCreateHashstore, optStore, optStorePath, optStoreDepth, optStoreDepthValue, optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, optFormatId, optFormatIdValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); Path storePath = Paths.get(optStorePath); Path hashStoreObjectsPath = storePath.resolve("objects"); @@ -165,7 +165,7 @@ public void client_storeObjects() throws Exception { String optPidValue = pid; String[] args = {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, optPidValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); // Confirm object was stored Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); @@ -208,7 +208,7 @@ public void client_storeMetadata() throws Exception { String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); String[] args = {optStoreMetadata, optStore, optStorePath, optPath, optObjectPath, optPid, optPidValue, optFormatId, optFormatIdValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); // Confirm metadata was stored Path absPath = getObjectAbsPath( @@ -250,7 +250,7 @@ public void client_retrieveObjects() throws Exception { String optPid = "-pid"; String optPidValue = pid; String[] args = {optRetrieveObject, optStore, optStorePath, optPid, optPidValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); // Put things back System.out.flush(); @@ -289,7 +289,7 @@ public void client_retrieveMetadata() throws Exception { String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, optPidValue, optFormatId, optFormatIdValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); // Put things back System.out.flush(); @@ -325,7 +325,7 @@ public void client_deleteObjects() throws Exception { String optPid = "-pid"; String optPidValue = pid; String[] args = {optDeleteObject, optStore, optStorePath, optPid, optPidValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); // Confirm object was deleted Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); @@ -368,7 +368,7 @@ public void client_deleteMetadata() throws Exception { String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, optPidValue, optFormatId, optFormatIdValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); // Confirm metadata was deleted Path absPath = getObjectAbsPath( @@ -413,7 +413,7 @@ public void client_getHexDigest() throws Exception { String optAlgoValue = "SHA-256"; String[] args = {optGetChecksum, optStore, optStorePath, optPid, optPidValue, optAlgo, optAlgoValue}; - CommandLineTool.main(args); + HashStoreClient.main(args); String testDataChecksum = testData.pidData.get(pid).get("sha256"); From ed057423fd0b86f31d3f8db2d9a19fa61f782ab0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 20 Oct 2023 04:07:05 -0700 Subject: [PATCH 099/553] Delete 'deleteObjectAndParentDirectories()' method, refactor affected classes and update junit tests --- .../filehashstore/FileHashStore.java | 39 ++----------------- .../FileHashStoreInterfaceTest.java | 17 ++++++-- 2 files changed, 16 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index de3223dd..67294c85 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -839,7 +839,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou } // Proceed to delete - deleteObjectAndParentDirectories(objRealPath, pid, "deleteObject"); + Files.delete(objRealPath); logFileHashStore.info( "FileHashStore.deleteObject - File deleted for: " + pid + " with object address: " + objRealPath @@ -870,7 +870,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx } // Proceed to delete - deleteObjectAndParentDirectories(metadataCidPath, pid, "deleteMetadata"); + Files.delete(metadataCidPath); logFileHashStore.info( "FileHashStore.deleteMetadata - File deleted for: " + pid + " with metadata address: " + metadataCidPath @@ -901,7 +901,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotF } // Proceed to delete - deleteObjectAndParentDirectories(metadataCidPath, pid, "deleteMetadata"); + Files.delete(metadataCidPath); logFileHashStore.info( "FileHashStore.deleteMetadata - File deleted for: " + pid + " with metadata address: " + metadataCidPath @@ -1617,37 +1617,4 @@ protected Path getRealPath(String pid, String entity, String formatId) } return realPath; } - - /** - * Deletes a given object and its parent directories if they are empty - * - * @param objectAbsPath Path of the object to delete - * @param pid Authority-based identifier - * @param method Calling method - * @throws IOException I/O error when deleting object or accessing directories - */ - private void deleteObjectAndParentDirectories(Path objectAbsPath, String pid, String method) - throws IOException { - // Delete file - Files.delete(objectAbsPath); - - // Then delete any empty directories - Path parent = objectAbsPath.getParent(); - while (parent != null && FileHashStoreUtility.isDirectoryEmpty(parent)) { - if (parent.equals(METADATA_STORE_DIRECTORY)) { - // Do not delete the metadata store directory - break; - - } else { - Files.delete(parent); - logFileHashStore.debug( - "FileHashStore.deleteObjectAndParentDirectories - " + method - + " : Deleting parent directory for: " + pid + " with parent address: " - + parent - ); - parent = parent.getParent(); - - } - } - } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index ede61f30..e30693ce 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1221,12 +1221,15 @@ public void deleteObject() throws Exception { fileHashStore.deleteObject(pid); - // Double Check that file doesn't exist + // Check that file doesn't exist String objId = objInfo.getId(); Path objCidAbsPath = getObjectAbsPath(objId); assertFalse(Files.exists(objCidAbsPath)); - // Double check that object directory still exists + // Check that parent directories are not deleted + assertTrue(Files.exists(objCidAbsPath.getParent())); + + // Check that object directory still exists Path storePath = Paths.get(fhsProperties.getProperty("storePath")); Path storeObjectPath = storePath.resolve("objects"); assertTrue(Files.exists(storeObjectPath)); @@ -1288,7 +1291,10 @@ public void deleteMetadata() throws Exception { Path metadataCidPath = fileHashStore.getRealPath(pid, "metadata", storeFormatId); assertFalse(Files.exists(metadataCidPath)); - // Double check that metadata directory still exists + // Check that parent directories are not deleted + assertTrue(Files.exists(metadataCidPath.getParent())); + + // Check that metadata directory still exists Path storePath = Paths.get(fhsProperties.getProperty("storePath")); Path storeObjectPath = storePath.resolve("metadata"); assertTrue(Files.exists(storeObjectPath)); @@ -1316,7 +1322,10 @@ public void deleteMetadata_overload() throws Exception { Path metadataCidPath = fileHashStore.getRealPath(pid, "metadata", storeFormatId); assertFalse(Files.exists(metadataCidPath)); - // Double check that metadata directory still exists + // Check that parent directories are not deleted + assertTrue(Files.exists(metadataCidPath.getParent())); + + // Check that metadata directory still exists Path storePath = Paths.get(fhsProperties.getProperty("storePath")); Path storeObjectPath = storePath.resolve("metadata"); assertTrue(Files.exists(storeObjectPath)); From 8876567c5d4d54d2c1597beff0a569b3f3c328ec Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 20 Oct 2023 05:04:53 -0700 Subject: [PATCH 100/553] Refactor overloaded methods, and 'FileHashStore' to use -1 value to bypass object size validation instead of 0 --- .../filehashstore/FileHashStore.java | 48 +++++++------------ 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 67294c85..fdbc1417 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -424,7 +424,10 @@ public ObjectInfo storeObject( ); validateAlgorithm(checksumAlgorithm); } - FileHashStoreUtility.checkNotNegative(objSize, "storeObject"); + if (objSize != -1) { + System.out.println("Checking not negative..."); + FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); + } return syncPutObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize @@ -519,19 +522,11 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA "FileHashStore.storeObject - Called to store object for pid: " + pid ); - // Begin input validation - FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); - // Validate algorithms if not null or empty, throws exception if not supported - if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( - additionalAlgorithm, "additionalAlgorithm", "storeObject" - ); - validateAlgorithm(additionalAlgorithm); - } + FileHashStoreUtility.ensureNotNull( + additionalAlgorithm, "additionalAlgorithm", "storeObject" + ); - return syncPutObject(object, pid, additionalAlgorithm, null, null, 0); + return storeObject(object, pid, additionalAlgorithm, null, null, -1); } /** @@ -545,19 +540,10 @@ public ObjectInfo storeObject( "FileHashStore.storeObject - Called to store object for pid: " + pid ); - // Begin input validation - FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); - // Validate algorithms if not null or empty, throws exception if not supported - if (checksumAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( - checksumAlgorithm, "checksumAlgorithm", "storeObject" - ); - validateAlgorithm(checksumAlgorithm); - } + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeObject"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "storeObject"); - return syncPutObject(object, pid, null, checksum, checksumAlgorithm, 0); + return storeObject(object, pid, null, checksum, checksumAlgorithm, -1); } /** @@ -570,13 +556,9 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) "FileHashStore.storeObject - Called to store object for pid: " + pid ); - // Begin input validation - FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); - FileHashStoreUtility.checkNotNegative(objSize, "storeObject"); + FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); - return syncPutObject(object, pid, null, null, null, objSize); + return storeObject(object, pid, null, null, null, objSize); } @Override @@ -997,7 +979,9 @@ protected ObjectInfo putObject( ); validateAlgorithm(checksumAlgorithm); } - FileHashStoreUtility.checkNotNegative(objSize, "putObject"); + if (objSize != -1) { + FileHashStoreUtility.checkNotNegativeOrZero(objSize, "putObject"); + } // If validation is desired, checksumAlgorithm and checksum must both be present boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); From f84fd8ceffb1e06c7704ef40c51a51ad08fe92d2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 20 Oct 2023 05:07:49 -0700 Subject: [PATCH 101/553] Refactor HashStoreClient to set size to -1 when size option is not provided when 'storeObject' is called to bypass object size validation --- src/main/java/org/dataone/hashstore/HashStoreClient.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index c6dcb28f..c12100a5 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -163,6 +163,8 @@ public static void main(String[] args) throws Exception { long size = 0; if (cmd.hasOption("size")) { size = Long.parseLong(cmd.getOptionValue("size")); + } else { + size = -1; } InputStream pidObjStream = Files.newInputStream(path); From ed2238023fb5e21bfa8649dd559a478ce093241c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 20 Oct 2023 05:09:20 -0700 Subject: [PATCH 102/553] Refactor 'checkNotNegative' method to 'checkNotNegativeOrZero' --- .../hashstore/filehashstore/FileHashStoreUtility.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index d8c38c1a..4ddee363 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -107,17 +107,17 @@ public static void checkForEmptyString(String string, String argument, String me } /** - * Checks whether a given long integer is greater than 0 + * Checks whether a given long integer is negative or zero * * @param longInt Object to check * @param method Calling method - * @throws IllegalArgumentException If longInt is less than 0 + * @throws IllegalArgumentException If longInt is less than or equal */ - public static void checkNotNegative(long longInt, String method) + public static void checkNotNegativeOrZero(long longInt, String method) throws IllegalArgumentException { - if (longInt < 0) { + if (longInt < 0 || longInt == 0) { String errMsg = "FileHashStoreUtility.checkNotNegative - Calling Method: " + method - + "(): objSize cannot be less than 0."; + + "(): objSize cannot be less than or equal to 0."; throw new IllegalArgumentException(errMsg); } } From 4e429cc9eb181b32b77b98b04accd53f3d0871c9 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 20 Oct 2023 05:10:10 -0700 Subject: [PATCH 103/553] Refactor all junit tests affected by amended object size validation skip with -1 --- .../hashstore/HashStoreClientTest.java | 6 +- .../org/dataone/hashstore/HashStoreTest.java | 2 +- .../FileHashStoreInterfaceTest.java | 60 ++++++++++++------- .../FileHashStoreProtectedTest.java | 34 +++++------ .../FileHashStorePublicTest.java | 2 +- 5 files changed, 60 insertions(+), 44 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index b88e69b6..443fda3e 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -241,7 +241,7 @@ public void client_retrieveObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, 0); + hashStore.storeObject(dataStream, pid, null, null, null, -1); // Call client String optRetrieveObject = "-retrieveobject"; @@ -316,7 +316,7 @@ public void client_deleteObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, 0); + hashStore.storeObject(dataStream, pid, null, null, null, -1); // Call client String optDeleteObject = "-deleteobject"; @@ -401,7 +401,7 @@ public void client_getHexDigest() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, 0); + hashStore.storeObject(dataStream, pid, null, null, null, -1); // Call client String optGetChecksum = "-getchecksum"; diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 7a822df0..0507987e 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -127,7 +127,7 @@ public void hashStore_storeObjects() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = hashStore.storeObject(dataStream, pid, null, null, null, 0); + ObjectInfo objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objAuthorityId = testData.pidData.get(pid).get("object_cid"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index e30693ce..e6f7fbfc 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -106,7 +106,7 @@ public void storeObject() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id (pid)) String objectCid = testData.pidData.get(pid).get("object_cid"); @@ -124,7 +124,7 @@ public void storeObject_objSize() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Check the object size long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -142,7 +142,7 @@ public void storeObject_hexDigests() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); Map hexDigests = objInfo.getHexDigests(); @@ -167,7 +167,7 @@ public void storeObject_hexDigests() throws Exception { public void storeObject_null() { assertThrows(IllegalArgumentException.class, () -> { String pid = "j.tao.1700.1"; - fileHashStore.storeObject(null, pid, null, null, null, 0); + fileHashStore.storeObject(null, pid, null, null, null, -1); }); } @@ -182,7 +182,7 @@ public void storeObject_nullPid() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, null, null, null, null, 0); + fileHashStore.storeObject(dataStream, null, null, null, null, -1); }); } } @@ -198,7 +198,23 @@ public void storeObject_emptyPid() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, "", null, null, null, 0); + fileHashStore.storeObject(dataStream, "", null, null, null, -1); + }); + } + } + + /** + * Check that store object throws exception when object size is 0 + */ + @Test + public void storeObject_zeroObjSize() { + for (String pid : testData.pidList) { + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null, 0); }); } } @@ -244,7 +260,7 @@ public void storeObject_validateChecksum_overload() throws Exception { } /** - * Check that store object returns the correct ObjectInfo size + * Check that store object returns the correct ObjectInfo size with overloaded method */ @Test public void storeObject_objSize_overload() throws Exception { @@ -273,7 +289,7 @@ public void storeObject_validateChecksumValue() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectInfo address = fileHashStore.storeObject( - dataStream, pid, null, checksumCorrect, "SHA-256", 0 + dataStream, pid, null, checksumCorrect, "SHA-256", -1 ); String objCid = address.getId(); @@ -293,7 +309,7 @@ public void storeObject_correctChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, "MD2", null, null, 0); + fileHashStore.storeObject(dataStream, pid, "MD2", null, null, -1); String md2 = testData.pidData.get(pid).get("md2"); assertEquals(checksumCorrect, md2); @@ -313,7 +329,7 @@ public void storeObject_incorrectChecksumValue() { "aaf9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", 0); + fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", -1); }); } @@ -330,7 +346,7 @@ public void storeObject_emptyChecksumValue() { String checksumEmpty = ""; InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", 0); + fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", -1); }); } @@ -345,7 +361,7 @@ public void storeObject_nullChecksumValue() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", 0); + fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", -1); }); } @@ -402,7 +418,7 @@ public void storeObject_invalidAlgorithm() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, "SM2", null, null, 0); + fileHashStore.storeObject(dataStream, pid, "SM2", null, null, -1); }); } @@ -417,10 +433,10 @@ public void storeObject_duplicate() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); InputStream dataStreamDup = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStreamDup, pid, null, null, null, 0); + fileHashStore.storeObject(dataStreamDup, pid, null, null, null, -1); }); } } @@ -452,7 +468,7 @@ public void storeObject_largeSparseFile() throws Exception { InputStream dataStream = Files.newInputStream(testFilePath); String pid = "dou.sparsefile.1"; ObjectInfo sparseFileObjInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 + dataStream, pid, null, null, null, -1 ); String objCid = sparseFileObjInfo.getId(); @@ -488,7 +504,7 @@ public void storeObject_interruptProcess() throws Exception { try { InputStream dataStream = Files.newInputStream(testFilePath); String pid = "dou.sparsefile.1"; - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); } catch (IOException | NoSuchAlgorithmException ioe) { ioe.printStackTrace(); } @@ -929,7 +945,7 @@ public void retrieveObject() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Retrieve object InputStream objectCidInputStream = fileHashStore.retrieveObject(pid); @@ -991,7 +1007,7 @@ public void retrieveObject_verifyContent() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Retrieve object InputStream objectCidInputStream; @@ -1217,7 +1233,7 @@ public void deleteObject() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); fileHashStore.deleteObject(pid); @@ -1420,7 +1436,7 @@ public void getHexDigest() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Then get the checksum String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); @@ -1486,7 +1502,7 @@ public void getHexDigest_badAlgo() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); fileHashStore.getHexDigest(pid, "BLAKE2S"); }); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index b2243c60..819f5294 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -210,7 +210,7 @@ public void putObject_testHarness_id() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo address = fileHashStore.putObject(dataStream, pid, null, null, null, 0); + ObjectInfo address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objAuthorityId = testData.pidData.get(pid).get("object_cid"); @@ -228,7 +228,7 @@ public void putObject_objSize() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, 0); + ObjectInfo objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -246,7 +246,7 @@ public void putObject_testHarness_hexDigests() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo address = fileHashStore.putObject(dataStream, pid, null, null, null, 0); + ObjectInfo address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); Map hexDigests = address.getHexDigests(); @@ -277,7 +277,7 @@ public void putObject_validateChecksumValue() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectInfo address = fileHashStore.putObject( - dataStream, pid, null, checksumCorrect, "MD2", 0 + dataStream, pid, null, checksumCorrect, "MD2", -1 ); String objCid = address.getId(); @@ -302,7 +302,7 @@ public void putObject_additionalAlgo_correctChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, "MD2", null, null, 0); + fileHashStore.putObject(dataStream, pid, "MD2", null, null, -1); String md2 = testData.pidData.get(pid).get("md2"); assertEquals(checksumCorrect, md2); @@ -321,7 +321,7 @@ public void putObject_incorrectChecksumValue() { String checksumIncorrect = "1c25df1c8ba1d2e57bb3fd4785878b85"; InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", 0); + fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", -1); }); } @@ -336,7 +336,7 @@ public void putObject_emptyChecksumValue() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, " ", "MD2", 0); + fileHashStore.putObject(dataStream, pid, null, " ", "MD2", -1); }); } @@ -351,7 +351,7 @@ public void putObject_nullChecksumValue() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, "MD2", 0); + fileHashStore.putObject(dataStream, pid, null, null, "MD2", -1); }); } @@ -366,7 +366,7 @@ public void putObject_emptyChecksumAlgorithmValue() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", " ", 0); + fileHashStore.putObject(dataStream, pid, null, "abc", " ", -1); }); } @@ -380,7 +380,7 @@ public void putObject_nullChecksumAlgorithmValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", null, 0); + fileHashStore.putObject(dataStream, pid, null, "abc", null, -1); }); } @@ -438,11 +438,11 @@ public void putObject_duplicateObject() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, null, 0); + fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Try duplicate upload InputStream dataStreamTwo = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStreamTwo, pid, null, null, null, 0); + fileHashStore.putObject(dataStreamTwo, pid, null, null, null, -1); }); } @@ -457,7 +457,7 @@ public void putObject_invalidAlgorithm() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, "SM2", null, null, 0); + fileHashStore.putObject(dataStream, pid, "SM2", null, null, -1); }); } @@ -472,7 +472,7 @@ public void putObject_emptyAlgorithm() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, " ", null, null, 0); + fileHashStore.putObject(dataStream, pid, " ", null, null, -1); }); } @@ -488,7 +488,7 @@ public void putObject_emptyPid() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pidEmpty, null, null, null, 0); + fileHashStore.putObject(dataStream, pidEmpty, null, null, null, -1); }); } @@ -503,7 +503,7 @@ public void putObject_nullPid() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, null, "MD2", null, null, 0); + fileHashStore.putObject(dataStream, null, "MD2", null, null, -1); }); } @@ -515,7 +515,7 @@ public void putObject_nullObject() { assertThrows(IllegalArgumentException.class, () -> { // Get test file to "upload" String pid = "jtao.1700.1"; - fileHashStore.putObject(null, pid, "MD2", null, null, 0); + fileHashStore.putObject(null, pid, "MD2", null, null, -1); }); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 0ba44ccb..d4c5458a 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -432,7 +432,7 @@ public void testExistingHashStoreConfiguration_missingYaml() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - secondHashStore.storeObject(dataStream, pid, null, null, null, 0); + secondHashStore.storeObject(dataStream, pid, null, null, null, -1); } // Delete configuration From 86febc008cee0bcf9e19eb33e25996d6a4529b32 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 23 Oct 2023 15:42:08 -0700 Subject: [PATCH 104/553] Refactor storeMetadata overload method --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index fdbc1417..5845331f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -663,12 +663,8 @@ public String storeMetadata(InputStream metadata, String pid) throws IOException "FileHashStore.storeMetadata - Called to store metadata for pid: " + pid + ", with default namespace." ); - // Validate input parameters - FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeMetadata"); - return syncPutMetadata(metadata, pid, DEFAULT_METADATA_NAMESPACE); + return storeMetadata(metadata, pid, DEFAULT_METADATA_NAMESPACE); } @Override From e3160e7686f7ab2685e14a7a93f2a775cae6e765 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 23 Oct 2023 15:45:40 -0700 Subject: [PATCH 105/553] Refactor deleteMetadata overload method --- .../filehashstore/FileHashStore.java | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 5845331f..b1728298 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -860,30 +860,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx */ public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid - ); - // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); - - // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", DEFAULT_METADATA_NAMESPACE); - - // Check to see if object exists - if (!Files.exists(metadataCidPath)) { - String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid - + " with metadata address: " + metadataCidPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } - - // Proceed to delete - Files.delete(metadataCidPath); - logFileHashStore.info( - "FileHashStore.deleteMetadata - File deleted for: " + pid + " with metadata address: " - + metadataCidPath - ); + deleteMetadata(pid, DEFAULT_METADATA_NAMESPACE); } @Override From b9343f566618e2fdf2d445a1ae5f0e1893c72160 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 23 Oct 2023 15:48:19 -0700 Subject: [PATCH 106/553] Clean up and remove redundant logging statements --- .../filehashstore/FileHashStore.java | 33 +++++-------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b1728298..e4a71f96 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -447,7 +447,7 @@ private ObjectInfo syncPutObject( synchronized (objectLockedIds) { if (objectLockedIds.contains(pid)) { String errMsg = - "FileHashStore.storeObject - Duplicate object request encountered for pid: " + "FileHashStore.syncPutObject - Duplicate object request encountered for pid: " + pid + ". Already in progress."; logFileHashStore.warn(errMsg); throw new RuntimeException(errMsg); @@ -460,7 +460,7 @@ private ObjectInfo syncPutObject( try { logFileHashStore.debug( - "FileHashStore.storeObject - called .putObject() to store pid: " + pid + "FileHashStore.syncPutObject - called .putObject() to store pid: " + pid + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum + ". checksumAlgorithm: " + checksumAlgorithm ); @@ -469,33 +469,33 @@ private ObjectInfo syncPutObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize ); logFileHashStore.info( - "FileHashStore.storeObject - Object stored for pid: " + pid + "FileHashStore.syncPutObject - Object stored for pid: " + pid + ". Permanent address: " + getRealPath(pid, "object", null) ); return objInfo; } catch (NoSuchAlgorithmException nsae) { - String errMsg = "FileHashStore.storeObject - Unable to store object for pid: " + pid + String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid + ". NoSuchAlgorithmException: " + nsae.getMessage(); logFileHashStore.error(errMsg); throw nsae; } catch (PidObjectExistsException poee) { - String errMsg = "FileHashStore.storeObject - Unable to store object for pid: " + pid + String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid + ". PidObjectExistsException: " + poee.getMessage(); logFileHashStore.error(errMsg); throw poee; } catch (IOException ioe) { // Covers AtomicMoveNotSupportedException, FileNotFoundException - String errMsg = "FileHashStore.storeObject - Unable to store object for pid: " + pid + String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid + ". IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; } catch (RuntimeException re) { // Covers SecurityException, IllegalArgumentException, NullPointerException - String errMsg = "FileHashStore.storeObject - Unable to store object for pid: " + pid + String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid + ". Runtime Exception: " + re.getMessage(); logFileHashStore.error(errMsg); throw re; @@ -504,7 +504,7 @@ private ObjectInfo syncPutObject( // Release lock synchronized (objectLockedIds) { logFileHashStore.debug( - "FileHashStore.storeObject - Releasing objectLockedIds for pid: " + pid + "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid ); objectLockedIds.remove(pid); objectLockedIds.notifyAll(); @@ -518,10 +518,6 @@ private ObjectInfo syncPutObject( @Override public ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { - logFileHashStore.debug( - "FileHashStore.storeObject - Called to store object for pid: " + pid - ); - FileHashStoreUtility.ensureNotNull( additionalAlgorithm, "additionalAlgorithm", "storeObject" ); @@ -536,10 +532,6 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA public ObjectInfo storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { - logFileHashStore.debug( - "FileHashStore.storeObject - Called to store object for pid: " + pid - ); - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "storeObject"); @@ -552,10 +544,6 @@ public ObjectInfo storeObject( @Override public ObjectInfo storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { - logFileHashStore.debug( - "FileHashStore.storeObject - Called to store object for pid: " + pid - ); - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); return storeObject(object, pid, null, null, null, objSize); @@ -659,11 +647,6 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF @Override public String storeMetadata(InputStream metadata, String pid) throws IOException, IllegalArgumentException, InterruptedException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.storeMetadata - Called to store metadata for pid: " + pid - + ", with default namespace." - ); - return storeMetadata(metadata, pid, DEFAULT_METADATA_NAMESPACE); } From 15eb39b6d73d593025b0bb2f6105ce27be595ea9 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 23 Oct 2023 16:17:34 -0700 Subject: [PATCH 107/553] Re-word inaccurate logging statements in 'validateTmpObject' method --- .../hashstore/filehashstore/FileHashStore.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e4a71f96..fccaaaf3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1041,8 +1041,10 @@ private void validateTmpObject( boolean deleteStatus = tmpFile.delete(); if (!deleteStatus) { String errMsg = - "FileHashStore.validateTmpObject - Object size stored does not match" - + ". Failed" + " to delete tmpFile: " + tmpFile.getName(); + "FileHashStore.validateTmpObject - objSize given is not equal to the" + + " stored object size. ObjSize: " + objSize + ". storedObjFileSize:" + + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile + .getName(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -1075,15 +1077,18 @@ private void validateTmpObject( boolean deleteStatus = tmpFile.delete(); if (!deleteStatus) { String errMsg = - "FileHashStore.validateTmpObject - Object cannot be validated, failed" - + " to delete tmpFile: " + tmpFile.getName(); + "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" + + " is not equal to the calculated hex digest: " + digestFromHexDigests + + ". Checksum" + " provided: " + checksum + + ". Failed to delete tmpFile: " + tmpFile.getName(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } String errMsg = "FileHashStore.validateTmpObject - Checksum given is not equal to the" + " calculated hex digest: " + digestFromHexDigests + ". Checksum" - + " provided: " + checksum + ". Deleting tmpFile: " + tmpFile.getName(); + + " provided: " + checksum + ". tmpFile has been deleted: " + tmpFile + .getName(); logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } From ad95d85054dd64c18507124867a4e979d4545aa3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 24 Oct 2023 11:52:20 -0700 Subject: [PATCH 108/553] Refactor 'putObject' to throw 'PidObjectExistsException' if duplicate file found after finish writing tmpFile --- .../hashstore/filehashstore/FileHashStore.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index fccaaaf3..a79f969e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -997,15 +997,16 @@ protected ObjectInfo putObject( if (!deleteStatus) { String errMsg = "FileHashStore.putObject - Object is found to be a duplicate after writing" - + " tmpFile. Attempted to delete tmpFile but failed: " + tmpFile.getName(); + + " tmpFile. Did not move object and failed to delete tmpFile: " + tmpFile + .getName(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } - objectCid = null; - logFileHashStore.info( + String errMsg = "FileHashStore.putObject - Did not move object, duplicate file found for pid: " - + pid + ". Deleted tmpFile: " + tmpFile.getName() - ); + + pid + ". Deleted tmpFile: " + tmpFile.getName(); + logFileHashStore.info(errMsg); + throw new PidObjectExistsException(errMsg); } else { File permFile = objRealPath.toFile(); move(tmpFile, permFile, "object"); From 4e7850d630674dedc347056312dba5d07c8e563a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 24 Oct 2023 12:34:03 -0700 Subject: [PATCH 109/553] Refactor 'syncPutMetadata' to use pid+formatId as the metadataLockedId to allow multiple types of metadata to be stored concurrently for the same pid --- .../hashstore/filehashstore/FileHashStore.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a79f969e..7e6761b8 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -582,16 +582,19 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF throws InterruptedException, IOException, NoSuchAlgorithmException { // Lock pid for thread safety, transaction control and atomic writing // Metadata storage requests for the same pid must be written serially + // However, the same pid could be used with different formatIds, so + // synchronize ids with pid + formatId; + String pidFormatId = pid + checkedFormatId; synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(pid)) { + while (metadataLockedIds.contains(pidFormatId)) { try { metadataLockedIds.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = "FileHashStore.storeMetadata - Metadata lock was interrupted while" - + " storing metadata for: " + pid + ". InterruptedException: " + ie - .getMessage(); + + " storing metadata for: " + pid + " and formatId: " + checkedFormatId + + ". InterruptedException: " + ie.getMessage(); logFileHashStore.warn(errMsg); throw ie; } @@ -599,7 +602,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF logFileHashStore.debug( "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid ); - metadataLockedIds.add(pid); + metadataLockedIds.add(pidFormatId); } try { @@ -634,8 +637,9 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF synchronized (metadataLockedIds) { logFileHashStore.debug( "FileHashStore.storeMetadata - Releasing metadataLockedIds for pid: " + pid + + " and formatId " + checkedFormatId ); - metadataLockedIds.remove(pid); + metadataLockedIds.remove(pidFormatId); metadataLockedIds.notifyAll(); } } From 39074e10c85c3d154f6bb3e61fe3f04224c86ede Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 24 Oct 2023 12:40:24 -0700 Subject: [PATCH 110/553] Update README.md --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 8be4e8ab..b9cf3cb4 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ and then install or build the package with `mvn install` or `mvn package`, respe We also maintain a parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). -## HashStore CommandLineTool Usage +## HashStore HashStoreClient Usage ```sh @@ -32,35 +32,35 @@ We also maintain a parallel [Python-based version of HashStore](https://github.c $ mvn clean package -Dmaven.test.skip=true # Get help -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -h +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -h # Step 2: ## Create a HashStore (long option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 ## Create a HashStore (short option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 # Get the checksum of a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 # Store a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 # Store a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Retrieve a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -retrieveobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrieveobject -pid testpid1 # Retrieve a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Delete a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -deleteobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deleteobject -pid testpid1 # Delete a metadata file -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.CommandLineTool -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 ``` ## License From 5185d69c5bdf95098582d7c5fa244827dab05e07 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 24 Oct 2023 13:27:50 -0700 Subject: [PATCH 111/553] Refactor 'putObject' for redundant Files.exists() check after writing tmpFile --- .../filehashstore/FileHashStore.java | 26 +++---------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7e6761b8..523b95dd 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -993,31 +993,11 @@ protected ObjectInfo putObject( ); // Move object + File permFile = objRealPath.toFile(); + move(tmpFile, permFile, "object"); logFileHashStore.debug( - "FileHashStore.putObject - Moving object: " + tmpFile + ". Destination: " + objRealPath + "FileHashStore.putObject - Move object success, permanent address: " + objRealPath ); - if (Files.exists(objRealPath)) { - boolean deleteStatus = tmpFile.delete(); - if (!deleteStatus) { - String errMsg = - "FileHashStore.putObject - Object is found to be a duplicate after writing" - + " tmpFile. Did not move object and failed to delete tmpFile: " + tmpFile - .getName(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - String errMsg = - "FileHashStore.putObject - Did not move object, duplicate file found for pid: " - + pid + ". Deleted tmpFile: " + tmpFile.getName(); - logFileHashStore.info(errMsg); - throw new PidObjectExistsException(errMsg); - } else { - File permFile = objRealPath.toFile(); - move(tmpFile, permFile, "object"); - logFileHashStore.debug( - "FileHashStore.putObject - Move object success, permanent address: " + objRealPath - ); - } // Create ObjectInfo to return with pertinent data return new ObjectInfo(objectCid, storedObjFileSize, hexDigests); From 87b961fe76111bda1bd0a37fe5d37c196434df0d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sat, 16 Dec 2023 15:10:28 -0800 Subject: [PATCH 112/553] Refactor 'storeObject' to store objects with their content identifiers as the permanent address and update tests --- .../filehashstore/FileHashStore.java | 41 ++++++++++--------- .../hashstore/HashStoreClientTest.java | 2 +- .../org/dataone/hashstore/HashStoreTest.java | 4 +- .../FileHashStoreInterfaceTest.java | 4 +- .../FileHashStoreProtectedTest.java | 4 +- 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 523b95dd..6709412f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -946,21 +946,6 @@ protected ObjectInfo putObject( // If validation is desired, checksumAlgorithm and checksum must both be present boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); - // Gather ObjectInfo elements and prepare object permanent address - String objectCid = getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String objShardString = getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); - Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); - - // If file (pid hash) exists, reject request immediately - if (Files.exists(objRealPath)) { - String errMsg = "FileHashStore.putObject - File already exists for pid: " + pid - + ". Object address: " + objRealPath + ". Aborting request."; - logFileHashStore.warn(errMsg); - throw new PidObjectExistsException(errMsg); - } - // Generate tmp file and write to it logFileHashStore.debug("FileHashStore.putObject - Generating tmpFile"); File tmpFile = generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); @@ -992,12 +977,28 @@ protected ObjectInfo putObject( storedObjFileSize ); - // Move object - File permFile = objRealPath.toFile(); - move(tmpFile, permFile, "object"); - logFileHashStore.debug( - "FileHashStore.putObject - Move object success, permanent address: " + objRealPath + // Gather the elements to form the permanent address + String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); + String objShardString = getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); + Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + + // Confirm that the object does not yet exist, delete tmpFile if so + if (Files.exists(objRealPath)) { + String errMsg = "FileHashStore.putObject - File already exists for pid: " + pid + + ". Object address: " + objRealPath + ". Aborting request."; + logFileHashStore.warn(errMsg); + tmpFile.delete(); + throw new PidObjectExistsException(errMsg); + } else { + // Move object + File permFile = objRealPath.toFile(); + move(tmpFile, permFile, "object"); + logFileHashStore.debug( + "FileHashStore.putObject - Move object success, permanent address: " + objRealPath + ); + } // Create ObjectInfo to return with pertinent data return new ObjectInfo(objectCid, storedObjFileSize, hexDigests); diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 443fda3e..3cd0c1b4 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -168,7 +168,7 @@ public void client_storeObjects() throws Exception { HashStoreClient.main(args); // Confirm object was stored - Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); + Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("sha256"), "object"); assertTrue(Files.exists(absPath)); // Put things back diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 0507987e..06444610 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -130,8 +130,8 @@ public void hashStore_storeObjects() throws Exception { ObjectInfo objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) - String objAuthorityId = testData.pidData.get(pid).get("object_cid"); - assertEquals(objAuthorityId, objInfo.getId()); + String objContentId = testData.pidData.get(pid).get("sha256"); + assertEquals(objContentId, objInfo.getId()); } } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index e6f7fbfc..8120159f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -108,8 +108,8 @@ public void storeObject() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - // Check id (sha-256 hex digest of the ab_id (pid)) - String objectCid = testData.pidData.get(pid).get("object_cid"); + // Check id (content identifier based on the store algorithm) + String objectCid = testData.pidData.get(pid).get("sha256"); assertEquals(objectCid, objInfo.getId()); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 819f5294..437d7b72 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -213,8 +213,8 @@ public void putObject_testHarness_id() throws Exception { ObjectInfo address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) - String objAuthorityId = testData.pidData.get(pid).get("object_cid"); - assertEquals(objAuthorityId, address.getId()); + String objContentId = testData.pidData.get(pid).get("sha256"); + assertEquals(objContentId, address.getId()); } } From 430a3ab225acc0513f0c74afdaa7f12787f7b91b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sat, 16 Dec 2023 15:25:23 -0800 Subject: [PATCH 113/553] Add 'tagObject', 'verifyObject' and 'findObject' to HashStore interface and update FileHashStore with placeholder methods --- .../java/org/dataone/hashstore/HashStore.java | 31 +++++++++++++++++++ .../filehashstore/FileHashStore.java | 17 ++++++++++ 2 files changed, 48 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 0693d1b3..8a99d1c4 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -80,6 +80,37 @@ ObjectInfo storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + /** + * Creates references that allow objects stored in HashStore to be discoverable. Retrieving, + * deleting or calculating a hex digest of an object is based on a pid argument; and to + * proceed, we must be able to find the object associated with the pid. + * + * @param pid Authority-based identifier + * @param cid Content-identifier (hash identifier) + * @return Boolean to indicate the pid and cid has been tagged. + */ + boolean tagObject(String pid, String cid); + + /** + * Confirms that an object_metadata's content is equal to the given values. + * + * @param objectInfo ObjectInfo object with values + * @param checksum Value of checksum to validate against + * @param checksumAlgorithm Algorithm of checksum submitted + * @param objSize Expected size of object to validate after storing + */ + void verifyObject( + ObjectInfo objectInfo, String checksum, String checksumAlgorithm, long objSize + ); + + /** + * Checks whether an object referenced by a pid exists and returns the content identifier. + * + * @param pid Authority-based identifier + * @return Content identifier + */ + String findObject(String pid); + /** * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a * persistent identifier (`pid`) and metadata format (`formatId`). The permanent address of diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6709412f..fe06a6d4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -549,6 +549,23 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) return storeObject(object, pid, null, null, null, objSize); } + @Override + public boolean tagObject(String pid, String cid) { + return true; + } + + @Override + public void verifyObject( + ObjectInfo objectInfo, String checksum, String checksumAlgorithm, long objSize + ) { + return; + } + + @Override + public String findObject(String pid) { + return "CID"; + } + @Override public String storeMetadata(InputStream metadata, String pid, String formatId) throws IOException, FileNotFoundException, IllegalArgumentException, InterruptedException, From fdd0abff7f13ac081ded7b3ab4152876ef170bff Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sat, 16 Dec 2023 15:39:17 -0800 Subject: [PATCH 114/553] Add new synchronization ArrayList 'referenceLockedCids' and skeleton for 'tagObject' method --- .../filehashstore/FileHashStore.java | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index fe06a6d4..fdaea2b1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -46,6 +46,7 @@ public class FileHashStore implements HashStore { private static final int TIME_OUT_MILLISEC = 1000; private static final ArrayList objectLockedIds = new ArrayList<>(100); private static final ArrayList metadataLockedIds = new ArrayList<>(100); + private static final ArrayList referenceLockedCids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; @@ -551,7 +552,53 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) @Override public boolean tagObject(String pid, String cid) { - return true; + logFileHashStore.debug( + "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid + ); + // Validate input parameters + FileHashStoreUtility.ensureNotNull(pid, "pid", "tagObject"); + FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "tagObject"); + FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); + + synchronized (referenceLockedCids) { + if (referenceLockedCids.contains(pid)) { + String errMsg = + "FileHashStore.tagObject - Duplicate tag request encountered for cid: " + cid + + ". Already in progress."; + logFileHashStore.error(errMsg); + throw new RuntimeException(errMsg); + } + logFileHashStore.debug( + "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid + ); + objectLockedIds.add(pid); + } + + try { + // TODO: + // - Get absolute path for pid refs file + // - Get absolute path for cid refs file + // - Write pid refs file to tmp file + // - Write cid refs file to tmp file + // - Move pid refs file + // - Move cid refs file + // - Verify process succeeded + + return true; + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid + ); + referenceLockedCids.remove(pid); + referenceLockedCids.notifyAll(); + } + } + + } @Override From 1360477e3c6b571e21ecbe3c81441a63918beae7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sat, 16 Dec 2023 19:49:47 -0800 Subject: [PATCH 115/553] Refactor FileHashStore constructor for 'refs' directories and add revise/add new junit tests --- .../filehashstore/FileHashStore.java | 15 ++++++++- .../FileHashStorePublicTest.java | 31 ++++++++++--------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index fdaea2b1..292c4286 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -56,6 +56,10 @@ public class FileHashStore implements HashStore { private final String DEFAULT_METADATA_NAMESPACE; private final Path METADATA_STORE_DIRECTORY; private final Path METADATA_TMP_FILE_DIRECTORY; + private final Path REFS_STORE_DIRECTORY; + private final Path REFS_TMP_FILE_DIRECTORY; + private final Path REFS_PID_FILE_DIRECTORY; + private final Path REFS_CID_FILE_DIRECTORY; public static final String HASHSTORE_YAML = "hashstore.yaml"; @@ -129,19 +133,28 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; DEFAULT_METADATA_NAMESPACE = storeMetadataNamespace; - // Resolve object/metadata directories + // Resolve object/metadata/refs directories OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); METADATA_STORE_DIRECTORY = storePath.resolve("metadata"); + REFS_STORE_DIRECTORY = storePath.resolve("refs"); // Resolve tmp object/metadata directory paths, this is where objects are // created before they are moved to their permanent address OBJECT_TMP_FILE_DIRECTORY = OBJECT_STORE_DIRECTORY.resolve("tmp"); METADATA_TMP_FILE_DIRECTORY = METADATA_STORE_DIRECTORY.resolve("tmp"); + REFS_TMP_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("tmp"); + REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("pid"); + REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("cid"); + try { // Physically create object & metadata store and tmp directories Files.createDirectories(OBJECT_STORE_DIRECTORY); Files.createDirectories(METADATA_STORE_DIRECTORY); + Files.createDirectories(REFS_STORE_DIRECTORY); Files.createDirectories(OBJECT_TMP_FILE_DIRECTORY); Files.createDirectories(METADATA_TMP_FILE_DIRECTORY); + Files.createDirectories(REFS_TMP_FILE_DIRECTORY); + Files.createDirectories(REFS_PID_FILE_DIRECTORY); + Files.createDirectories(REFS_CID_FILE_DIRECTORY); logFileHashStore.debug("FileHashStore - Created store and store tmp directories."); } catch (IOException ioe) { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index d4c5458a..05b0fa42 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -247,39 +247,40 @@ public void initDefaultStore_directoryNull() { } /** - * Check object store directory is created after initialization + * Check object store and tmp directories are created after initialization */ @Test - public void initObjDirectory() { + public void initObjDirectories() { Path checkObjectStorePath = objStringFull; assertTrue(Files.isDirectory(checkObjectStorePath)); - } - - /** - * Check object store tmp directory is created after initialization - */ - @Test - public void initObjTmpDirectory() { Path checkTmpPath = objTmpStringFull; assertTrue(Files.isDirectory(checkTmpPath)); } /** - * Check metadata store directory is created after initialization + * Check metadata store and tmp directories are created after initialization */ @Test - public void initMetadataDirectory() { + public void initMetadataDirectories() { Path checkMetadataStorePath = metadataStringFull; assertTrue(Files.isDirectory(checkMetadataStorePath)); + Path checkMetadataTmpPath = metadataTmpStringFull; + assertTrue(Files.isDirectory(checkMetadataTmpPath)); } /** - * Check metadata store tmp directory is created after initialization + * Check refs tmp, pid and cid directories are created after initialization */ @Test - public void initMetadataTmpDirectory() { - Path checkMetadataTmpPath = metadataTmpStringFull; - assertTrue(Files.isDirectory(checkMetadataTmpPath)); + public void initRefsDirectories() { + Path refsPath = rootDirectory.resolve("refs"); + assertTrue(Files.isDirectory(refsPath)); + Path refsTmpPath = rootDirectory.resolve("refs/tmp"); + assertTrue(Files.isDirectory(refsTmpPath)); + Path refsPidPath = rootDirectory.resolve("refs/pid"); + assertTrue(Files.isDirectory(refsPidPath)); + Path refsCidPath = rootDirectory.resolve("refs/cid"); + assertTrue(Files.isDirectory(refsCidPath)); } /** From e221fb2d7921b90cadb234c9de32ec0d8d27d2ed Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sat, 16 Dec 2023 20:23:32 -0800 Subject: [PATCH 116/553] Add all related code for 'tag_object' to throw new PidRefsFileExistsException class when pid refs file already exists --- .../java/org/dataone/hashstore/HashStore.java | 5 ++- .../PidRefsFileExistsException.java | 13 +++++++ .../filehashstore/FileHashStore.java | 36 +++++++++++++------ 3 files changed, 43 insertions(+), 11 deletions(-) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 8a99d1c4..feec7d5c 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -6,6 +6,7 @@ import java.security.NoSuchAlgorithmException; import org.dataone.hashstore.exceptions.PidObjectExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; /** * HashStore is a content-addressable file management system that utilizes the hash/hex digest of a @@ -88,8 +89,10 @@ ObjectInfo storeObject(InputStream object, String pid, long objSize) * @param pid Authority-based identifier * @param cid Content-identifier (hash identifier) * @return Boolean to indicate the pid and cid has been tagged. + * @throws IOException Failure to create tmp file + * @throws PidRefsFileExistsException When pid refs file already exists */ - boolean tagObject(String pid, String cid); + boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException; /** * Confirms that an object_metadata's content is equal to the given values. diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java new file mode 100644 index 00000000..586d0f1f --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java @@ -0,0 +1,13 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +/** + * Custom exception class for FileHashStore pidObjects + */ +public class PidRefsFileExistsException extends IOException { + public PidRefsFileExistsException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 292c4286..220f5db5 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -35,6 +35,7 @@ import org.dataone.hashstore.ObjectInfo; import org.dataone.hashstore.HashStore; import org.dataone.hashstore.exceptions.PidObjectExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; /** * FileHashStore is a HashStore adapter class that manages the storage of objects and metadata to a @@ -564,7 +565,7 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) } @Override - public boolean tagObject(String pid, String cid) { + public boolean tagObject(String pid, String cid) throws IOException { logFileHashStore.debug( "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid ); @@ -589,14 +590,30 @@ public boolean tagObject(String pid, String cid) { } try { - // TODO: - // - Get absolute path for pid refs file - // - Get absolute path for cid refs file - // - Write pid refs file to tmp file - // - Write cid refs file to tmp file - // - Move pid refs file - // - Move cid refs file - // - Verify process succeeded + // Check that pid refs file doesn't exist yet + String pidShardString = getHierarchicalPathString(3, 2, pid); + String cidShardString = getHierarchicalPathString(3, 2, cid); + Path absPathCidRefsPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); + Path absPathPidRefsPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); + + if (Files.exists(absPathPidRefsPath)) { + String errMsg = "FileHashStore.tagObject - pid ref files already exists for pid: " + + pid + ". A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); + } else if (Files.exists(absPathCidRefsPath)) { + // TODO: + // Update cid refs file + } else { + // TODO: + // - Write pid refs file to tmp file + File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + // - Write cid refs file to tmp file + File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + // - Move pid refs file + // - Move cid refs file + // - Verify process succeeded + } return true; @@ -1455,7 +1472,6 @@ protected void move(File source, File target, String entity) throws IOException, ); // Validate input parameters FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); - FileHashStoreUtility.checkForEmptyString(entity, "entity", "move"); // Entity is only used when checking for an existence of an object if (entity.equals("object") && target.exists()) { From f489370f21939a39c19c525e585217b5b62d2087 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sat, 16 Dec 2023 21:13:07 -0800 Subject: [PATCH 117/553] Update 'tag_object' with new method 'writePidRefsFile' and add junit test in new test class 'FileHashStoreReferencesTest' --- .../filehashstore/FileHashStore.java | 30 +++++++- .../FileHashStoreReferencesTest.java | 77 +++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 220f5db5..a0bb183c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -601,21 +601,26 @@ public boolean tagObject(String pid, String cid) throws IOException { + pid + ". A pid can only reference one cid."; logFileHashStore.error(errMsg); throw new PidRefsFileExistsException(errMsg); + } else if (Files.exists(absPathCidRefsPath)) { // TODO: // Update cid refs file + return true; + } else { // TODO: // - Write pid refs file to tmp file File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + writePidRefsFile(pidRefsTmpFile, cid); // - Write cid refs file to tmp file File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); // - Move pid refs file // - Move cid refs file // - Verify process succeeded + return true; + } - return true; } finally { // Release lock @@ -1515,6 +1520,29 @@ protected void move(File source, File target, String entity) throws IOException, } } + /** + * Writes the given 'cid' into the provided file. + * + * @param tmpFile File object to write into + * @param cid Content identifier to write + * @throws IOException Failure to write pid refs file + */ + protected void writePidRefsFile(File tmpFile, String cid) throws IOException { + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter(Files.newOutputStream(tmpFile.toPath()), StandardCharsets.UTF_8) + )) { + writer.write(cid); + writer.close(); + + } catch (IOException ioe) { + logFileHashStore.error( + "FileHashStore.writeHashStoreYaml() - Unable to write pid refs file for cid: " + cid + + " IOException: " + ioe.getMessage() + ); + throw ioe; + } + } + /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java new file mode 100644 index 00000000..9bb4b5c3 --- /dev/null +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -0,0 +1,77 @@ +package org.dataone.hashstore.filehashstore; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.NoSuchAlgorithmException; +import java.util.Properties; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Test class for FileHashStore references related methods + */ +public class FileHashStoreReferencesTest { + private FileHashStore fileHashStore; + private Properties fhsProperties; + private Path rootDirectory; + + /** + * Initialize FileHashStore before each test to creates tmp directories + */ + @BeforeEach + public void initializeFileHashStore() { + rootDirectory = tempFolder.resolve("metacat"); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + ); + + try { + fhsProperties = storeProperties; + fileHashStore = new FileHashStore(storeProperties); + + } catch (IOException ioe) { + fail("IOException encountered: " + ioe.getMessage()); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + } + + /** + * Temporary folder for tests to run in + */ + @TempDir + public Path tempFolder; + + // TODO: Add tests for tag object + // TODO: Add tests to check exception thrown when pid refs file already exists + + /** + * Check that the cid supplied is written into the file given + */ + @Test + public void writePidRefsFile_Content() throws Exception { + Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); + File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); + String cidToWrite = "test_cid_123"; + fileHashStore.writePidRefsFile(refsTmpFile, cidToWrite); + + String cidRead = new String(Files.readAllBytes(refsTmpFile.toPath())); + assertEquals(cidRead, cidToWrite); + + } +} From 2c78315ad666e5f62461f92b8c30bd06628d1cda Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sun, 17 Dec 2023 11:27:23 -0800 Subject: [PATCH 118/553] Update 'tag_object' with new method 'writeCidRefsFile' and add new junit test --- .../filehashstore/FileHashStore.java | 28 ++++++++++++++++++- .../FileHashStoreReferencesTest.java | 17 ++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a0bb183c..71352b55 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -597,7 +597,7 @@ public boolean tagObject(String pid, String cid) throws IOException { Path absPathPidRefsPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); if (Files.exists(absPathPidRefsPath)) { - String errMsg = "FileHashStore.tagObject - pid ref files already exists for pid: " + String errMsg = "FileHashStore.tagObject - pid refs file already exists for pid: " + pid + ". A pid can only reference one cid."; logFileHashStore.error(errMsg); throw new PidRefsFileExistsException(errMsg); @@ -614,6 +614,7 @@ public boolean tagObject(String pid, String cid) throws IOException { writePidRefsFile(pidRefsTmpFile, cid); // - Write cid refs file to tmp file File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + writePidRefsFile(cidRefsTmpFile, pid); // - Move pid refs file // - Move cid refs file // - Verify process succeeded @@ -1520,6 +1521,31 @@ protected void move(File source, File target, String entity) throws IOException, } } + /** + * Writes the given 'pid' into the provided file. + * + * @param tmpFile File object to write into + * @param pid Authority-based or persistent identifier to write + * @throws IOException Failure to write pid refs file + */ + protected void writeCidRefsFile(File tmpFile, String pid) throws IOException { + String pidNewLine = pid + "\n"; + + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter(Files.newOutputStream(tmpFile.toPath()), StandardCharsets.UTF_8) + )) { + writer.write(pidNewLine); + writer.close(); + + } catch (IOException ioe) { + logFileHashStore.error( + "FileHashStore.writeHashStoreYaml() - Unable to write cid refs file for pid: " + pid + + " IOException: " + ioe.getMessage() + ); + throw ioe; + } + } + /** * Writes the given 'cid' into the provided file. * diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 9bb4b5c3..4a30e5c6 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -64,7 +64,7 @@ public void initializeFileHashStore() { * Check that the cid supplied is written into the file given */ @Test - public void writePidRefsFile_Content() throws Exception { + public void writePidRefsFile_content() throws Exception { Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); String cidToWrite = "test_cid_123"; @@ -74,4 +74,19 @@ public void writePidRefsFile_Content() throws Exception { assertEquals(cidRead, cidToWrite); } + + /** + * Check that the pid supplied is written into the file given with a new line + */ + @Test + public void writeCidRefsFile_content() throws Exception { + Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); + File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); + String pidToWrite = "dou.test.123"; + fileHashStore.writeCidRefsFile(refsTmpFile, pidToWrite); + + String cidRead = new String(Files.readAllBytes(refsTmpFile.toPath())); + assertEquals(cidRead, pidToWrite + "\n"); + + } } From c4a2d3e542ca8bc087e18bbe931875ceee2a7862 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sun, 17 Dec 2023 12:48:50 -0800 Subject: [PATCH 119/553] Update 'tag_object' to move tmp refs files to their permanent locations and add new junit tests --- .../java/org/dataone/hashstore/HashStore.java | 5 +- .../filehashstore/FileHashStore.java | 20 ++++--- .../FileHashStoreReferencesTest.java | 57 ++++++++++++++++++- 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index feec7d5c..fd281a75 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -91,8 +91,11 @@ ObjectInfo storeObject(InputStream object, String pid, long objSize) * @return Boolean to indicate the pid and cid has been tagged. * @throws IOException Failure to create tmp file * @throws PidRefsFileExistsException When pid refs file already exists + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address does + * not exist. */ - boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException; + boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, + NoSuchAlgorithmException; /** * Confirms that an object_metadata's content is equal to the given values. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 71352b55..ac1ad1eb 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -527,6 +527,8 @@ private ObjectInfo syncPutObject( } } + // TODO: storeObject with just InputStream + /** * Overload method for storeObject with an additionalAlgorithm */ @@ -565,7 +567,8 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) } @Override - public boolean tagObject(String pid, String cid) throws IOException { + public boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, + NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid ); @@ -591,7 +594,8 @@ public boolean tagObject(String pid, String cid) throws IOException { try { // Check that pid refs file doesn't exist yet - String pidShardString = getHierarchicalPathString(3, 2, pid); + String pidRefId = getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidShardString = getHierarchicalPathString(3, 2, pidRefId); String cidShardString = getHierarchicalPathString(3, 2, cid); Path absPathCidRefsPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); Path absPathPidRefsPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); @@ -608,15 +612,17 @@ public boolean tagObject(String pid, String cid) throws IOException { return true; } else { - // TODO: - // - Write pid refs file to tmp file + // Write pid refs file to tmp file File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); writePidRefsFile(pidRefsTmpFile, cid); - // - Write cid refs file to tmp file + // Write cid refs file to tmp file File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); writePidRefsFile(cidRefsTmpFile, pid); - // - Move pid refs file - // - Move cid refs file + // Move refs files to permanent location + File absPathPidRefsFile = absPathPidRefsPath.toFile(); + File absPathCidRefsFile = absPathCidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); // - Verify process succeeded return true; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 4a30e5c6..c7e4d8d5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -1,15 +1,20 @@ package org.dataone.hashstore.filehashstore; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.security.NoSuchAlgorithmException; import java.util.Properties; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -57,8 +62,56 @@ public void initializeFileHashStore() { @TempDir public Path tempFolder; - // TODO: Add tests for tag object - // TODO: Add tests to check exception thrown when pid refs file already exists + /** + * Utility method to get absolute path of a given object + */ + public Path getObjectAbsPath(String id, String entity) { + int shardDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int shardWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // Get relative path + String objCidShardString = fileHashStore.getHierarchicalPathString( + shardDepth, shardWidth, id + ); + // Get absolute path + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + + return storePath.resolve(entity).resolve(objCidShardString); + } + + /** + * Check that tagObject writes expected refs files + */ + @Test + public void tagObject_refFilesWritten() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + String pidAddress = fileHashStore.getPidHexDigest( + pid, fhsProperties.getProperty("storeAlgorithm") + ); + Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); + assertTrue(Files.exists(pidRefsFilePath)); + + Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + assertTrue(Files.exists(cidRefsFilePath)); + + } + + /** + * Check that tagObject throws exception when pid refs file already exists + */ + @Test + public void tagObject_pidRefsFileExists() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + assertThrows(PidRefsFileExistsException.class, () -> { + fileHashStore.tagObject(pid, cid); + }); + + } /** * Check that the cid supplied is written into the file given From 5bab09f556b6dd4d670f3e09e040e8f84d52f614 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sun, 17 Dec 2023 15:56:25 -0800 Subject: [PATCH 120/553] Update 'tag_object' to verify tagging process with new method 'verifyHashStoreRefFiles' and add new junit tests --- .../filehashstore/FileHashStore.java | 73 +++++++++++++++++-- .../FileHashStoreReferencesTest.java | 73 +++++++++++++++++-- 2 files changed, 134 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index ac1ad1eb..714e89d6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -593,13 +593,13 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } try { - // Check that pid refs file doesn't exist yet String pidRefId = getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidShardString = getHierarchicalPathString(3, 2, pidRefId); String cidShardString = getHierarchicalPathString(3, 2, cid); - Path absPathCidRefsPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); - Path absPathPidRefsPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); + Path absPathPidRefsPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); + Path absPathCidRefsPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); + // Check that pid refs file doesn't exist yet if (Files.exists(absPathPidRefsPath)) { String errMsg = "FileHashStore.tagObject - pid refs file already exists for pid: " + pid + ". A pid can only reference one cid."; @@ -617,15 +617,16 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile writePidRefsFile(pidRefsTmpFile, cid); // Write cid refs file to tmp file File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - writePidRefsFile(cidRefsTmpFile, pid); + writeCidRefsFile(cidRefsTmpFile, pid); // Move refs files to permanent location File absPathPidRefsFile = absPathPidRefsPath.toFile(); File absPathCidRefsFile = absPathCidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // - Verify process succeeded - return true; + // Verify tagging process, this throws exceptions if there's an issue + verifyHashStoreRefFiles(pid, cid, absPathPidRefsPath, absPathCidRefsPath); + return true; } @@ -1575,6 +1576,66 @@ protected void writePidRefsFile(File tmpFile, String cid) throws IOException { } } + /** + * Verifies that the reference files for the given pid and cid exist and contain + * the expected values. + * + * @param pid Authority-based or persistent identifier + * @param cid Content identifier + * @param absPathPidRefsPath Path to where the pid refs file exists + * @param absPathCidRefsPath Path to where the cid refs file exists + * @throws FileNotFoundException Any refs files are missing + * @throws IOException Unable to read any of the refs files or if the refs content + * is not what is expected + */ + protected void verifyHashStoreRefFiles( + String pid, String cid, Path absPathPidRefsPath, Path absPathCidRefsPath + ) throws FileNotFoundException, IOException { + // First check that the files exist + if (!Files.exists(absPathCidRefsPath)) { + String errMsg = "FileHashStore.verifyHashStoreRefFiles - cid refs file is missing: " + + absPathCidRefsPath + " for pid: " + pid; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); + } + if (!Files.exists(absPathPidRefsPath)) { + String errMsg = "FileHashStore.verifyHashStoreRefFiles - pid refs file is missing: " + + absPathPidRefsPath + " for cid: " + cid; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); + } + // Now verify the content + try { + String cidRead = new String(Files.readAllBytes(absPathPidRefsPath)); + if (!cidRead.equals(cid)) { + System.out.println(cidRead); + System.out.println(cid); + String errMsg = "FileHashStore.verifyHashStoreRefFiles - Unexpected cid: " + cidRead + + " found in pid refs file: " + absPathPidRefsPath + ". Expected cid: " + cid; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + // This will strip new line characters + List lines = Files.readAllLines(absPathCidRefsPath); + boolean pidFoundInCidRefFiles = false; + for (String line : lines) { + if (line.equals(pid)) { + pidFoundInCidRefFiles = true; + } + } + if (!pidFoundInCidRefFiles) { + String errMsg = "FileHashStore.verifyHashStoreRefFiles - Missing expected pid: " + + pid + " in cid refs file: " + absPathCidRefsPath; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + } catch (IOException ioe) { + String errMsg = "FileHashStore.verifyHashStoreRefFiles - " + ioe.getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + } + /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index c7e4d8d5..1cefbcff 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -7,7 +7,6 @@ import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -79,10 +78,10 @@ public Path getObjectAbsPath(String id, String entity) { } /** - * Check that tagObject writes expected refs files + * Check that tagObject writes expected pid refs files */ @Test - public void tagObject_refFilesWritten() throws Exception { + public void tagObject_pidRefsFile() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); @@ -92,10 +91,19 @@ public void tagObject_refFilesWritten() throws Exception { ); Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); assertTrue(Files.exists(pidRefsFilePath)); + } + + /** + * Check that tagObject writes expected cid refs files + */ + @Test + public void tagObject_cidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); assertTrue(Files.exists(cidRefsFilePath)); - } /** @@ -138,8 +146,61 @@ public void writeCidRefsFile_content() throws Exception { String pidToWrite = "dou.test.123"; fileHashStore.writeCidRefsFile(refsTmpFile, pidToWrite); - String cidRead = new String(Files.readAllBytes(refsTmpFile.toPath())); - assertEquals(cidRead, pidToWrite + "\n"); + String pidRead = new String(Files.readAllBytes(refsTmpFile.toPath())); + assertEquals(pidRead, pidToWrite + "\n"); } + + /** + * Check that exception is thrown when incorrect cid in a pid refs file. + */ + @Test + public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a pid refs file with the incorrect cid + Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); + File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); + String cidToWrite = "dou.test.123"; + fileHashStore.writePidRefsFile(refsTmpFile, cidToWrite); + Path refsTmpFileAbsPath = refsTmpFileDirectory.resolve(refsTmpFile.getName()); + + // Get path of the cid refs file + Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + + assertThrows(IOException.class, () -> { + fileHashStore.verifyHashStoreRefFiles(pid, cid, refsTmpFileAbsPath, cidRefsFilePath); + }); + } + + /** + * Check that exception is thrown when an expected pid is not found in a cid refs file + */ + @Test + public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a cid refs file with the incorrect pid + Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); + File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); + String cidToWrite = "dou.test.2"; + fileHashStore.writeCidRefsFile(refsTmpFile, cidToWrite); + Path refsTmpFileAbsPath = refsTmpFileDirectory.resolve(refsTmpFile.getName()); + + // Get path of the pid refs file + String pidAddress = fileHashStore.getPidHexDigest( + pid, fhsProperties.getProperty("storeAlgorithm") + ); + Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); + + assertThrows(IOException.class, () -> { + fileHashStore.verifyHashStoreRefFiles(pid, cid, pidRefsFilePath, refsTmpFileAbsPath); + }); + } + + // TODO: Write test for when updating a cid refs file with additional pids } From 43f012371c6a2ce3ad826d7105521d2401da168d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sun, 17 Dec 2023 15:59:25 -0800 Subject: [PATCH 121/553] Update 'tagObject' javadoc --- src/main/java/org/dataone/hashstore/HashStore.java | 5 +++-- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index fd281a75..05f25133 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -92,10 +92,11 @@ ObjectInfo storeObject(InputStream object, String pid, long objSize) * @throws IOException Failure to create tmp file * @throws PidRefsFileExistsException When pid refs file already exists * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address does - * not exist. + * not exist + * @throws FileNotFoundException If refs file is missing during verification */ boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException; + NoSuchAlgorithmException, FileNotFoundException; /** * Confirms that an object_metadata's content is equal to the given values. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 714e89d6..5e76d11b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -568,7 +568,7 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) @Override public boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException { + NoSuchAlgorithmException, FileNotFoundException { logFileHashStore.debug( "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid ); @@ -629,7 +629,6 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile return true; } - } finally { // Release lock synchronized (referenceLockedCids) { From 3e935516b6143c6dbe47b06f5928beb0bc14d5b5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 10:32:23 -0800 Subject: [PATCH 122/553] Change 'verifyHashStoreRefFiles' method name to 'verifyHashStoreRefsFiles' --- .../hashstore/filehashstore/FileHashStore.java | 17 +++++++++-------- .../FileHashStoreReferencesTest.java | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 5e76d11b..e67852ed 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -624,7 +624,7 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile move(pidRefsTmpFile, absPathPidRefsFile, "refs"); move(cidRefsTmpFile, absPathCidRefsFile, "refs"); // Verify tagging process, this throws exceptions if there's an issue - verifyHashStoreRefFiles(pid, cid, absPathPidRefsPath, absPathCidRefsPath); + verifyHashStoreRefsFiles(pid, cid, absPathPidRefsPath, absPathCidRefsPath); return true; } @@ -1587,18 +1587,18 @@ protected void writePidRefsFile(File tmpFile, String cid) throws IOException { * @throws IOException Unable to read any of the refs files or if the refs content * is not what is expected */ - protected void verifyHashStoreRefFiles( + protected void verifyHashStoreRefsFiles( String pid, String cid, Path absPathPidRefsPath, Path absPathCidRefsPath ) throws FileNotFoundException, IOException { // First check that the files exist if (!Files.exists(absPathCidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefFiles - cid refs file is missing: " + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " + absPathCidRefsPath + " for pid: " + pid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } if (!Files.exists(absPathPidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefFiles - pid refs file is missing: " + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - pid refs file is missing: " + absPathPidRefsPath + " for cid: " + cid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); @@ -1609,8 +1609,9 @@ protected void verifyHashStoreRefFiles( if (!cidRead.equals(cid)) { System.out.println(cidRead); System.out.println(cid); - String errMsg = "FileHashStore.verifyHashStoreRefFiles - Unexpected cid: " + cidRead - + " found in pid refs file: " + absPathPidRefsPath + ". Expected cid: " + cid; + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Unexpected cid: " + + cidRead + " found in pid refs file: " + absPathPidRefsPath + + ". Expected cid: " + cid; logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -1623,13 +1624,13 @@ protected void verifyHashStoreRefFiles( } } if (!pidFoundInCidRefFiles) { - String errMsg = "FileHashStore.verifyHashStoreRefFiles - Missing expected pid: " + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " + pid + " in cid refs file: " + absPathCidRefsPath; logFileHashStore.error(errMsg); throw new IOException(errMsg); } } catch (IOException ioe) { - String errMsg = "FileHashStore.verifyHashStoreRefFiles - " + ioe.getMessage(); + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 1cefbcff..9387353f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -171,7 +171,7 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); assertThrows(IOException.class, () -> { - fileHashStore.verifyHashStoreRefFiles(pid, cid, refsTmpFileAbsPath, cidRefsFilePath); + fileHashStore.verifyHashStoreRefsFiles(pid, cid, refsTmpFileAbsPath, cidRefsFilePath); }); } @@ -198,7 +198,7 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); assertThrows(IOException.class, () -> { - fileHashStore.verifyHashStoreRefFiles(pid, cid, pidRefsFilePath, refsTmpFileAbsPath); + fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, refsTmpFileAbsPath); }); } From 9b2d597115c4289a4b372cc4eccca4f0df2e4091 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 11:30:09 -0800 Subject: [PATCH 123/553] Fix logging statement inaccuracies --- .../hashstore/filehashstore/FileHashStore.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e67852ed..3edd78ef 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -579,7 +579,7 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); synchronized (referenceLockedCids) { - if (referenceLockedCids.contains(pid)) { + if (referenceLockedCids.contains(cid)) { String errMsg = "FileHashStore.tagObject - Duplicate tag request encountered for cid: " + cid + ". Already in progress."; @@ -587,9 +587,9 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile throw new RuntimeException(errMsg); } logFileHashStore.debug( - "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid + "FileHashStore.tagObject - Synchronizing referenceLockedCids for pid: " + pid ); - objectLockedIds.add(pid); + objectLockedIds.add(cid); } try { @@ -633,9 +633,9 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid + "FileHashStore.tagObject - Releasing referenceLockedCids for cid: " + cid ); - referenceLockedCids.remove(pid); + referenceLockedCids.remove(cid); referenceLockedCids.notifyAll(); } } @@ -1545,7 +1545,7 @@ protected void writeCidRefsFile(File tmpFile, String pid) throws IOException { } catch (IOException ioe) { logFileHashStore.error( - "FileHashStore.writeHashStoreYaml() - Unable to write cid refs file for pid: " + pid + "FileHashStore.writeCidRefsFile - Unable to write cid refs file for pid: " + pid + " IOException: " + ioe.getMessage() ); throw ioe; @@ -1568,7 +1568,7 @@ protected void writePidRefsFile(File tmpFile, String cid) throws IOException { } catch (IOException ioe) { logFileHashStore.error( - "FileHashStore.writeHashStoreYaml() - Unable to write pid refs file for cid: " + cid + "FileHashStore.writePidRefsFile - Unable to write pid refs file for cid: " + cid + " IOException: " + ioe.getMessage() ); throw ioe; From f94f1ae23e36d8183922da5ee84733bd04953a88 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 13:07:35 -0800 Subject: [PATCH 124/553] Add new custom exception class 'PidExistsInCidRefsFileException' --- .../exceptions/PidExistsInCidRefsFileException.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java new file mode 100644 index 00000000..85d222b7 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java @@ -0,0 +1,13 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +/** + * Custom exception class for FileHashStore pidObjects + */ +public class PidExistsInCidRefsFileException extends IOException { + public PidExistsInCidRefsFileException(String message) { + super(message); + } + +} From 0c0d4ec93ef1ac4032f8141f5a1840f99ab8db0e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 13:11:55 -0800 Subject: [PATCH 125/553] Finalize 'tagObject' method, update javadocs, add new method 'updateCidRefsFiles' and add new junit tests --- .../java/org/dataone/hashstore/HashStore.java | 14 ++-- .../filehashstore/FileHashStore.java | 70 ++++++++++++++++- .../FileHashStoreReferencesTest.java | 78 ++++++++++++++++++- 3 files changed, 152 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 05f25133..38c4c252 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.security.NoSuchAlgorithmException; +import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -89,14 +90,15 @@ ObjectInfo storeObject(InputStream object, String pid, long objSize) * @param pid Authority-based identifier * @param cid Content-identifier (hash identifier) * @return Boolean to indicate the pid and cid has been tagged. - * @throws IOException Failure to create tmp file - * @throws PidRefsFileExistsException When pid refs file already exists - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address does - * not exist - * @throws FileNotFoundException If refs file is missing during verification + * @throws IOException Failure to create tmp file + * @throws PidRefsFileExistsException When pid refs file already exists + * @throws PidExistsInCidRefsFileException pid to write already exists in a cid refs file + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address + * does not exist + * @throws FileNotFoundException If refs file is missing during verification */ boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException, FileNotFoundException; + PidExistsInCidRefsFileException, NoSuchAlgorithmException, FileNotFoundException; /** * Confirms that an object_metadata's content is equal to the given values. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3edd78ef..a540d5c1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -4,9 +4,13 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; import java.nio.charset.StandardCharsets; import java.nio.file.AtomicMoveNotSupportedException; import java.nio.file.FileAlreadyExistsException; @@ -34,6 +38,7 @@ import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectInfo; import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -568,7 +573,7 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) @Override public boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException, FileNotFoundException { + NoSuchAlgorithmException, FileNotFoundException, PidExistsInCidRefsFileException { logFileHashStore.debug( "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid ); @@ -607,8 +612,35 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile throw new PidRefsFileExistsException(errMsg); } else if (Files.exists(absPathCidRefsPath)) { - // TODO: - // Update cid refs file + // Ensure that the pid is not already found in the file + List lines = Files.readAllLines(absPathCidRefsPath); + boolean pidFoundInCidRefFiles = false; + for (String line : lines) { + if (line.equals(pid)) { + pidFoundInCidRefFiles = true; + } + } + if (pidFoundInCidRefFiles) { + String errMsg = "FileHashStore.tagObject - cid refs file already contains pid: " + + pid; + logFileHashStore.error(errMsg); + throw new PidExistsInCidRefsFileException(errMsg); + } + + // Write pid refs file to tmp file + File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + writePidRefsFile(pidRefsTmpFile, cid); + File absPathPidRefsFile = absPathPidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + // Now update cid refs file + updateCidRefsFiles(pid, absPathCidRefsPath); + // Verify tagging process, this throws exceptions if there's an issue + verifyHashStoreRefsFiles(pid, cid, absPathPidRefsPath, absPathCidRefsPath); + + logFileHashStore.info( + "FileHashStore.tagObject - Object with cid: " + cid + + " has been updated successfully with pid: " + pid + ); return true; } else { @@ -626,6 +658,10 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile // Verify tagging process, this throws exceptions if there's an issue verifyHashStoreRefsFiles(pid, cid, absPathPidRefsPath, absPathCidRefsPath); + logFileHashStore.info( + "FileHashStore.tagObject - Object with cid: " + cid + + " has been tagged successfully with pid: " + pid + ); return true; } @@ -1636,6 +1672,34 @@ protected void verifyHashStoreRefsFiles( } } + /** + * Updates a cid refs file with a pid that references the cid + * + * @param pid Authority-based or persistent identifier + * @param absPathCidRefsPath Path to the cid refs file to update + */ + protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) { + File absPathCidRefsFile = absPathCidRefsPath.toFile(); + try { + // Obtain a lock on the file + try (FileChannel channel = new RandomAccessFile(absPathCidRefsFile, "rw").getChannel(); + FileLock lock = channel.lock()) { + + try (BufferedWriter writer = new BufferedWriter( + new FileWriter(absPathCidRefsFile, true) + )) { + writer.write(pid + "\n"); + writer.close(); + } + + } + // The lock is automatically released when the try block exits + + } catch (IOException e) { + e.printStackTrace(); + } + } + /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 9387353f..340ac6d0 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -11,8 +11,10 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.security.NoSuchAlgorithmException; +import java.util.List; import java.util.Properties; +import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -121,6 +123,57 @@ public void tagObject_pidRefsFileExists() throws Exception { } + /** + * Check that tagObject creates a pid refs file and updates an existing cid refs file + */ + @Test + public void tagObject_cidRefsFileExists() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + String pidAdditional = "another.pid.2"; + fileHashStore.tagObject(pidAdditional, cid); + + String pidAddress = fileHashStore.getPidHexDigest( + pid, fhsProperties.getProperty("storeAlgorithm") + ); + Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); + assertTrue(Files.exists(pidRefsFilePath)); + + + // Check cid refs file + Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidFoundInCidRefFiles = false; + for (String line : lines) { + if (line.equals(pidAdditional)) { + pidFoundInCidRefFiles = true; + } + } + assertTrue(pidFoundInCidRefFiles); + } + + /** + * Check that tagObject throws an exception when calling to write a pid into a cid refs + * file that already contains the pid + */ + @Test + public void tagObject_pidExistsInCidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); + File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); + fileHashStore.writeCidRefsFile(refsTmpFile, pid); + Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + fileHashStore.move(refsTmpFile, cidRefsFilePath.toFile(), "refs"); + + assertThrows(PidExistsInCidRefsFileException.class, () -> { + fileHashStore.tagObject(pid, cid); + }); + } + /** * Check that the cid supplied is written into the file given */ @@ -202,5 +255,28 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception }); } - // TODO: Write test for when updating a cid refs file with additional pids + /** + * Confirm that cid refs file has been updated successfully + */ + @Test + public void updateCidRefsFiles_content() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + + String pidAdditional = "dou.test.2"; + fileHashStore.updateCidRefsFiles("dou.test.2", cidRefsFilePath); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidFoundInCidRefFiles = false; + for (String line : lines) { + if (line.equals(pidAdditional)) { + pidFoundInCidRefFiles = true; + } + } + assertTrue(pidFoundInCidRefFiles); + } } From a562bacdd4f7c077d20d426ba556319da3502946 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 13:37:11 -0800 Subject: [PATCH 126/553] Fix bug in 'tagObject' where wrong synchronization variable is referenced and improve logging and error statements --- .../java/org/dataone/hashstore/HashStore.java | 5 ++- .../filehashstore/FileHashStore.java | 45 ++++++++++++------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 38c4c252..bc8282ef 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -96,9 +96,12 @@ ObjectInfo storeObject(InputStream object, String pid, long objSize) * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address * does not exist * @throws FileNotFoundException If refs file is missing during verification + * @throws InterruptedException When tagObject is waiting to execute but is + * interrupted */ boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - PidExistsInCidRefsFileException, NoSuchAlgorithmException, FileNotFoundException; + PidExistsInCidRefsFileException, NoSuchAlgorithmException, FileNotFoundException, + InterruptedException; /** * Confirms that an object_metadata's content is equal to the given values. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a540d5c1..29d0750f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -573,7 +573,8 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) @Override public boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException, FileNotFoundException, PidExistsInCidRefsFileException { + NoSuchAlgorithmException, FileNotFoundException, PidExistsInCidRefsFileException, + InterruptedException { logFileHashStore.debug( "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid ); @@ -584,17 +585,23 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); synchronized (referenceLockedCids) { - if (referenceLockedCids.contains(cid)) { - String errMsg = - "FileHashStore.tagObject - Duplicate tag request encountered for cid: " + cid - + ". Already in progress."; - logFileHashStore.error(errMsg); - throw new RuntimeException(errMsg); + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.tagObject - referecenceLockedCids lock was interrupted while" + + " waiting to tag pid: " + pid + " and cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } } logFileHashStore.debug( - "FileHashStore.tagObject - Synchronizing referenceLockedCids for pid: " + pid + "FileHashStore.tagObject - Synchronizing referenceLockedCids for cid: " + cid ); - objectLockedIds.add(cid); + referenceLockedCids.add(cid); } try { @@ -622,7 +629,8 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } if (pidFoundInCidRefFiles) { String errMsg = "FileHashStore.tagObject - cid refs file already contains pid: " - + pid; + + pid + ". Refs file not created for both the given pid. Cid refs file (" + + absPathCidRefsPath + ") has not been updated."; logFileHashStore.error(errMsg); throw new PidExistsInCidRefsFileException(errMsg); } @@ -737,8 +745,8 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF "FileHashStore.storeMetadata - Metadata lock was interrupted while" + " storing metadata for: " + pid + " and formatId: " + checkedFormatId + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.warn(errMsg); - throw ie; + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } } logFileHashStore.debug( @@ -1677,13 +1685,14 @@ protected void verifyHashStoreRefsFiles( * * @param pid Authority-based or persistent identifier * @param absPathCidRefsPath Path to the cid refs file to update + * @throws IOException Issue with updating a cid refs file */ - protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) { + protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) throws IOException { File absPathCidRefsFile = absPathCidRefsPath.toFile(); try { // Obtain a lock on the file - try (FileChannel channel = new RandomAccessFile(absPathCidRefsFile, "rw").getChannel(); - FileLock lock = channel.lock()) { + try (RandomAccessFile raf = new RandomAccessFile(absPathCidRefsFile, "rw"); + FileChannel channel = raf.getChannel(); FileLock lock = channel.lock()) { try (BufferedWriter writer = new BufferedWriter( new FileWriter(absPathCidRefsFile, true) @@ -1695,8 +1704,10 @@ protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) { } // The lock is automatically released when the try block exits - } catch (IOException e) { - e.printStackTrace(); + } catch (IOException ioe) { + String errMsg = "FileHashStore.updateCidRefsFiles - " + ioe.getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); } } From a389e67c06d89a479f5dfffd12c68ae6b69e5f04 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 14:42:11 -0800 Subject: [PATCH 127/553] Refactor 'tagObject', clean up code and update junit tests --- .../filehashstore/FileHashStore.java | 85 +++++++++++-------- .../FileHashStoreReferencesTest.java | 39 ++++----- 2 files changed, 65 insertions(+), 59 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 29d0750f..7823c5c3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -591,7 +591,7 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } catch (InterruptedException ie) { String errMsg = - "FileHashStore.tagObject - referecenceLockedCids lock was interrupted while" + "FileHashStore.tagObject - referenceLockedCids lock was interrupted while" + " waiting to tag pid: " + pid + " and cid: " + cid + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); @@ -620,13 +620,7 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } else if (Files.exists(absPathCidRefsPath)) { // Ensure that the pid is not already found in the file - List lines = Files.readAllLines(absPathCidRefsPath); - boolean pidFoundInCidRefFiles = false; - for (String line : lines) { - if (line.equals(pid)) { - pidFoundInCidRefFiles = true; - } - } + boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absPathCidRefsPath); if (pidFoundInCidRefFiles) { String errMsg = "FileHashStore.tagObject - cid refs file already contains pid: " + pid + ". Refs file not created for both the given pid. Cid refs file (" @@ -636,8 +630,7 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } // Write pid refs file to tmp file - File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - writePidRefsFile(pidRefsTmpFile, cid); + File pidRefsTmpFile = writePidRefsFile(cid); File absPathPidRefsFile = absPathPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); // Now update cid refs file @@ -652,12 +645,9 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile return true; } else { - // Write pid refs file to tmp file - File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - writePidRefsFile(pidRefsTmpFile, cid); - // Write cid refs file to tmp file - File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - writeCidRefsFile(cidRefsTmpFile, pid); + // Get pid and cid refs files + File pidRefsTmpFile = writePidRefsFile(cid); + File cidRefsTmpFile = writeCidRefsFile(pid); // Move refs files to permanent location File absPathPidRefsFile = absPathPidRefsPath.toFile(); File absPathCidRefsFile = absPathCidRefsPath.toFile(); @@ -1572,21 +1562,26 @@ protected void move(File source, File target, String entity) throws IOException, } /** - * Writes the given 'pid' into the provided file. - * - * @param tmpFile File object to write into - * @param pid Authority-based or persistent identifier to write + * Writes the given 'pid' into a tmp file in the cid refs file format, which consists of + * multiple pids that references a 'cid' delimited by "\n". + * + * @param pid Authority-based or persistent identifier to write * @throws IOException Failure to write pid refs file */ - protected void writeCidRefsFile(File tmpFile, String pid) throws IOException { + protected File writeCidRefsFile(String pid) throws IOException { + File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); String pidNewLine = pid + "\n"; try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter(Files.newOutputStream(tmpFile.toPath()), StandardCharsets.UTF_8) + new OutputStreamWriter( + Files.newOutputStream(cidRefsTmpFile.toPath()), StandardCharsets.UTF_8 + ) )) { writer.write(pidNewLine); writer.close(); + return cidRefsTmpFile; + } catch (IOException ioe) { logFileHashStore.error( "FileHashStore.writeCidRefsFile - Unable to write cid refs file for pid: " + pid @@ -1597,19 +1592,24 @@ protected void writeCidRefsFile(File tmpFile, String pid) throws IOException { } /** - * Writes the given 'cid' into the provided file. + * Writes the given 'cid' into a tmp file in the 'pid' refs file format. A pid refs file + * contains a single 'cid'. Note, a 'pid' can only ever reference one 'cid'. * - * @param tmpFile File object to write into - * @param cid Content identifier to write + * @param cid Content identifier to write * @throws IOException Failure to write pid refs file */ - protected void writePidRefsFile(File tmpFile, String cid) throws IOException { + protected File writePidRefsFile(String cid) throws IOException { + File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter(Files.newOutputStream(tmpFile.toPath()), StandardCharsets.UTF_8) + new OutputStreamWriter( + Files.newOutputStream(pidRefsTmpFile.toPath()), StandardCharsets.UTF_8 + ) )) { writer.write(cid); writer.close(); + return pidRefsTmpFile; + } catch (IOException ioe) { logFileHashStore.error( "FileHashStore.writePidRefsFile - Unable to write pid refs file for cid: " + cid @@ -1619,6 +1619,27 @@ protected void writePidRefsFile(File tmpFile, String cid) throws IOException { } } + /** + * Checks a given cid refs file for a pid. + * + * @param pid Authority-based or persistent identifier to search + * @param absPathCidRefsPath Path to the cid refs file to check + * @return True if cid is found, false otherwise + * @throws IOException If unable to read the cid refs file. + */ + private boolean isPidInCidRefsFile(String pid, Path absPathCidRefsPath) throws IOException { + List lines = Files.readAllLines(absPathCidRefsPath); + boolean pidFoundInCidRefFiles = false; + for (String line : lines) { + if (line.equals(pid)) { + pidFoundInCidRefFiles = true; + break; + } + } + return pidFoundInCidRefFiles; + } + + /** * Verifies that the reference files for the given pid and cid exist and contain * the expected values. @@ -1660,13 +1681,7 @@ protected void verifyHashStoreRefsFiles( throw new IOException(errMsg); } // This will strip new line characters - List lines = Files.readAllLines(absPathCidRefsPath); - boolean pidFoundInCidRefFiles = false; - for (String line : lines) { - if (line.equals(pid)) { - pidFoundInCidRefFiles = true; - } - } + boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absPathCidRefsPath); if (!pidFoundInCidRefFiles) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " + pid + " in cid refs file: " + absPathCidRefsPath; @@ -1690,7 +1705,7 @@ protected void verifyHashStoreRefsFiles( protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) throws IOException { File absPathCidRefsFile = absPathCidRefsPath.toFile(); try { - // Obtain a lock on the file + // Obtain a lock on the file before updating it try (RandomAccessFile raf = new RandomAccessFile(absPathCidRefsFile, "rw"); FileChannel channel = raf.getChannel(); FileLock lock = channel.lock()) { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 340ac6d0..8060723e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -163,11 +163,10 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); - File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); - fileHashStore.writeCidRefsFile(refsTmpFile, pid); + File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pid); + Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); - fileHashStore.move(refsTmpFile, cidRefsFilePath.toFile(), "refs"); + fileHashStore.move(cidRefsTmpFile, cidRefsFilePath.toFile(), "refs"); assertThrows(PidExistsInCidRefsFileException.class, () -> { fileHashStore.tagObject(pid, cid); @@ -179,12 +178,10 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { */ @Test public void writePidRefsFile_content() throws Exception { - Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); - File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); String cidToWrite = "test_cid_123"; - fileHashStore.writePidRefsFile(refsTmpFile, cidToWrite); + File pidRefsTmpFile = fileHashStore.writePidRefsFile(cidToWrite); - String cidRead = new String(Files.readAllBytes(refsTmpFile.toPath())); + String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); assertEquals(cidRead, cidToWrite); } @@ -194,12 +191,10 @@ public void writePidRefsFile_content() throws Exception { */ @Test public void writeCidRefsFile_content() throws Exception { - Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); - File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); String pidToWrite = "dou.test.123"; - fileHashStore.writeCidRefsFile(refsTmpFile, pidToWrite); + File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pidToWrite); - String pidRead = new String(Files.readAllBytes(refsTmpFile.toPath())); + String pidRead = new String(Files.readAllBytes(cidRefsTmpFile.toPath())); assertEquals(pidRead, pidToWrite + "\n"); } @@ -214,17 +209,15 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { fileHashStore.tagObject(pid, cid); // Create a pid refs file with the incorrect cid - Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); - File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); - String cidToWrite = "dou.test.123"; - fileHashStore.writePidRefsFile(refsTmpFile, cidToWrite); - Path refsTmpFileAbsPath = refsTmpFileDirectory.resolve(refsTmpFile.getName()); + String cidToWrite = "123456789abcdef"; + File pidRefsTmpFile = fileHashStore.writePidRefsFile(cidToWrite); + Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); // Get path of the cid refs file Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); assertThrows(IOException.class, () -> { - fileHashStore.verifyHashStoreRefsFiles(pid, cid, refsTmpFileAbsPath, cidRefsFilePath); + fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, cidRefsFilePath); }); } @@ -237,12 +230,10 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - // Create a cid refs file with the incorrect pid - Path refsTmpFileDirectory = rootDirectory.resolve("refs/tmp"); - File refsTmpFile = fileHashStore.generateTmpFile("tmp", refsTmpFileDirectory); + // Create a cid refs file with a different pid from the one that is expected String cidToWrite = "dou.test.2"; - fileHashStore.writeCidRefsFile(refsTmpFile, cidToWrite); - Path refsTmpFileAbsPath = refsTmpFileDirectory.resolve(refsTmpFile.getName()); + File cidRefsTmpFile = fileHashStore.writeCidRefsFile(cidToWrite); + Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); // Get path of the pid refs file String pidAddress = fileHashStore.getPidHexDigest( @@ -251,7 +242,7 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); assertThrows(IOException.class, () -> { - fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, refsTmpFileAbsPath); + fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsTmpFilePath); }); } From 162d35f1ffe5b0cfa083f9ac0111800e68a8a79b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 15:18:14 -0800 Subject: [PATCH 128/553] Implement 'findObject' method, update HashStore interface and add new junit tests --- .../java/org/dataone/hashstore/HashStore.java | 6 ++++- .../filehashstore/FileHashStore.java | 24 +++++++++++++++-- .../FileHashStoreReferencesTest.java | 27 +++++++++++++++++-- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index bc8282ef..ca501ac7 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -120,8 +120,12 @@ void verifyObject( * * @param pid Authority-based identifier * @return Content identifier + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs file's + * absolute address is not valid + * @throws IOException Unable to read from a pid refs file or pid refs file + * does not exist */ - String findObject(String pid); + String findObject(String pid) throws NoSuchAlgorithmException, IOException; /** * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7823c5c3..17b02378 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -685,8 +685,26 @@ public void verifyObject( } @Override - public String findObject(String pid) { - return "CID"; + public String findObject(String pid) throws NoSuchAlgorithmException, IOException { + logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); + FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); + + // Get path of the pid references file + String pidRefId = getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidShardString = getHierarchicalPathString(3, 2, pidRefId); + Path absPathPidRefsPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); + + if (Files.exists(absPathPidRefsPath)) { + String cidFromPidRefsFile = new String(Files.readAllBytes(absPathPidRefsPath)); + return cidFromPidRefsFile; + + } else { + String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid + + ". Pid refs file does not exist at: " + absPathPidRefsPath; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } } @Override @@ -1726,6 +1744,8 @@ protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) throws IO } } + // TODO: Implement delete methods for pid and cid refs files + /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 8060723e..6ac6e3ed 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -173,6 +173,31 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { }); } + /** + * Confirm expected cid is returned + */ + @Test + public void findObject_content() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + String cidRetrieved = fileHashStore.findObject(pid); + + assertEquals(cid, cidRetrieved); + } + + /** + * Check that exception is thrown when pid refs file doesn't exist + */ + @Test + public void findObject_pidNotFound() throws Exception { + String pid = "dou.test.1"; + assertThrows(IOException.class, () -> { + fileHashStore.findObject(pid); + }); + } + /** * Check that the cid supplied is written into the file given */ @@ -183,7 +208,6 @@ public void writePidRefsFile_content() throws Exception { String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); assertEquals(cidRead, cidToWrite); - } /** @@ -196,7 +220,6 @@ public void writeCidRefsFile_content() throws Exception { String pidRead = new String(Files.readAllBytes(cidRefsTmpFile.toPath())); assertEquals(pidRead, pidToWrite + "\n"); - } /** From 6f409828d6ac73ef3463bce6ded19017d3c81d64 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 16:01:51 -0800 Subject: [PATCH 129/553] Refactor 'syncPutObject' to call 'tagObject', refactor 'getHexDigest', refactor 'getRealPath', update HashStore interface and update affected junit tests --- .../java/org/dataone/hashstore/HashStore.java | 11 +-- .../filehashstore/FileHashStore.java | 68 ++++++++++++------- .../FileHashStoreInterfaceTest.java | 2 +- .../FileHashStoreReferencesTest.java | 3 +- 4 files changed, 52 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index ca501ac7..462331c2 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -55,32 +55,35 @@ public interface HashStore { * @throws PidObjectExistsException When duplicate pid object is found * @throws RuntimeException Thrown when there is an issue with permissions, illegal * arguments (ex. empty pid) or null pointers + * @throws InterruptedException When tagging pid and cid process is interrupted */ ObjectInfo storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ ObjectInfo storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException; + ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, - RuntimeException; + RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ ObjectInfo storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, - RuntimeException; + RuntimeException, InterruptedException; /** * Creates references that allow objects stored in HashStore to be discoverable. Retrieving, diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 17b02378..357177c8 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -422,7 +422,8 @@ protected String buildHashStoreYamlString( public ObjectInfo storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { + ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + InterruptedException { logFileHashStore.debug( "FileHashStore.storeObject - Called to store object for pid: " + pid ); @@ -460,7 +461,8 @@ public ObjectInfo storeObject( private ObjectInfo syncPutObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, PidObjectExistsException, IOException, RuntimeException { + ) throws NoSuchAlgorithmException, PidObjectExistsException, IOException, RuntimeException, + InterruptedException { // Lock pid for thread safety, transaction control and atomic writing // A pid can only be stored once and only once, subsequent calls will // be accepted but will be rejected if pid hash object exists @@ -488,6 +490,9 @@ private ObjectInfo syncPutObject( ObjectInfo objInfo = putObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize ); + // Tag object + String cid = objInfo.getId(); + tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid + ". Permanent address: " + getRealPath(pid, "object", null) @@ -539,7 +544,8 @@ private ObjectInfo syncPutObject( */ @Override public ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) - throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { + throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + InterruptedException { FileHashStoreUtility.ensureNotNull( additionalAlgorithm, "additionalAlgorithm", "storeObject" ); @@ -553,7 +559,8 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA @Override public ObjectInfo storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { + ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + InterruptedException { FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "storeObject"); @@ -565,7 +572,8 @@ public ObjectInfo storeObject( */ @Override public ObjectInfo storeObject(InputStream object, String pid, long objSize) - throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException { + throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + InterruptedException { FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); return storeObject(object, pid, null, null, null, objSize); @@ -673,8 +681,6 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile referenceLockedCids.notifyAll(); } } - - } @Override @@ -703,7 +709,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid + ". Pid refs file does not exist at: " + absPathPidRefsPath; logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new FileNotFoundException(errMsg); } } @@ -1019,24 +1025,33 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE FileHashStoreUtility.checkForEmptyString(pid, "pid", "getHexDigest"); validateAlgorithm(algorithm); - // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getRealPath(pid, "object", null); + // Find the content identifier + if (algorithm.equals(OBJECT_STORE_ALGORITHM)) { + String cid = findObject(pid); + return cid; - // Check to see if object exists - if (!Files.exists(objRealPath)) { - String errMsg = "FileHashStore.getHexDigest - File does not exist for pid: " + pid - + " with object address: " + objRealPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } + } else { + // Get permanent address of the pid + Path objRealPath = getRealPath(pid, "object", null); - InputStream dataStream = Files.newInputStream(objRealPath); - String mdObjectHexDigest = FileHashStoreUtility.calculateHexDigest(dataStream, algorithm); - logFileHashStore.info( - "FileHashStore.getHexDigest - Hex digest calculated for pid: " + pid - + ", with hex digest value: " + mdObjectHexDigest - ); - return mdObjectHexDigest; + // Check to see if object exists + if (!Files.exists(objRealPath)) { + String errMsg = "FileHashStore.getHexDigest - File does not exist for pid: " + pid + + " with object address: " + objRealPath; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } + + InputStream dataStream = Files.newInputStream(objRealPath); + String mdObjectHexDigest = FileHashStoreUtility.calculateHexDigest( + dataStream, algorithm + ); + logFileHashStore.info( + "FileHashStore.getHexDigest - Hex digest calculated for pid: " + pid + + ", with hex digest value: " + mdObjectHexDigest + ); + return mdObjectHexDigest; + } } // FileHashStore Core & Supporting Methods @@ -1847,12 +1862,13 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea * @return Actual path to object * @throws IllegalArgumentException If entity is not object or metadata * @throws NoSuchAlgorithmException If store algorithm is not supported + * @throws IOException If unable to retrieve cid */ protected Path getRealPath(String pid, String entity, String formatId) - throws IllegalArgumentException, NoSuchAlgorithmException { + throws IllegalArgumentException, NoSuchAlgorithmException, IOException { Path realPath; if (entity.equalsIgnoreCase("object")) { - String objectCid = getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String objectCid = findObject(pid); String objShardString = getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 8120159f..0fb0e9bc 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -505,7 +505,7 @@ public void storeObject_interruptProcess() throws Exception { InputStream dataStream = Files.newInputStream(testFilePath); String pid = "dou.sparsefile.1"; fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - } catch (IOException | NoSuchAlgorithmException ioe) { + } catch (IOException | NoSuchAlgorithmException | InterruptedException ioe) { ioe.printStackTrace(); } }); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 6ac6e3ed..37da4ebc 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -6,6 +6,7 @@ import static org.junit.jupiter.api.Assertions.fail; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -193,7 +194,7 @@ public void findObject_content() throws Exception { @Test public void findObject_pidNotFound() throws Exception { String pid = "dou.test.1"; - assertThrows(IOException.class, () -> { + assertThrows(FileNotFoundException.class, () -> { fileHashStore.findObject(pid); }); } From 823fece130d31a3a24dca617e3b9e6bf57cea42e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 18 Dec 2023 16:25:00 -0800 Subject: [PATCH 130/553] Update HashStore interface 'storeObject' javadoc and add new override method for 'storeObject' to be called to with only an InputStream --- .../java/org/dataone/hashstore/HashStore.java | 47 ++++++++++++------- .../filehashstore/FileHashStore.java | 6 ++- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 462331c2..6adb10c0 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -18,28 +18,35 @@ */ public interface HashStore { /** - * Atomically stores objects to HashStore using a given InputStream and a persistent - * identifier (pid). Upon successful storage, the method returns an 'ObjectInfo' object - * containing the object's file information, such as the id, file size, and hex digest map - * of algorithms and hex digests/checksums. An object is stored once and only once - and - * `storeObject` also enforces this rule by synchronizing multiple calls and rejecting calls - * to store duplicate objects. + * The `storeOject` method is responsible for the atomic storage of objects to disk using a + * given InputStream. Upon successful storage, the method returns a (ObjectInfo) object + * containing relevant file information, such as the file's id (which can be used to locate + * the object on disk), the file's size, and a hex digest dict of algorithms and checksums. + * Storing an object with `store_object` also tags an object (creating references) which + * allow the object to be discoverable. * - * The file's id is determined by calculating the SHA-256 hex digest of the provided pid, - * which is also used as the permanent address of the file. The file's identifier is then - * sharded using a depth of 3 and width of 2, delimited by '/' and concatenated to produce - * the final permanent address, which is stored in the object store directory (ex. - * `./[storePath]/objects/`). + * `storeObject` also ensures that an object is stored only once by synchronizing multiple + * calls and rejecting calls to store duplicate objects. Note, calling `storeObject` without + * a pid is a possibility, but should only store the object without tagging the object. It + * is then the caller's responsibility to finalize the process by calling `tagObject` after + * veriftying the correct object is stored. + * + * The file's id is determined by calculating the object's content identifier based on the + * store's default algorithm, which is also used as the permanent address of the file. The + * file's identifier is then sharded using the store's configured depth and width, delimited + * by '/' and concatenated to produce the final permanent address and is stored in the + * `./[storePath]/objects/` directory. * * By default, the hex digest map includes the following hash algorithms: MD5, SHA-1, - * SHA-256, SHA-384 and SHA-512, which are the most commonly used algorithms in dataset + * SHA-256, SHA-384, SHA-512 - which are the most commonly used algorithms in dataset * submissions to DataONE and the Arctic Data Center. If an additional algorithm is - * provided, the `storeObject` method checks if it is supported and adds it to the map along - * with its corresponding hex digest. An algorithm is considered "supported" if it is - * recognized as a valid hash algorithm in the `java.security.MessageDigest` class. + * provided, the `storeObject` method checks if it is supported and adds it to the hex + * digests dict along with its corresponding hex digest. An algorithm is considered + * "supported" if it is recognized as a valid hash algorithm in + * `java.security.MessageDigest` class. * - * Similarly, if a checksum and a checksumAlgorithm or an object size value is provided, - * `storeObject` validates the object to ensure it matches what is provided before moving + * Similarly, if a file size and/or checksum & checksumAlgorithm value are provided, + * `storeObject` validates the object to ensure it matches the given arguments before moving * the file to its permanent address. * * @param object Input stream to file @@ -63,6 +70,12 @@ ObjectInfo storeObject( ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException; + /** + * @see #storeObject(InputStream, String, String, String, String, long) + */ + ObjectInfo storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, + PidObjectExistsException, RuntimeException, InterruptedException; + /** * @see #storeObject(InputStream, String, String, String, String, long) */ diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 357177c8..2c837515 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -537,7 +537,11 @@ private ObjectInfo syncPutObject( } } - // TODO: storeObject with just InputStream + @Override + public ObjectInfo storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, + PidObjectExistsException, RuntimeException, InterruptedException { + return storeObject(object, null, null, null, null, -1); + } /** * Overload method for storeObject with an additionalAlgorithm From 20ed145c92d25b57ee07c2fc27a19b58c10105d1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 10:29:32 -0800 Subject: [PATCH 131/553] Remove unintended print statements --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2c837515..087a7b0f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -446,7 +446,6 @@ public ObjectInfo storeObject( validateAlgorithm(checksumAlgorithm); } if (objSize != -1) { - System.out.println("Checking not negative..."); FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); } @@ -540,6 +539,7 @@ private ObjectInfo syncPutObject( @Override public ObjectInfo storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException { + // TODO return storeObject(object, null, null, null, null, -1); } @@ -1709,8 +1709,6 @@ protected void verifyHashStoreRefsFiles( try { String cidRead = new String(Files.readAllBytes(absPathPidRefsPath)); if (!cidRead.equals(cid)) { - System.out.println(cidRead); - System.out.println(cid); String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Unexpected cid: " + cidRead + " found in pid refs file: " + absPathPidRefsPath + ". Expected cid: " + cid; From a7c53fb78344b3410d8fdc1f62bc705d6ba9d463 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 11:59:09 -0800 Subject: [PATCH 132/553] Refactor 'putObject' to remove input validation, which is already done by calling method 'storeObject' --- .../filehashstore/FileHashStore.java | 4 -- .../FileHashStoreProtectedTest.java | 43 ------------------- 2 files changed, 47 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 087a7b0f..a83387bf 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1099,10 +1099,6 @@ protected ObjectInfo putObject( AtomicMoveNotSupportedException { logFileHashStore.debug("FileHashStore.putObject - Called to put object for pid: " + pid); - // Begin input validation - FileHashStoreUtility.ensureNotNull(object, "object", "putObject"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "putObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "putObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { FileHashStoreUtility.checkForEmptyString( diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 437d7b72..aede8db6 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -476,49 +476,6 @@ public void putObject_emptyAlgorithm() { }); } - /** - * Verify putObject throws exception when pid is empty - */ - @Test - public void putObject_emptyPid() { - assertThrows(IllegalArgumentException.class, () -> { - // Get test file to "upload" - String pidEmpty = ""; - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); - - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pidEmpty, null, null, null, -1); - }); - } - - /** - * Verify putObject throws exception when pid is null - */ - @Test - public void putObject_nullPid() { - assertThrows(IllegalArgumentException.class, () -> { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); - - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, null, "MD2", null, null, -1); - }); - } - - /** - * Verify putObject throws exception object is null - */ - @Test - public void putObject_nullObject() { - assertThrows(IllegalArgumentException.class, () -> { - // Get test file to "upload" - String pid = "jtao.1700.1"; - fileHashStore.putObject(null, pid, "MD2", null, null, -1); - }); - } - /** * Check default checksums are generated */ From 252aa2f0c2e5f5f4206de5d5ee8070340412eb9a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 13:16:42 -0800 Subject: [PATCH 133/553] Implement 'storeObject' method with just an InputStream and add/refactor junit tests --- .../filehashstore/FileHashStore.java | 16 ++++- .../FileHashStoreInterfaceTest.java | 71 +++++++++++++------ 2 files changed, 64 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a83387bf..eb99d5ef 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -536,11 +536,23 @@ private ObjectInfo syncPutObject( } } + /** + * Overload method for storeObject with just an InputStream + */ @Override public ObjectInfo storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException { - // TODO - return storeObject(object, null, null, null, null, -1); + // 'putObject' is called directly to bypass the pid synchronization implemented to + // efficiently handle duplicate object store requests. Since there is no pid, calling + // 'storeObject' would unintentionally create a bottleneck for all requests without a + // pid (they would be executed sequentially). This scenario occurs when metadata about + // the object (ex. form data including the pid, checksum, checksum algorithm, etc.) is + // unavailable. + // + // Note: This method does not tag the object to make it discoverable, so the client must + // call 'tagObject' and 'verifyObject' separately to ensure that the object stored + // is discoverable and is what is expected. + return putObject(object, "HashStoreNoPid", null, null, null, -1); } /** diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 0fb0e9bc..8069a6f7 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -83,7 +83,7 @@ public void initializeFileHashStore() { /** * Utility method to get absolute path of a given object */ - public Path getObjectAbsPath(String id) { + public Path getObjectAbsPath(String id, String entity) { int shardDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int shardWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // Get relative path @@ -93,7 +93,8 @@ public Path getObjectAbsPath(String id) { // Get absolute path Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - return storePath.resolve("objects/" + objCidShardString); + // return storePath.resolve("objects/" + objCidShardString); + return storePath.resolve(entity).resolve(objCidShardString); } /** @@ -277,7 +278,35 @@ public void storeObject_objSize_overload() throws Exception { } /** - * Verify that storeObject stores an object with a good checksum value + * Check that store object executes as expected with only an InputStream (does not create + * any reference files) + */ + @Test + public void storeObject_inputStream_overload() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + + Map hexDigests = objInfo.getHexDigests(); + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + String cid = objInfo.getId(); + + assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); + + assertThrows(FileNotFoundException.class, () -> { + fileHashStore.findObject(pid); + }); + + Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + assertFalse(Files.exists(cidRefsFilePath)); + } + } + + /** + * Verify that storeObject returns the expected checksum value */ @Test public void storeObject_validateChecksumValue() throws Exception { @@ -293,7 +322,7 @@ public void storeObject_validateChecksumValue() throws Exception { ); String objCid = address.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } @@ -472,7 +501,7 @@ public void storeObject_largeSparseFile() throws Exception { ); String objCid = sparseFileObjInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } @@ -553,8 +582,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, 0 ); if (objInfo != null) { - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -570,8 +599,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, 0 ); if (objInfo != null) { - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -587,8 +616,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, 0 ); if (objInfo != null) { - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -604,8 +633,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, 0 ); if (objInfo != null) { - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -621,8 +650,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, 0 ); if (objInfo != null) { - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -666,8 +695,8 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { dataStream, pid, null, null, null, 0 ); if (objInfo != null) { - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -683,8 +712,8 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { dataStream, pid, null, null, null, 0 ); if (objInfo != null) { - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -1238,8 +1267,8 @@ public void deleteObject() throws Exception { fileHashStore.deleteObject(pid); // Check that file doesn't exist - String objId = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objId); + String objCid = objInfo.getId(); + Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); assertFalse(Files.exists(objCidAbsPath)); // Check that parent directories are not deleted From 3b9b02886411f0832d3dd65d604bf171d05f2351 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 14:37:05 -0800 Subject: [PATCH 134/553] Refactor 'getRealPath' and update all affected code and junit tests --- .../filehashstore/FileHashStore.java | 41 +++++++++----- .../FileHashStoreInterfaceTest.java | 52 +++++------------- .../FileHashStoreReferencesTest.java | 55 ++++++------------- 3 files changed, 58 insertions(+), 90 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index eb99d5ef..b9931e7c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -629,11 +629,8 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } try { - String pidRefId = getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidShardString = getHierarchicalPathString(3, 2, pidRefId); - String cidShardString = getHierarchicalPathString(3, 2, cid); - Path absPathPidRefsPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); - Path absPathCidRefsPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); + Path absPathPidRefsPath = getRealPath(pid, "refs", "pid"); + Path absPathCidRefsPath = getRealPath(cid, "refs", "cid"); // Check that pid refs file doesn't exist yet if (Files.exists(absPathPidRefsPath)) { @@ -713,9 +710,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); // Get path of the pid references file - String pidRefId = getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidShardString = getHierarchicalPathString(3, 2, pidRefId); - Path absPathPidRefsPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); + Path absPathPidRefsPath = getRealPath(pid, "refs", "pid"); if (Files.exists(absPathPidRefsPath)) { String cidFromPidRefsFile = new String(Files.readAllBytes(absPathPidRefsPath)); @@ -1866,31 +1861,51 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea /** * Get the absolute path of a HashStore object or metadata file * - * @param pid Authority-based identifier + * @param abId Authority-based, persistent or content idenfitier * @param entity "object" or "metadata" - * @param formatId Metadata namespace + * @param formatId Metadata namespace or reference type (pid/cid) * @return Actual path to object * @throws IllegalArgumentException If entity is not object or metadata * @throws NoSuchAlgorithmException If store algorithm is not supported * @throws IOException If unable to retrieve cid */ - protected Path getRealPath(String pid, String entity, String formatId) + protected Path getRealPath(String abId, String entity, String formatId) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { Path realPath; if (entity.equalsIgnoreCase("object")) { - String objectCid = findObject(pid); + // 'abId' is expected to be a pid + String objectCid = findObject(abId); String objShardString = getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); } else if (entity.equalsIgnoreCase("metadata")) { - String objectCid = getPidHexDigest(pid + formatId, OBJECT_STORE_ALGORITHM); + String objectCid = getPidHexDigest(abId + formatId, OBJECT_STORE_ALGORITHM); String objShardString = getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); realPath = METADATA_STORE_DIRECTORY.resolve(objShardString); + } else if (entity.equalsIgnoreCase("refs")) { + if (formatId.equalsIgnoreCase("pid")) { + String pidRefId = getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); + String pidShardString = getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidRefId + ); + realPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); + } else if (formatId.equalsIgnoreCase("cid")) { + String cidShardString = getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, abId + ); + realPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); + } else { + String errMsg = + "FileHashStore.getRealPath - formatId must be 'pid' or 'cid' when entity is 'refs'."; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + } else { throw new IllegalArgumentException( "FileHashStore.getRealPath - entity must be 'object' or 'metadata'" diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 8069a6f7..2cf597be 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -80,23 +80,6 @@ public void initializeFileHashStore() { @TempDir public Path tempFolder; - /** - * Utility method to get absolute path of a given object - */ - public Path getObjectAbsPath(String id, String entity) { - int shardDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int shardWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // Get relative path - String objCidShardString = fileHashStore.getHierarchicalPathString( - shardDepth, shardWidth, id - ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - - // return storePath.resolve("objects/" + objCidShardString); - return storePath.resolve(entity).resolve(objCidShardString); - } - /** * Check that store object returns the correct ObjectInfo id */ @@ -300,7 +283,7 @@ public void storeObject_inputStream_overload() throws Exception { fileHashStore.findObject(pid); }); - Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); assertFalse(Files.exists(cidRefsFilePath)); } } @@ -317,12 +300,9 @@ public void storeObject_validateChecksumValue() throws Exception { String checksumCorrect = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo address = fileHashStore.storeObject( - dataStream, pid, null, checksumCorrect, "SHA-256", -1 - ); + fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); - String objCid = address.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } @@ -496,12 +476,9 @@ public void storeObject_largeSparseFile() throws Exception { InputStream dataStream = Files.newInputStream(testFilePath); String pid = "dou.sparsefile.1"; - ObjectInfo sparseFileObjInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String objCid = sparseFileObjInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } @@ -583,7 +560,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -600,7 +577,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -617,7 +594,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -634,7 +611,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -651,7 +628,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -696,7 +673,7 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { ); if (objInfo != null) { String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -713,7 +690,7 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { ); if (objInfo != null) { String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } } catch (Exception e) { @@ -1262,13 +1239,12 @@ public void deleteObject() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); fileHashStore.deleteObject(pid); // Check that file doesn't exist - String objCid = objInfo.getId(); - Path objCidAbsPath = getObjectAbsPath(objCid, "objects"); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); assertFalse(Files.exists(objCidAbsPath)); // Check that parent directories are not deleted diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 37da4ebc..61ac12b6 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -10,7 +10,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.security.NoSuchAlgorithmException; import java.util.List; import java.util.Properties; @@ -26,7 +25,6 @@ */ public class FileHashStoreReferencesTest { private FileHashStore fileHashStore; - private Properties fhsProperties; private Path rootDirectory; /** @@ -46,7 +44,6 @@ public void initializeFileHashStore() { ); try { - fhsProperties = storeProperties; fileHashStore = new FileHashStore(storeProperties); } catch (IOException ioe) { @@ -64,22 +61,6 @@ public void initializeFileHashStore() { @TempDir public Path tempFolder; - /** - * Utility method to get absolute path of a given object - */ - public Path getObjectAbsPath(String id, String entity) { - int shardDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int shardWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // Get relative path - String objCidShardString = fileHashStore.getHierarchicalPathString( - shardDepth, shardWidth, id - ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - - return storePath.resolve(entity).resolve(objCidShardString); - } - /** * Check that tagObject writes expected pid refs files */ @@ -89,10 +70,7 @@ public void tagObject_pidRefsFile() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - String pidAddress = fileHashStore.getPidHexDigest( - pid, fhsProperties.getProperty("storeAlgorithm") - ); - Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); + Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); assertTrue(Files.exists(pidRefsFilePath)); } @@ -105,7 +83,7 @@ public void tagObject_cidRefsFile() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(cidRefsFilePath)); } @@ -136,15 +114,12 @@ public void tagObject_cidRefsFileExists() throws Exception { String pidAdditional = "another.pid.2"; fileHashStore.tagObject(pidAdditional, cid); - String pidAddress = fileHashStore.getPidHexDigest( - pid, fhsProperties.getProperty("storeAlgorithm") - ); - Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); + Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); assertTrue(Files.exists(pidRefsFilePath)); // Check cid refs file - Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); List lines = Files.readAllLines(cidRefsFilePath); boolean pidFoundInCidRefFiles = false; for (String line : lines) { @@ -166,7 +141,7 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pid); - Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); fileHashStore.move(cidRefsTmpFile, cidRefsFilePath.toFile(), "refs"); assertThrows(PidExistsInCidRefsFileException.class, () -> { @@ -238,7 +213,7 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); // Get path of the cid refs file - Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); assertThrows(IOException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, cidRefsFilePath); @@ -260,10 +235,7 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); // Get path of the pid refs file - String pidAddress = fileHashStore.getPidHexDigest( - pid, fhsProperties.getProperty("storeAlgorithm") - ); - Path pidRefsFilePath = getObjectAbsPath(pidAddress, "refs/pid"); + Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); assertThrows(IOException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsTmpFilePath); @@ -280,18 +252,23 @@ public void updateCidRefsFiles_content() throws Exception { fileHashStore.tagObject(pid, cid); // Get path of the cid refs file - Path cidRefsFilePath = getObjectAbsPath(cid, "refs/cid"); + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); String pidAdditional = "dou.test.2"; fileHashStore.updateCidRefsFiles("dou.test.2", cidRefsFilePath); List lines = Files.readAllLines(cidRefsFilePath); - boolean pidFoundInCidRefFiles = false; + boolean pidOriginal_foundInCidRefFiles = false; + boolean pidAdditional_foundInCidRefFiles = false; for (String line : lines) { if (line.equals(pidAdditional)) { - pidFoundInCidRefFiles = true; + pidAdditional_foundInCidRefFiles = true; + } + if (line.equals(pidAdditional)) { + pidOriginal_foundInCidRefFiles = true; } } - assertTrue(pidFoundInCidRefFiles); + assertTrue(pidOriginal_foundInCidRefFiles); + assertTrue(pidAdditional_foundInCidRefFiles); } } From cee2aab0e9c15a468b47dc4a687b83c2c24ae6aa Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 15:25:15 -0800 Subject: [PATCH 135/553] Add new method 'deletePidRefsFile' and new junit tests --- .../filehashstore/FileHashStore.java | 35 +++++++++++++++++-- .../FileHashStoreReferencesTest.java | 28 +++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b9931e7c..705e4691 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -18,6 +18,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -1667,7 +1668,7 @@ protected File writePidRefsFile(String cid) throws IOException { * @return True if cid is found, false otherwise * @throws IOException If unable to read the cid refs file. */ - private boolean isPidInCidRefsFile(String pid, Path absPathCidRefsPath) throws IOException { + protected boolean isPidInCidRefsFile(String pid, Path absPathCidRefsPath) throws IOException { List lines = Files.readAllLines(absPathCidRefsPath); boolean pidFoundInCidRefFiles = false; for (String line : lines) { @@ -1764,7 +1765,35 @@ protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) throws IO } } - // TODO: Implement delete methods for pid and cid refs files + /** + * Deletes a pid references file + * + * @param pid Authority-based or persistent identifier + * @throws NoSuchAlgorithmException Incompatible algorithm used to find pid refs file + * @throws IOException Unable to delete object or open pid refs file + */ + protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IOException { + FileHashStoreUtility.ensureNotNull(pid, "pid", "deletePidRefsFile"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "deletePidRefsFile"); + + Path absPathPidRefsPath = getRealPath(pid, "refs", "pid"); + + // Check to see if pid refs file exists + if (!Files.exists(absPathPidRefsPath)) { + String errMsg = + "FileHashStore.deletePidRefsFile - File refs file does not exist for pid: " + pid + + " with address" + absPathPidRefsPath; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } else { + // Proceed to delete + Files.delete(absPathPidRefsPath); + logFileHashStore.debug( + "FileHashStore.deletePidRefsFile - Pid refs file deleted for: " + pid + + " with address: " + absPathPidRefsPath + ); + } + } /** * Takes a given input stream and writes it to its permanent address on disk based on the @@ -1901,7 +1930,7 @@ protected Path getRealPath(String abId, String entity, String formatId) realPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); } else { String errMsg = - "FileHashStore.getRealPath - formatId must be 'pid' or 'cid' when entity is 'refs'."; + "FileHashStore.getRealPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 61ac12b6..cd461640 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -1,6 +1,7 @@ package org.dataone.hashstore.filehashstore; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -271,4 +272,31 @@ public void updateCidRefsFiles_content() throws Exception { assertTrue(pidOriginal_foundInCidRefFiles); assertTrue(pidAdditional_foundInCidRefFiles); } + + /** + * Check that deletePidRefsFile deletes file + */ + @Test + public void deletePidRefsFile_fileDeleted() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + fileHashStore.deletePidRefsFile(pid); + + Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); + assertFalse(Files.exists(pidRefsFilePath)); + } + + /** + * Check that deletePidRefsFile throws exception when there is no file to delete + */ + @Test + public void deletePidRefsFile_missingPidRefsFile() throws Exception { + String pid = "dou.test.1"; + + assertThrows(FileNotFoundException.class, () -> { + fileHashStore.deletePidRefsFile(pid); + }); + } } From fd0ef31934decd8c319ca50261cfe2fe979f73c1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 15:38:11 -0800 Subject: [PATCH 136/553] Add new methods 'deleteCidRefsPid' and 'deleteCidRefsFile' and add new junit tests --- .../filehashstore/FileHashStore.java | 93 +++++++++++++++++++ .../FileHashStoreReferencesTest.java | 81 ++++++++++++++++ 2 files changed, 174 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 705e4691..9c2938c1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1795,6 +1795,99 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO } } + + /** + * Removes a pid from a cid refs file. + * + * @param pid Authority-based or persistent identifier. + * @param cid Content identifier + * @throws NoSuchAlgorithmException Incompatible algorithm used to find pid refs file + * @throws IOException Unable to delete object or open pid refs file + */ + protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmException, + IOException { + FileHashStoreUtility.ensureNotNull(cid, "pid", "deleteCidRefsPid"); + FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsPid"); + + Path absPathCidRefsPath = getRealPath(cid, "refs", "cid"); + + // Check to see if cid refs file exists + if (!Files.exists(absPathCidRefsPath)) { + String errMsg = + "FileHashStore.deleteCidRefsPid - Cid refs file does not exist for cid: " + cid + + " with address" + absPathCidRefsPath; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); + + } else { + if (isPidInCidRefsFile(pid, absPathCidRefsPath)) { + try { + List lines = new ArrayList<>(Files.readAllLines(absPathCidRefsPath)); + lines.remove(pid); + Files.write( + absPathCidRefsPath, lines, StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING + ); + logFileHashStore.debug( + "FileHashStore.deleteCidRefsPid - Pid: " + pid + + " removed from cid refs file: " + absPathCidRefsPath + ); + + } catch (IOException ioe) { + String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid + + "from cid refs file: " + absPathCidRefsPath; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + + } else { + String errMsg = "FileHashStore.deleteCidRefsPid - pid: " + pid + + " not found in cid refs file: " + absPathCidRefsPath; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + } + } + + + /** + * Deletes a cid refs file if it is empty. + * + * @param cid Content identifier + * @throws IOException Unable to delete object cid refs file + */ + protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IOException { + FileHashStoreUtility.ensureNotNull(cid, "pid", "deleteCidRefsFile"); + FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsFile"); + + Path absPathCidRefsPath = getRealPath(cid, "refs", "cid"); + + // Check to see if cid refs file exists + if (!Files.exists(absPathCidRefsPath)) { + String errMsg = + "FileHashStore.deleteCidRefsFile - Cid refs file does not exist for cid: " + cid + + " with address" + absPathCidRefsPath; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); + + } else { + // A cid refs file is only deleted if it is empty. Client must removed pids first + if (Files.size(absPathCidRefsPath) == 0) { + Files.delete(absPathCidRefsPath); + logFileHashStore.debug( + "FileHashStore.deleteCidRefsFile - Deleted cid refs file: " + absPathCidRefsPath + ); + + } else { + String errMsg = + "FileHashStore.deleteCidRefsFile - Unable to delete cid refs file, it is not empty: " + + absPathCidRefsPath; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + } + } + /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index cd461640..7c589802 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -299,4 +299,85 @@ public void deletePidRefsFile_missingPidRefsFile() throws Exception { fileHashStore.deletePidRefsFile(pid); }); } + + /** + * Check that deleteCidRefsPid deletes pid from its cid refs file + */ + @Test + public void deleteCidRefsPid_pidRemoved() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + String pidAdditional = "dou.test.2"; + fileHashStore.tagObject(pidAdditional, cid); + + fileHashStore.deleteCidRefsPid(pid, cid); + + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertFalse(fileHashStore.isPidInCidRefsFile(pid, cidRefsFilePath)); + } + + /** + * Check that deleteCidRefsPid throws exception when there is no file to delete the pid from + */ + @Test + public void deleteCidRefsPid_missingCidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abc123456789"; + + assertThrows(FileNotFoundException.class, () -> { + fileHashStore.deleteCidRefsPid(pid, cid); + }); + } + + /** + * Check that deleteCidRefsPid throws exception when there is no file to delete the pid from + */ + @Test + public void deleteCidRefsPid_pidNotFoundInCidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.deleteCidRefsPid("bad.pid", cid); + }); + } + + /** + * Check that deleteCidRefsFile deletes a file when it is empty. + */ + @Test + public void deleteCidRefsFile_fileDeleted() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + fileHashStore.deleteCidRefsPid(pid, cid); + fileHashStore.deleteCidRefsFile(cid); + + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertFalse(Files.exists(cidRefsFilePath)); + } + + /** + * Check that deleteCidRefsFile throws exception when cid refs file is not empty. + */ + @Test + public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + String pidAdditional = "dou.test.2"; + fileHashStore.tagObject(pidAdditional, cid); + + fileHashStore.deleteCidRefsPid(pid, cid); + + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.deleteCidRefsFile(cid); + }); + + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertTrue(Files.exists(cidRefsFilePath)); + } } From 053457de0311da1b9f39a4b97741cf3ab06e928b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 15:48:39 -0800 Subject: [PATCH 137/553] Clean up code, revise logging levels and statements --- .../filehashstore/FileHashStore.java | 27 +++++++++++-------- .../FileHashStoreReferencesTest.java | 4 +-- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 9c2938c1..cac05e55 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1652,11 +1652,11 @@ protected File writePidRefsFile(String cid) throws IOException { return pidRefsTmpFile; } catch (IOException ioe) { - logFileHashStore.error( + String errMsg = "FileHashStore.writePidRefsFile - Unable to write pid refs file for cid: " + cid - + " IOException: " + ioe.getMessage() - ); - throw ioe; + + " IOException: " + ioe.getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); } } @@ -1718,14 +1718,15 @@ protected void verifyHashStoreRefsFiles( + ". Expected cid: " + cid; logFileHashStore.error(errMsg); throw new IOException(errMsg); + } - // This will strip new line characters boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absPathCidRefsPath); if (!pidFoundInCidRefFiles) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " + pid + " in cid refs file: " + absPathCidRefsPath; logFileHashStore.error(errMsg); throw new IOException(errMsg); + } } catch (IOException ioe) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); @@ -1754,7 +1755,10 @@ protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) throws IO writer.write(pid + "\n"); writer.close(); } - + logFileHashStore.debug( + "FileHashStore.updateCidRefsFiles - Pid: " + pid + + " has been added to cid refs file: " + absPathCidRefsFile + ); } // The lock is automatically released when the try block exits @@ -1783,8 +1787,9 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO String errMsg = "FileHashStore.deletePidRefsFile - File refs file does not exist for pid: " + pid + " with address" + absPathPidRefsPath; - logFileHashStore.warn(errMsg); + logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); + } else { // Proceed to delete Files.delete(absPathPidRefsPath); @@ -1801,8 +1806,7 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO * * @param pid Authority-based or persistent identifier. * @param cid Content identifier - * @throws NoSuchAlgorithmException Incompatible algorithm used to find pid refs file - * @throws IOException Unable to delete object or open pid refs file + * @throws IOException Unable to access cid refs file */ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmException, IOException { @@ -1835,7 +1839,8 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx } catch (IOException ioe) { String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid - + "from cid refs file: " + absPathCidRefsPath; + + "from cid refs file: " + absPathCidRefsPath + ". Additional Info: " + ioe + .getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -1871,7 +1876,7 @@ protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IO throw new FileNotFoundException(errMsg); } else { - // A cid refs file is only deleted if it is empty. Client must removed pids first + // A cid refs file is only deleted if it is empty. Client must removed pid(s) first if (Files.size(absPathCidRefsPath) == 0) { Files.delete(absPathCidRefsPath); logFileHashStore.debug( diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 7c589802..3905e942 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -345,7 +345,7 @@ public void deleteCidRefsPid_pidNotFoundInCidRefsFile() throws Exception { } /** - * Check that deleteCidRefsFile deletes a file when it is empty. + * Check that deleteCidRefsFile deletes a file when it is empty */ @Test public void deleteCidRefsFile_fileDeleted() throws Exception { @@ -361,7 +361,7 @@ public void deleteCidRefsFile_fileDeleted() throws Exception { } /** - * Check that deleteCidRefsFile throws exception when cid refs file is not empty. + * Check that deleteCidRefsFile throws exception when cid refs file is not empty */ @Test public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { From 9c880a9f6661ea9400568576f9c878a0c430e75a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 16:14:19 -0800 Subject: [PATCH 138/553] Refactor 'deleteObject' to also remove the relevant reference files --- .../filehashstore/FileHashStore.java | 13 ++++++++-- .../FileHashStoreInterfaceTest.java | 24 ++++++++++++++++++- .../FileHashStoreReferencesTest.java | 23 ++---------------- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index cac05e55..bc36dd6c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -981,6 +981,12 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou // Proceed to delete Files.delete(objRealPath); + // Remove pid from the cid refs file + String cid = findObject(pid); + deleteCidRefsPid(pid, cid); + // Delete pid reference file + deletePidRefsFile(pid); + logFileHashStore.info( "FileHashStore.deleteObject - File deleted for: " + pid + " with object address: " + objRealPath @@ -1844,6 +1850,10 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx logFileHashStore.error(errMsg); throw new IOException(errMsg); } + // Perform clean up on cid refs file - if it is empty, delete it + if (Files.size(absPathCidRefsPath) == 0) { + Files.delete(absPathCidRefsPath); + } } else { String errMsg = "FileHashStore.deleteCidRefsPid - pid: " + pid @@ -1887,8 +1897,7 @@ protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IO String errMsg = "FileHashStore.deleteCidRefsFile - Unable to delete cid refs file, it is not empty: " + absPathCidRefsPath; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); + logFileHashStore.warn(errMsg); } } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 2cf597be..c55e9168 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1241,10 +1241,10 @@ public void deleteObject() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); fileHashStore.deleteObject(pid); // Check that file doesn't exist - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); assertFalse(Files.exists(objCidAbsPath)); // Check that parent directories are not deleted @@ -1257,6 +1257,28 @@ public void deleteObject() throws Exception { } } + /** + * Confirm that deleteObject deletes reference files + */ + @Test + public void deleteObject_referencesDeleted() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.getId(); + + // Path objAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + fileHashStore.deleteObject(pid); + assertFalse(Files.exists(absPathPidRefsPath)); + assertFalse(Files.exists(absPathCidRefsPath)); + } + } + /** * Confirm that deleteObject throws exception when associated pid obj not found */ diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 3905e942..6bf5f91c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -345,23 +345,7 @@ public void deleteCidRefsPid_pidNotFoundInCidRefsFile() throws Exception { } /** - * Check that deleteCidRefsFile deletes a file when it is empty - */ - @Test - public void deleteCidRefsFile_fileDeleted() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - fileHashStore.deleteCidRefsPid(pid, cid); - fileHashStore.deleteCidRefsFile(cid); - - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - assertFalse(Files.exists(cidRefsFilePath)); - } - - /** - * Check that deleteCidRefsFile throws exception when cid refs file is not empty + * Check that deleteCidRefsFile still exists if called and cid refs file is not empty */ @Test public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { @@ -372,10 +356,7 @@ public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { fileHashStore.tagObject(pidAdditional, cid); fileHashStore.deleteCidRefsPid(pid, cid); - - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.deleteCidRefsFile(cid); - }); + fileHashStore.deleteCidRefsFile(cid); Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(cidRefsFilePath)); From 3a484b4c1e97d20f453613f61f630d75746064a0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 19 Dec 2023 16:19:32 -0800 Subject: [PATCH 139/553] Fix redundant variable names and add missing logging statement to 'findObject' --- .../filehashstore/FileHashStore.java | 125 +++++++++--------- 1 file changed, 64 insertions(+), 61 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index bc36dd6c..105cf228 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -630,35 +630,35 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } try { - Path absPathPidRefsPath = getRealPath(pid, "refs", "pid"); - Path absPathCidRefsPath = getRealPath(cid, "refs", "cid"); + Path absPidRefsPath = getRealPath(pid, "refs", "pid"); + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); // Check that pid refs file doesn't exist yet - if (Files.exists(absPathPidRefsPath)) { + if (Files.exists(absPidRefsPath)) { String errMsg = "FileHashStore.tagObject - pid refs file already exists for pid: " + pid + ". A pid can only reference one cid."; logFileHashStore.error(errMsg); throw new PidRefsFileExistsException(errMsg); - } else if (Files.exists(absPathCidRefsPath)) { + } else if (Files.exists(absCidRefsPath)) { // Ensure that the pid is not already found in the file - boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absPathCidRefsPath); + boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); if (pidFoundInCidRefFiles) { String errMsg = "FileHashStore.tagObject - cid refs file already contains pid: " + pid + ". Refs file not created for both the given pid. Cid refs file (" - + absPathCidRefsPath + ") has not been updated."; + + absCidRefsPath + ") has not been updated."; logFileHashStore.error(errMsg); throw new PidExistsInCidRefsFileException(errMsg); } // Write pid refs file to tmp file File pidRefsTmpFile = writePidRefsFile(cid); - File absPathPidRefsFile = absPathPidRefsPath.toFile(); + File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); // Now update cid refs file - updateCidRefsFiles(pid, absPathCidRefsPath); + updateCidRefsFiles(pid, absCidRefsPath); // Verify tagging process, this throws exceptions if there's an issue - verifyHashStoreRefsFiles(pid, cid, absPathPidRefsPath, absPathCidRefsPath); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( "FileHashStore.tagObject - Object with cid: " + cid @@ -671,12 +671,12 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile File pidRefsTmpFile = writePidRefsFile(cid); File cidRefsTmpFile = writeCidRefsFile(pid); // Move refs files to permanent location - File absPathPidRefsFile = absPathPidRefsPath.toFile(); - File absPathCidRefsFile = absPathCidRefsPath.toFile(); + File absPathPidRefsFile = absPidRefsPath.toFile(); + File absPathCidRefsFile = absCidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); move(cidRefsTmpFile, absPathCidRefsFile, "refs"); // Verify tagging process, this throws exceptions if there's an issue - verifyHashStoreRefsFiles(pid, cid, absPathPidRefsPath, absPathCidRefsPath); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( "FileHashStore.tagObject - Object with cid: " + cid @@ -711,15 +711,18 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); // Get path of the pid references file - Path absPathPidRefsPath = getRealPath(pid, "refs", "pid"); + Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - if (Files.exists(absPathPidRefsPath)) { - String cidFromPidRefsFile = new String(Files.readAllBytes(absPathPidRefsPath)); + if (Files.exists(absPidRefsPath)) { + String cidFromPidRefsFile = new String(Files.readAllBytes(absPidRefsPath)); + logFileHashStore.info( + "FileHashStore.findObject - Cid (" + cidFromPidRefsFile + ") found for pid:" + pid + ); return cidFromPidRefsFile; } else { String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid - + ". Pid refs file does not exist at: " + absPathPidRefsPath; + + ". Pid refs file does not exist at: " + absPidRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } @@ -1669,13 +1672,13 @@ protected File writePidRefsFile(String cid) throws IOException { /** * Checks a given cid refs file for a pid. * - * @param pid Authority-based or persistent identifier to search - * @param absPathCidRefsPath Path to the cid refs file to check + * @param pid Authority-based or persistent identifier to search + * @param absCidRefsPath Path to the cid refs file to check * @return True if cid is found, false otherwise * @throws IOException If unable to read the cid refs file. */ - protected boolean isPidInCidRefsFile(String pid, Path absPathCidRefsPath) throws IOException { - List lines = Files.readAllLines(absPathCidRefsPath); + protected boolean isPidInCidRefsFile(String pid, Path absCidRefsPath) throws IOException { + List lines = Files.readAllLines(absCidRefsPath); boolean pidFoundInCidRefFiles = false; for (String line : lines) { if (line.equals(pid)) { @@ -1691,45 +1694,45 @@ protected boolean isPidInCidRefsFile(String pid, Path absPathCidRefsPath) throws * Verifies that the reference files for the given pid and cid exist and contain * the expected values. * - * @param pid Authority-based or persistent identifier - * @param cid Content identifier - * @param absPathPidRefsPath Path to where the pid refs file exists - * @param absPathCidRefsPath Path to where the cid refs file exists + * @param pid Authority-based or persistent identifier + * @param cid Content identifier + * @param absPidRefsPath Path to where the pid refs file exists + * @param absCidRefsPath Path to where the cid refs file exists * @throws FileNotFoundException Any refs files are missing * @throws IOException Unable to read any of the refs files or if the refs content * is not what is expected */ protected void verifyHashStoreRefsFiles( - String pid, String cid, Path absPathPidRefsPath, Path absPathCidRefsPath + String pid, String cid, Path absPidRefsPath, Path absCidRefsPath ) throws FileNotFoundException, IOException { // First check that the files exist - if (!Files.exists(absPathCidRefsPath)) { + if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " - + absPathCidRefsPath + " for pid: " + pid; + + absCidRefsPath + " for pid: " + pid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } - if (!Files.exists(absPathPidRefsPath)) { + if (!Files.exists(absPidRefsPath)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - pid refs file is missing: " - + absPathPidRefsPath + " for cid: " + cid; + + absPidRefsPath + " for cid: " + cid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } // Now verify the content try { - String cidRead = new String(Files.readAllBytes(absPathPidRefsPath)); + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); if (!cidRead.equals(cid)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Unexpected cid: " - + cidRead + " found in pid refs file: " + absPathPidRefsPath - + ". Expected cid: " + cid; + + cidRead + " found in pid refs file: " + absPidRefsPath + ". Expected cid: " + + cid; logFileHashStore.error(errMsg); throw new IOException(errMsg); } - boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absPathCidRefsPath); + boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " - + pid + " in cid refs file: " + absPathCidRefsPath; + + pid + " in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); throw new IOException(errMsg); @@ -1744,12 +1747,12 @@ protected void verifyHashStoreRefsFiles( /** * Updates a cid refs file with a pid that references the cid * - * @param pid Authority-based or persistent identifier - * @param absPathCidRefsPath Path to the cid refs file to update + * @param pid Authority-based or persistent identifier + * @param absCidRefsPath Path to the cid refs file to update * @throws IOException Issue with updating a cid refs file */ - protected void updateCidRefsFiles(String pid, Path absPathCidRefsPath) throws IOException { - File absPathCidRefsFile = absPathCidRefsPath.toFile(); + protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOException { + File absPathCidRefsFile = absCidRefsPath.toFile(); try { // Obtain a lock on the file before updating it try (RandomAccessFile raf = new RandomAccessFile(absPathCidRefsFile, "rw"); @@ -1786,22 +1789,22 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO FileHashStoreUtility.ensureNotNull(pid, "pid", "deletePidRefsFile"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deletePidRefsFile"); - Path absPathPidRefsPath = getRealPath(pid, "refs", "pid"); + Path absPidRefsPath = getRealPath(pid, "refs", "pid"); // Check to see if pid refs file exists - if (!Files.exists(absPathPidRefsPath)) { + if (!Files.exists(absPidRefsPath)) { String errMsg = "FileHashStore.deletePidRefsFile - File refs file does not exist for pid: " + pid - + " with address" + absPathPidRefsPath; + + " with address" + absPidRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } else { // Proceed to delete - Files.delete(absPathPidRefsPath); + Files.delete(absPidRefsPath); logFileHashStore.debug( "FileHashStore.deletePidRefsFile - Pid refs file deleted for: " + pid - + " with address: " + absPathPidRefsPath + + " with address: " + absPidRefsPath ); } } @@ -1819,45 +1822,45 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx FileHashStoreUtility.ensureNotNull(cid, "pid", "deleteCidRefsPid"); FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsPid"); - Path absPathCidRefsPath = getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); // Check to see if cid refs file exists - if (!Files.exists(absPathCidRefsPath)) { + if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.deleteCidRefsPid - Cid refs file does not exist for cid: " + cid - + " with address" + absPathCidRefsPath; + + " with address" + absCidRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } else { - if (isPidInCidRefsFile(pid, absPathCidRefsPath)) { + if (isPidInCidRefsFile(pid, absCidRefsPath)) { try { - List lines = new ArrayList<>(Files.readAllLines(absPathCidRefsPath)); + List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.remove(pid); Files.write( - absPathCidRefsPath, lines, StandardOpenOption.WRITE, + absCidRefsPath, lines, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING ); logFileHashStore.debug( "FileHashStore.deleteCidRefsPid - Pid: " + pid - + " removed from cid refs file: " + absPathCidRefsPath + + " removed from cid refs file: " + absCidRefsPath ); } catch (IOException ioe) { String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid - + "from cid refs file: " + absPathCidRefsPath + ". Additional Info: " + ioe + + "from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe .getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } // Perform clean up on cid refs file - if it is empty, delete it - if (Files.size(absPathCidRefsPath) == 0) { - Files.delete(absPathCidRefsPath); + if (Files.size(absCidRefsPath) == 0) { + Files.delete(absCidRefsPath); } } else { String errMsg = "FileHashStore.deleteCidRefsPid - pid: " + pid - + " not found in cid refs file: " + absPathCidRefsPath; + + " not found in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } @@ -1875,28 +1878,28 @@ protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IO FileHashStoreUtility.ensureNotNull(cid, "pid", "deleteCidRefsFile"); FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsFile"); - Path absPathCidRefsPath = getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); // Check to see if cid refs file exists - if (!Files.exists(absPathCidRefsPath)) { + if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.deleteCidRefsFile - Cid refs file does not exist for cid: " + cid - + " with address" + absPathCidRefsPath; + + " with address" + absCidRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } else { // A cid refs file is only deleted if it is empty. Client must removed pid(s) first - if (Files.size(absPathCidRefsPath) == 0) { - Files.delete(absPathCidRefsPath); + if (Files.size(absCidRefsPath) == 0) { + Files.delete(absCidRefsPath); logFileHashStore.debug( - "FileHashStore.deleteCidRefsFile - Deleted cid refs file: " + absPathCidRefsPath + "FileHashStore.deleteCidRefsFile - Deleted cid refs file: " + absCidRefsPath ); } else { String errMsg = "FileHashStore.deleteCidRefsFile - Unable to delete cid refs file, it is not empty: " - + absPathCidRefsPath; + + absCidRefsPath; logFileHashStore.warn(errMsg); } } From 5cb2815312b5716ad3debcec67d1d3fe62354b77 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 11:54:02 -0800 Subject: [PATCH 140/553] Implement 'verifyObject' method, refactor 'validateTmpObject' and update junit tests --- .../java/org/dataone/hashstore/HashStore.java | 10 ++- .../filehashstore/FileHashStore.java | 65 ++++++++++++------ .../FileHashStoreReferencesTest.java | 67 +++++++++++++++++++ 3 files changed, 120 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 6adb10c0..304c825c 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -120,16 +120,22 @@ boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsE InterruptedException; /** - * Confirms that an object_metadata's content is equal to the given values. + * Confirms that an ObjectInfo's content is equal to the given values. If it is not equal, + * it will delete the object referenced by the ObjectInfo object. * * @param objectInfo ObjectInfo object with values * @param checksum Value of checksum to validate against * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing + * @throws IOException An issue with deleting the object when there is a + * mismatch + * @throws NoSuchAlgorithmException If checksum algorithm (and its respective checksum) is + * not in objectInfo + * @throws IllegalArgumentException An expected value does not match */ void verifyObject( ObjectInfo objectInfo, String checksum, String checksumAlgorithm, long objSize - ); + ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException; /** * Checks whether an object referenced by a pid exists and returns the content identifier. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 105cf228..520468ad 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -596,6 +596,30 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) return storeObject(object, pid, null, null, null, objSize); } + // TODO: Clean up code and review everything line by line + + @Override + public void verifyObject( + ObjectInfo objectInfo, String checksum, String checksumAlgorithm, long objSize + ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException { + FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); + FileHashStoreUtility.ensureNotNull(objSize, "objSize", "verifyObject"); + + Map hexDigests = objectInfo.getHexDigests(); + long objInfoRetrievedSize = objectInfo.getSize(); + String objId = objectInfo.getId(); + // Object is not tagged at this stage, so we must manually form the permanent address of the file + String cidShardString = getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, objId); + Path objAbsPath = OBJECT_STORE_DIRECTORY.resolve(cidShardString); + + validateTmpObject( + true, checksum, checksumAlgorithm, objAbsPath, hexDigests, objSize, objInfoRetrievedSize + ); + return; + } + @Override public boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, PidExistsInCidRefsFileException, @@ -697,13 +721,6 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile } } - @Override - public void verifyObject( - ObjectInfo objectInfo, String checksum, String checksumAlgorithm, long objSize - ) { - return; - } - @Override public String findObject(String pid) throws NoSuchAlgorithmException, IOException { logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); @@ -1139,6 +1156,7 @@ protected ObjectInfo putObject( // Generate tmp file and write to it logFileHashStore.debug("FileHashStore.putObject - Generating tmpFile"); File tmpFile = generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); + Path tmpFilePath = tmpFile.toPath(); Map hexDigests; try { hexDigests = writeToTmpFileAndGenerateChecksums( @@ -1163,7 +1181,7 @@ protected ObjectInfo putObject( // Validate object if checksum and checksum algorithm is passed validateTmpObject( - requestValidation, checksum, checksumAlgorithm, tmpFile, hexDigests, objSize, + requestValidation, checksum, checksumAlgorithm, tmpFilePath, hexDigests, objSize, storedObjFileSize ); @@ -1208,26 +1226,28 @@ protected ObjectInfo putObject( * @throws IOException When tmpFile fails to be deleted */ private void validateTmpObject( - boolean requestValidation, String checksum, String checksumAlgorithm, File tmpFile, + boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, Map hexDigests, long objSize, long storedObjFileSize ) throws NoSuchAlgorithmException, IOException { if (objSize > 0) { if (objSize != storedObjFileSize) { // Delete tmp File - boolean deleteStatus = tmpFile.delete(); - if (!deleteStatus) { + try { + Files.delete(tmpFile); + + } catch (Exception ge) { String errMsg = "FileHashStore.validateTmpObject - objSize given is not equal to the" + " stored object size. ObjSize: " + objSize + ". storedObjFileSize:" - + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile - .getName(); + + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile; logFileHashStore.error(errMsg); throw new IOException(errMsg); } + String errMsg = "FileHashStore.validateTmpObject - objSize given is not equal to the" + " stored object size. ObjSize: " + objSize + ". storedObjFileSize:" - + storedObjFileSize + ". Deleting tmpFile: " + tmpFile.getName(); + + storedObjFileSize + ". Deleting tmpFile: " + tmpFile; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } @@ -1248,23 +1268,28 @@ private void validateTmpObject( throw new NoSuchAlgorithmException(errMsg); } - if (!checksum.equalsIgnoreCase(digestFromHexDigests)) { + if (checksum.equalsIgnoreCase(digestFromHexDigests)) { + return; + + } else { // Delete tmp File - boolean deleteStatus = tmpFile.delete(); - if (!deleteStatus) { + try { + Files.delete(tmpFile); + + } catch (Exception ge) { String errMsg = "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" + " is not equal to the calculated hex digest: " + digestFromHexDigests + ". Checksum" + " provided: " + checksum - + ". Failed to delete tmpFile: " + tmpFile.getName(); + + ". Failed to delete tmpFile: " + tmpFile; logFileHashStore.error(errMsg); throw new IOException(errMsg); } + String errMsg = "FileHashStore.validateTmpObject - Checksum given is not equal to the" + " calculated hex digest: " + digestFromHexDigests + ". Checksum" - + " provided: " + checksum + ". tmpFile has been deleted: " + tmpFile - .getName(); + + " provided: " + checksum + ". tmpFile has been deleted: " + tmpFile; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 6bf5f91c..866595c8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -9,14 +9,17 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.security.NoSuchAlgorithmException; import java.util.List; import java.util.Properties; +import org.dataone.hashstore.ObjectInfo; import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -27,6 +30,8 @@ public class FileHashStoreReferencesTest { private FileHashStore fileHashStore; private Path rootDirectory; + private Properties fhsProperties; + private static final TestDataHarness testData = new TestDataHarness(); /** * Initialize FileHashStore before each test to creates tmp directories @@ -45,6 +50,7 @@ public void initializeFileHashStore() { ); try { + fhsProperties = storeProperties; fileHashStore = new FileHashStore(storeProperties); } catch (IOException ioe) { @@ -361,4 +367,65 @@ public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(cidRefsFilePath)); } + + /** + * Check that verifyObject verifies with good values + */ + @Test + public void verifyObject_correctValues() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + ); + } + } + + /** + * Check that verifyObject deletes file when there is a mismatch + */ + @Test + public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = "intentionallyWrongValue"; + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + ); + }); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String actualCid = objInfo.getId(); + String cidShardString = fileHashStore.getHierarchicalPathString( + storeDepth, storeWidth, actualCid + ); + Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); + System.out.println(objectStoreDirectory); + assertFalse(Files.exists(objectStoreDirectory)); + + } + } } From f2b891368b63580324c9b5ed8c326f7ff680a3a1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 11:55:46 -0800 Subject: [PATCH 141/553] Add new 'verifyObject' junit test for mismatched object size --- .../FileHashStoreReferencesTest.java | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 866595c8..da5cc144 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -392,6 +392,32 @@ public void verifyObject_correctValues() throws Exception { } } + /** + * Check that verifyObject verifies with good values + */ + @Test + public void verifyObject_mismatchedValuesBadSize() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = 123456789; + + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + ); + }); + } + } + /** * Check that verifyObject deletes file when there is a mismatch */ From 8d197ba45a4e6bb8ffa490cc7162877de909b4b7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 13:53:00 -0800 Subject: [PATCH 142/553] Finalize 'HashAddress' rename to 'ObjectMetadata' by updating the ObjectInfo classname and all affected classes --- .../java/org/dataone/hashstore/HashStore.java | 23 ++++---- .../dataone/hashstore/HashStoreClient.java | 2 +- .../{ObjectInfo.java => ObjectMetadata.java} | 4 +- .../filehashstore/FileHashStore.java | 28 +++++----- .../org/dataone/hashstore/HashStoreTest.java | 2 +- ...tInfoTest.java => ObjectMetadataTest.java} | 28 +++++----- .../FileHashStoreInterfaceTest.java | 56 +++++++++++-------- .../FileHashStoreProtectedTest.java | 16 +++--- .../FileHashStoreReferencesTest.java | 8 +-- 9 files changed, 89 insertions(+), 78 deletions(-) rename src/main/java/org/dataone/hashstore/{ObjectInfo.java => ObjectMetadata.java} (92%) rename src/test/java/org/dataone/hashstore/{ObjectInfoTest.java => ObjectMetadataTest.java} (70%) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 304c825c..166d73cd 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -19,7 +19,7 @@ public interface HashStore { /** * The `storeOject` method is responsible for the atomic storage of objects to disk using a - * given InputStream. Upon successful storage, the method returns a (ObjectInfo) object + * given InputStream. Upon successful storage, the method returns a (ObjectMetadata) object * containing relevant file information, such as the file's id (which can be used to locate * the object on disk), the file's size, and a hex digest dict of algorithms and checksums. * Storing an object with `store_object` also tags an object (creating references) which @@ -55,7 +55,7 @@ public interface HashStore { * @param checksum Value of checksum to validate against * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing - * @return ObjectInfo object encapsulating file information + * @return ObjectMetadata object encapsulating file information * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is invalid * @throws IOException I/O Error when writing file, generating checksums and/or * moving file @@ -64,7 +64,7 @@ public interface HashStore { * arguments (ex. empty pid) or null pointers * @throws InterruptedException When tagging pid and cid process is interrupted */ - ObjectInfo storeObject( + ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, @@ -73,13 +73,13 @@ ObjectInfo storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectInfo storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, + ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectInfo storeObject( + ObjectMetadata storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException; @@ -87,14 +87,14 @@ ObjectInfo storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) + ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectInfo storeObject(InputStream object, String pid, long objSize) + ObjectMetadata storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException; @@ -120,10 +120,11 @@ boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsE InterruptedException; /** - * Confirms that an ObjectInfo's content is equal to the given values. If it is not equal, - * it will delete the object referenced by the ObjectInfo object. + * Confirms that an ObjectMetadata's content is equal to the given values. If it is not + * equal, + * it will delete the object referenced by the ObjectMetadata object. * - * @param objectInfo ObjectInfo object with values + * @param objectInfo ObjectMetadata object with values * @param checksum Value of checksum to validate against * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing @@ -134,7 +135,7 @@ boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsE * @throws IllegalArgumentException An expected value does not match */ void verifyObject( - ObjectInfo objectInfo, String checksum, String checksumAlgorithm, long objSize + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException; /** diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index c12100a5..1be0a268 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -168,7 +168,7 @@ public static void main(String[] args) throws Exception { } InputStream pidObjStream = Files.newInputStream(path); - ObjectInfo objInfo = hashStore.storeObject( + ObjectMetadata objInfo = hashStore.storeObject( pidObjStream, pid, additional_algo, checksum, checksum_algo, size ); pidObjStream.close(); diff --git a/src/main/java/org/dataone/hashstore/ObjectInfo.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java similarity index 92% rename from src/main/java/org/dataone/hashstore/ObjectInfo.java rename to src/main/java/org/dataone/hashstore/ObjectMetadata.java index db9fef17..22ea397f 100644 --- a/src/main/java/org/dataone/hashstore/ObjectInfo.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -8,7 +8,7 @@ * ObjectMetadata objects, client code can easily obtain metadata of a store object in HashStore * without needing to know the underlying file system details. */ -public class ObjectInfo { +public class ObjectMetadata { private final String id; private final long size; private final Map hexDigests; @@ -21,7 +21,7 @@ public class ObjectInfo { * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the * file */ - public ObjectInfo(String id, long size, Map hexDigests) { + public ObjectMetadata(String id, long size, Map hexDigests) { this.id = id; this.size = size; this.hexDigests = hexDigests; diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 520468ad..46d6a571 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -37,7 +37,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.dataone.hashstore.ObjectInfo; +import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidObjectExistsException; @@ -420,7 +420,7 @@ protected String buildHashStoreYamlString( // HashStore Public API Methods @Override - public ObjectInfo storeObject( + public ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, @@ -458,7 +458,7 @@ public ObjectInfo storeObject( /** * Method to synchronize storing objects with FileHashStore */ - private ObjectInfo syncPutObject( + private ObjectMetadata syncPutObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, PidObjectExistsException, IOException, RuntimeException, @@ -487,7 +487,7 @@ private ObjectInfo syncPutObject( + ". checksumAlgorithm: " + checksumAlgorithm ); // Store object - ObjectInfo objInfo = putObject( + ObjectMetadata objInfo = putObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize ); // Tag object @@ -541,8 +541,8 @@ private ObjectInfo syncPutObject( * Overload method for storeObject with just an InputStream */ @Override - public ObjectInfo storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, - PidObjectExistsException, RuntimeException, InterruptedException { + public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, + IOException, PidObjectExistsException, RuntimeException, InterruptedException { // 'putObject' is called directly to bypass the pid synchronization implemented to // efficiently handle duplicate object store requests. Since there is no pid, calling // 'storeObject' would unintentionally create a bottleneck for all requests without a @@ -560,7 +560,7 @@ public ObjectInfo storeObject(InputStream object) throws NoSuchAlgorithmExceptio * Overload method for storeObject with an additionalAlgorithm */ @Override - public ObjectInfo storeObject(InputStream object, String pid, String additionalAlgorithm) + public ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException { FileHashStoreUtility.ensureNotNull( @@ -574,7 +574,7 @@ public ObjectInfo storeObject(InputStream object, String pid, String additionalA * Overload method for storeObject with just a checksum and checksumAlgorithm */ @Override - public ObjectInfo storeObject( + public ObjectMetadata storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException { @@ -588,7 +588,7 @@ public ObjectInfo storeObject( * Overload method for storeObject with size of object to validate */ @Override - public ObjectInfo storeObject(InputStream object, String pid, long objSize) + public ObjectMetadata storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, InterruptedException { FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); @@ -600,7 +600,7 @@ public ObjectInfo storeObject(InputStream object, String pid, long objSize) @Override public void verifyObject( - ObjectInfo objectInfo, String checksum, String checksumAlgorithm, long objSize + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException { FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); @@ -1110,7 +1110,7 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE * @param checksum Value of checksum to validate against * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing - * @return 'ObjectInfo' object that contains the file id, size, and a checksum map based on + * @return 'ObjectMetadata' object that contains the file id, size, and a checksum map based on * the default algorithm list. * @throws IOException I/O Error when writing file, generating checksums, * moving file or deleting tmpFile upon duplicate found @@ -1125,7 +1125,7 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE * @throws NullPointerException Arguments are null for pid or object * @throws AtomicMoveNotSupportedException When attempting to move files across file systems */ - protected ObjectInfo putObject( + protected ObjectMetadata putObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, @@ -1208,8 +1208,8 @@ protected ObjectInfo putObject( ); } - // Create ObjectInfo to return with pertinent data - return new ObjectInfo(objectCid, storedObjFileSize, hexDigests); + // Create ObjectMetadata to return with pertinent data + return new ObjectMetadata(objectCid, storedObjFileSize, hexDigests); } /** diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 06444610..ee06190e 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -127,7 +127,7 @@ public void hashStore_storeObjects() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); diff --git a/src/test/java/org/dataone/hashstore/ObjectInfoTest.java b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java similarity index 70% rename from src/test/java/org/dataone/hashstore/ObjectInfoTest.java rename to src/test/java/org/dataone/hashstore/ObjectMetadataTest.java index f3f44c88..4c0baf2e 100644 --- a/src/test/java/org/dataone/hashstore/ObjectInfoTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java @@ -10,15 +10,15 @@ import org.junit.jupiter.api.BeforeEach; /** - * Test class for ObjectInfo + * Test class for ObjectMetadata */ -public class ObjectInfoTest { +public class ObjectMetadataTest { private static String id = ""; private static long size; private static Map hexDigests; /** - * Initialize ObjectInfo variables for test efficiency purposes + * Initialize ObjectMetadata variables for test efficiency purposes */ @BeforeEach public void initializeInstanceVariables() { @@ -41,40 +41,40 @@ public void initializeInstanceVariables() { } /** - * Check ObjectInfo constructor + * Check ObjectMetadata constructor */ @Test - public void testObjectInfo() { - ObjectInfo objInfo = new ObjectInfo(id, size, hexDigests); + public void testObjectMetadata() { + ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); assertNotNull(objInfo); } /** - * Check ObjectInfo get id + * Check ObjectMetadata get id */ @Test - public void testObjectInfoGetId() { - ObjectInfo objInfo = new ObjectInfo(id, size, hexDigests); + public void testObjectMetadataGetId() { + ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); String objId = objInfo.getId(); assertEquals(objId, id); } /** - * Check ObjectInfo get size + * Check ObjectMetadata get size */ @Test public void testHashAddressGetSize() { - ObjectInfo objInfo = new ObjectInfo(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); long objSize = objInfo.getSize(); assertEquals(objSize, size); } /** - * Check ObjectInfo get hexDigests + * Check ObjectMetadata get hexDigests */ @Test - public void testObjectInfoGetHexDigests() { - ObjectInfo objInfo = new ObjectInfo(id, size, hexDigests); + public void testObjectMetadataGetHexDigests() { + ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); Map objInfoMap = objInfo.getHexDigests(); assertEquals(objInfoMap, hexDigests); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index c55e9168..5e37972a 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -29,7 +29,7 @@ import javax.xml.bind.DatatypeConverter; -import org.dataone.hashstore.ObjectInfo; +import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -81,7 +81,7 @@ public void initializeFileHashStore() { public Path tempFolder; /** - * Check that store object returns the correct ObjectInfo id + * Check that store object returns the correct ObjectMetadata id */ @Test public void storeObject() throws Exception { @@ -90,7 +90,9 @@ public void storeObject() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); @@ -99,7 +101,7 @@ public void storeObject() throws Exception { } /** - * Check that store object returns the correct ObjectInfo size + * Check that store object returns the correct ObjectMetadata size */ @Test public void storeObject_objSize() throws Exception { @@ -108,7 +110,9 @@ public void storeObject_objSize() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); // Check the object size long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -117,7 +121,7 @@ public void storeObject_objSize() throws Exception { } /** - * Check that store object returns the correct ObjectInfo hex digests + * Check that store object returns the correct ObjectMetadata hex digests */ @Test public void storeObject_hexDigests() throws Exception { @@ -126,7 +130,9 @@ public void storeObject_hexDigests() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); Map hexDigests = objInfo.getHexDigests(); @@ -213,7 +219,7 @@ public void storeObject_additionalAlgorithm_overload() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, "MD2"); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, "MD2"); Map hexDigests = objInfo.getHexDigests(); @@ -234,7 +240,7 @@ public void storeObject_validateChecksum_overload() throws Exception { String md2 = testData.pidData.get(pid).get("md2"); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, md2, "MD2"); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, md2, "MD2"); Map hexDigests = objInfo.getHexDigests(); @@ -244,7 +250,7 @@ public void storeObject_validateChecksum_overload() throws Exception { } /** - * Check that store object returns the correct ObjectInfo size with overloaded method + * Check that store object returns the correct ObjectMetadata size with overloaded method */ @Test public void storeObject_objSize_overload() throws Exception { @@ -254,7 +260,7 @@ public void storeObject_objSize_overload() throws Exception { long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, objectSize); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, objectSize); assertEquals(objectSize, objInfo.getSize()); } @@ -271,7 +277,7 @@ public void storeObject_inputStream_overload() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); Map hexDigests = objInfo.getHexDigests(); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); @@ -385,7 +391,7 @@ public void storeObject_objSizeCorrect() throws Exception { long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, objectSize ); @@ -405,7 +411,7 @@ public void storeObject_objSizeIncorrect() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 1000 ); @@ -555,7 +561,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { Future future1 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 0 ); if (objInfo != null) { @@ -572,7 +578,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { Future future2 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 0 ); if (objInfo != null) { @@ -589,7 +595,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { Future future3 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 0 ); if (objInfo != null) { @@ -606,7 +612,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { Future future4 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 0 ); if (objInfo != null) { @@ -623,7 +629,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { Future future5 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 0 ); if (objInfo != null) { @@ -668,7 +674,7 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { Future future1 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 0 ); if (objInfo != null) { @@ -685,7 +691,7 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { Future future2 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject( + ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 0 ); if (objInfo != null) { @@ -1267,7 +1273,9 @@ public void deleteObject_referencesDeleted() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); String cid = objInfo.getId(); // Path objAbsPath = fileHashStore.getRealPath(pid, "object", null); @@ -1463,7 +1471,9 @@ public void getHexDigest() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); // Then get the checksum String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index aede8db6..6306e517 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -20,7 +20,7 @@ import javax.xml.bind.DatatypeConverter; -import org.dataone.hashstore.ObjectInfo; +import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -210,7 +210,7 @@ public void putObject_testHarness_id() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); @@ -219,7 +219,7 @@ public void putObject_testHarness_id() throws Exception { } /** - * Check that store object returns the correct ObjectInfo size + * Check that store object returns the correct ObjectMetadata size */ @Test public void putObject_objSize() throws Exception { @@ -228,7 +228,7 @@ public void putObject_objSize() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -246,7 +246,7 @@ public void putObject_testHarness_hexDigests() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); Map hexDigests = address.getHexDigests(); @@ -276,7 +276,7 @@ public void putObject_validateChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo address = fileHashStore.putObject( + ObjectMetadata address = fileHashStore.putObject( dataStream, pid, null, checksumCorrect, "MD2", -1 ); @@ -396,7 +396,7 @@ public void putObject_objSizeCorrect() throws Exception { long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.putObject( + ObjectMetadata objInfo = fileHashStore.putObject( dataStream, pid, null, null, null, objectSize ); @@ -416,7 +416,7 @@ public void putObject_objSizeIncorrect() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.putObject( + ObjectMetadata objInfo = fileHashStore.putObject( dataStream, pid, null, null, null, 1000 ); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index da5cc144..0b9b627c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -16,7 +16,7 @@ import java.util.List; import java.util.Properties; -import org.dataone.hashstore.ObjectInfo; +import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.testdata.TestDataHarness; @@ -378,7 +378,7 @@ public void verifyObject_correctValues() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); @@ -402,7 +402,7 @@ public void verifyObject_mismatchedValuesBadSize() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); @@ -428,7 +428,7 @@ public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectInfo objInfo = fileHashStore.storeObject(dataStream); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); From a76250208b05f819f9a3ffb33101ff9fa919c7a4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 14:27:08 -0800 Subject: [PATCH 143/553] Clean up code and fix minor bugs --- .../java/org/dataone/hashstore/HashStore.java | 7 +++---- .../org/dataone/hashstore/HashStoreClient.java | 2 +- .../hashstore/filehashstore/FileHashStore.java | 17 +++++------------ .../FileHashStoreReferencesTest.java | 5 +++-- 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 166d73cd..88cba833 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -18,7 +18,7 @@ */ public interface HashStore { /** - * The `storeOject` method is responsible for the atomic storage of objects to disk using a + * The `storeObject` method is responsible for the atomic storage of objects to disk using a * given InputStream. Upon successful storage, the method returns a (ObjectMetadata) object * containing relevant file information, such as the file's id (which can be used to locate * the object on disk), the file's size, and a hex digest dict of algorithms and checksums. @@ -29,7 +29,7 @@ public interface HashStore { * calls and rejecting calls to store duplicate objects. Note, calling `storeObject` without * a pid is a possibility, but should only store the object without tagging the object. It * is then the caller's responsibility to finalize the process by calling `tagObject` after - * veriftying the correct object is stored. + * verifying the correct object is stored. * * The file's id is determined by calculating the object's content identifier based on the * store's default algorithm, which is also used as the permanent address of the file. The @@ -105,7 +105,6 @@ ObjectMetadata storeObject(InputStream object, String pid, long objSize) * * @param pid Authority-based identifier * @param cid Content-identifier (hash identifier) - * @return Boolean to indicate the pid and cid has been tagged. * @throws IOException Failure to create tmp file * @throws PidRefsFileExistsException When pid refs file already exists * @throws PidExistsInCidRefsFileException pid to write already exists in a cid refs file @@ -115,7 +114,7 @@ ObjectMetadata storeObject(InputStream object, String pid, long objSize) * @throws InterruptedException When tagObject is waiting to execute but is * interrupted */ - boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, + void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, PidExistsInCidRefsFileException, NoSuchAlgorithmException, FileNotFoundException, InterruptedException; diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 1be0a268..d65b5a5e 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -160,7 +160,7 @@ public static void main(String[] args) throws Exception { if (cmd.hasOption("checksum_algo")) { checksum_algo = cmd.getOptionValue("checksum_algo"); } - long size = 0; + long size; if (cmd.hasOption("size")) { size = Long.parseLong(cmd.getOptionValue("size")); } else { diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 46d6a571..dfe3c27a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -542,7 +542,7 @@ private ObjectMetadata syncPutObject( */ @Override public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, - IOException, PidObjectExistsException, RuntimeException, InterruptedException { + IOException, PidObjectExistsException, RuntimeException { // 'putObject' is called directly to bypass the pid synchronization implemented to // efficiently handle duplicate object store requests. Since there is no pid, calling // 'storeObject' would unintentionally create a bottleneck for all requests without a @@ -605,7 +605,6 @@ public void verifyObject( FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); - FileHashStoreUtility.ensureNotNull(objSize, "objSize", "verifyObject"); Map hexDigests = objectInfo.getHexDigests(); long objInfoRetrievedSize = objectInfo.getSize(); @@ -617,11 +616,10 @@ public void verifyObject( validateTmpObject( true, checksum, checksumAlgorithm, objAbsPath, hexDigests, objSize, objInfoRetrievedSize ); - return; } @Override - public boolean tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, + public void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, PidExistsInCidRefsFileException, InterruptedException { logFileHashStore.debug( @@ -688,7 +686,6 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile "FileHashStore.tagObject - Object with cid: " + cid + " has been updated successfully with pid: " + pid ); - return true; } else { // Get pid and cid refs files @@ -706,7 +703,6 @@ public boolean tagObject(String pid, String cid) throws IOException, PidRefsFile "FileHashStore.tagObject - Object with cid: " + cid + " has been tagged successfully with pid: " + pid ); - return true; } } finally { @@ -1268,10 +1264,7 @@ private void validateTmpObject( throw new NoSuchAlgorithmException(errMsg); } - if (checksum.equalsIgnoreCase(digestFromHexDigests)) { - return; - - } else { + if (!checksum.equalsIgnoreCase(digestFromHexDigests)) { // Delete tmp File try { Files.delete(tmpFile); @@ -1914,7 +1907,7 @@ protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IO throw new FileNotFoundException(errMsg); } else { - // A cid refs file is only deleted if it is empty. Client must removed pid(s) first + // A cid refs file is only deleted if it is empty. Client must remove pid(s) first if (Files.size(absCidRefsPath) == 0) { Files.delete(absCidRefsPath); logFileHashStore.debug( @@ -2025,7 +2018,7 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea /** * Get the absolute path of a HashStore object or metadata file * - * @param abId Authority-based, persistent or content idenfitier + * @param abId Authority-based, persistent or content identifier * @param entity "object" or "metadata" * @param formatId Metadata namespace or reference type (pid/cid) * @return Actual path to object diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 0b9b627c..70206c8e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -132,6 +132,7 @@ public void tagObject_cidRefsFileExists() throws Exception { for (String line : lines) { if (line.equals(pidAdditional)) { pidFoundInCidRefFiles = true; + break; } } assertTrue(pidFoundInCidRefFiles); @@ -174,7 +175,7 @@ public void findObject_content() throws Exception { * Check that exception is thrown when pid refs file doesn't exist */ @Test - public void findObject_pidNotFound() throws Exception { + public void findObject_pidNotFound() { String pid = "dou.test.1"; assertThrows(FileNotFoundException.class, () -> { fileHashStore.findObject(pid); @@ -271,7 +272,7 @@ public void updateCidRefsFiles_content() throws Exception { if (line.equals(pidAdditional)) { pidAdditional_foundInCidRefFiles = true; } - if (line.equals(pidAdditional)) { + if (line.equals(pid)) { pidOriginal_foundInCidRefFiles = true; } } From b04d3c117ee3b609d56a95eaecbb57b6dfcc4b3c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 14:28:05 -0800 Subject: [PATCH 144/553] Update java version from 1.8 to 17 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 796c5bae..532e1c7b 100644 --- a/pom.xml +++ b/pom.xml @@ -14,8 +14,8 @@ UTF-8 - 1.8 - 1.8 + 17 + 17 From a1b675ce8982a49fbc291020a3f9ae7fbaaf1d24 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 14:56:36 -0800 Subject: [PATCH 145/553] Refactor 'deleteObject' to share synchronization with 'tagObject' on content identifier string to mitigate reference file conflicts --- .../java/org/dataone/hashstore/HashStore.java | 6 +- .../filehashstore/FileHashStore.java | 76 +++++++++++++------ 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 88cba833..fe2cab7d 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -218,12 +218,14 @@ InputStream retrieveMetadata(String pid, String formatId) throws IllegalArgument * @param pid Authority-based identifier * @throws IllegalArgumentException When pid is null or empty * @throws FileNotFoundException When requested pid has no associated object - * @throws IOException I/O error when deleting empty directories + * @throws IOException I/O error when deleting empty directories, + * modifying/deleting reference files * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported + * @throws InterruptedException When deletion synchronization is interrupted */ void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException; + IOException, NoSuchAlgorithmException, InterruptedException; /** * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index dfe3c27a..6a92e830 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -596,8 +596,6 @@ public ObjectMetadata storeObject(InputStream object, String pid, long objSize) return storeObject(object, pid, null, null, null, objSize); } - // TODO: Clean up code and review everything line by line - @Override public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize @@ -976,7 +974,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, @Override public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException { + IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete object for pid: " + pid ); @@ -984,29 +982,63 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteObject"); - // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getRealPath(pid, "object", null); + String cid = findObject(pid); - // Check to see if object exists - if (!Files.exists(objRealPath)) { - String errMsg = "FileHashStore.deleteObject - File does not exist for pid: " + pid - + " with object address: " + objRealPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" + + " waiting to delete object with cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.add(cid); } - // Proceed to delete - Files.delete(objRealPath); - // Remove pid from the cid refs file - String cid = findObject(pid); - deleteCidRefsPid(pid, cid); - // Delete pid reference file - deletePidRefsFile(pid); + try { + // Get permanent address of the pid by calculating its sha-256 hex digest + Path objRealPath = getRealPath(pid, "object", null); + + // Check to see if object exists + if (!Files.exists(objRealPath)) { + String errMsg = "FileHashStore.deleteObject - File does not exist for pid: " + pid + + " with object address: " + objRealPath; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } + + // Proceed to delete + Files.delete(objRealPath); + // Remove pid from the cid refs file + deleteCidRefsPid(pid, cid); + // Delete pid reference file + deletePidRefsFile(pid); + + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath + ); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.remove(cid); + referenceLockedCids.notifyAll(); + } + } - logFileHashStore.info( - "FileHashStore.deleteObject - File deleted for: " + pid + " with object address: " - + objRealPath - ); } @Override From 8dfb5c08c2b801752c6ebb3f36ca3e4796ec104b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 15:18:44 -0800 Subject: [PATCH 146/553] Clean up code --- .../dataone/hashstore/filehashstore/FileHashStore.java | 9 +++++---- .../filehashstore/FileHashStoreProtectedTest.java | 1 - .../filehashstore/FileHashStoreReferencesTest.java | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6a92e830..794daa57 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1662,8 +1662,8 @@ protected void move(File source, File target, String entity) throws IOException, } /** - * Writes the given 'pid' into a tmp file in the cid refs file format, which consists of - * multiple pids that references a 'cid' delimited by "\n". + * Writes the given 'pid' into a file in the 'cid' refs file format, which consists of + * multiple pids that references a 'cid' on its own line/delimited by "\n". * * @param pid Authority-based or persistent identifier to write * @throws IOException Failure to write pid refs file @@ -1692,7 +1692,7 @@ protected File writeCidRefsFile(String pid) throws IOException { } /** - * Writes the given 'cid' into a tmp file in the 'pid' refs file format. A pid refs file + * Writes the given 'cid' into a file in the 'pid' refs file format. A pid refs file * contains a single 'cid'. Note, a 'pid' can only ever reference one 'cid'. * * @param cid Content identifier to write @@ -1755,7 +1755,7 @@ protected boolean isPidInCidRefsFile(String pid, Path absCidRefsPath) throws IOE protected void verifyHashStoreRefsFiles( String pid, String cid, Path absPidRefsPath, Path absCidRefsPath ) throws FileNotFoundException, IOException { - // First check that the files exist + // First confirm that the files were created if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " + absCidRefsPath + " for pid: " + pid; @@ -1811,6 +1811,7 @@ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOExce try (BufferedWriter writer = new BufferedWriter( new FileWriter(absPathCidRefsFile, true) )) { + // Adds the given pid on its own new line, without any other changes writer.write(pid + "\n"); writer.close(); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 6306e517..ffe53164 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -69,7 +69,6 @@ public void initializeFileHashStore() { */ public File generateTemporaryFile() throws Exception { Path directory = tempFolder.resolve("metacat"); - System.out.println(directory); // newFile return fileHashStore.generateTmpFile("testfile", directory); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 70206c8e..530b9ec1 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -450,7 +450,6 @@ public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { storeDepth, storeWidth, actualCid ); Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - System.out.println(objectStoreDirectory); assertFalse(Files.exists(objectStoreDirectory)); } From cb7fef2348b023ea3dcfffb12e32f7d31e852f48 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 15:56:14 -0800 Subject: [PATCH 147/553] Update HashStore interface javadoc for accuracy --- src/main/java/org/dataone/hashstore/HashStore.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index fe2cab7d..6f66d218 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -10,10 +10,10 @@ import org.dataone.hashstore.exceptions.PidRefsFileExistsException; /** - * HashStore is a content-addressable file management system that utilizes the hash/hex digest of a - * given persistent identifier (PID) to address files. The system stores both objects and metadata - * in its respective directories and provides an API for interacting with the store. HashStore - * storage classes (like `FileHashStore`) must implement the HashStore interface to ensure proper + * HashStore is a content-addressable file management system that utilizes the content identifier of + * an object to address files. The system stores both objects, references (refs) and metadata in its + * respective directories and provides an API for interacting with the store. HashStore storage + * classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected * usage of the system. */ public interface HashStore { From 57025be80e17bea014f8c9ac1da91da9c802a7ff Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 16:02:01 -0800 Subject: [PATCH 148/553] Update README.md --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index b9cf3cb4..41b40ae9 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,36 @@ DataONE in general, and HashStore in particular, are open source, community proj Documentation is a work in progress, and can be found on the [Metacat repository](https://github.com/NCEAS/metacat/blob/feature-1436-storage-and-indexing/docs/user/metacat/source/storage-subsystem.rst#physical-file-layout) as part of the storage redesign planning. Future updates will include documentation here as the package matures. +## HashStore Summary + +HashStore is a content-addressable file management system that utilizes the content identifier of an object to address files. The system stores both objects, references (refs) and metadata in its respective directories and provides an API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of the system. + +###### Working with objects + +As content identifiers are used to store objects (files) in HashStore, objects are stored once and only once. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identfiier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: +```java +// All-in-one process which stores, validates and tags an object +objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) + +// Manual Process +// Store object +objectMetadata objInfo = storeObject(InputStream) +// Validate object, throws exceptions if there is a mismatch and deletes the associated file +verifyObject(objInfo, checksum, checksumAlgorithn, objSize) +// Tag object, makes the object discoverable (find, retrieve, delete) +tagObject(pid, cid) +``` + +To retrieve an object, the client calls `retrieveObject` which returns a stream if the object exists. To find the location of the object, the client is expected to call `findObject` which will return the content identifier of the object. This can then be used to locate the object on disk. + +To delete an object, the client calls `deleteObject` which will delete the object and its associated references and reference files where relevant. Note, `deleteObject` and `tagObject` calls are synchronized so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to enhance clarity in the event of an unexpected conflict.issue. + +###### Working with metadata +Coming Soon + +###### Additional information +Coming Soon + ## Development build HashStore is a Java package, and built using the [Maven](https://maven.apache.org/) build tool. From da118dd9302fbb6fdd3df7a8652b1faf8720c0d0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 20 Dec 2023 16:03:55 -0800 Subject: [PATCH 149/553] Update README.md with missing comments regarding deleting objects --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 41b40ae9..8630d089 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ tagObject(pid, cid) To retrieve an object, the client calls `retrieveObject` which returns a stream if the object exists. To find the location of the object, the client is expected to call `findObject` which will return the content identifier of the object. This can then be used to locate the object on disk. -To delete an object, the client calls `deleteObject` which will delete the object and its associated references and reference files where relevant. Note, `deleteObject` and `tagObject` calls are synchronized so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to enhance clarity in the event of an unexpected conflict.issue. +To delete an object, the client calls `deleteObject` which will delete the object and its associated references and reference files where relevant. Note, `deleteObject` and `tagObject` calls are synchronized so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. ###### Working with metadata Coming Soon From eb408bf21d4d58d7dd992f0f9b3a239fff3256eb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 10:32:19 -0800 Subject: [PATCH 150/553] Refactor 'tagObject' update condition and revise junit tests --- .../java/org/dataone/hashstore/HashStore.java | 4 +-- .../PidExistsInCidRefsFileException.java | 13 -------- .../filehashstore/FileHashStore.java | 31 +++++++++---------- .../FileHashStoreReferencesTest.java | 29 +++++++++-------- 4 files changed, 29 insertions(+), 48 deletions(-) delete mode 100644 src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 6f66d218..d0cd848f 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -5,7 +5,6 @@ import java.io.InputStream; import java.security.NoSuchAlgorithmException; -import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -115,8 +114,7 @@ ObjectMetadata storeObject(InputStream object, String pid, long objSize) * interrupted */ void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - PidExistsInCidRefsFileException, NoSuchAlgorithmException, FileNotFoundException, - InterruptedException; + NoSuchAlgorithmException, FileNotFoundException, InterruptedException; /** * Confirms that an ObjectMetadata's content is equal to the given values. If it is not diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java deleted file mode 100644 index 85d222b7..00000000 --- a/src/main/java/org/dataone/hashstore/exceptions/PidExistsInCidRefsFileException.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.dataone.hashstore.exceptions; - -import java.io.IOException; - -/** - * Custom exception class for FileHashStore pidObjects - */ -public class PidExistsInCidRefsFileException extends IOException { - public PidExistsInCidRefsFileException(String message) { - super(message); - } - -} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 794daa57..f196158b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -39,7 +39,6 @@ import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; -import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -551,8 +550,8 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce // unavailable. // // Note: This method does not tag the object to make it discoverable, so the client must - // call 'tagObject' and 'verifyObject' separately to ensure that the object stored - // is discoverable and is what is expected. + // call 'verifyObject' and 'tagObject' separately to ensure that the object stored + // is what is expected and is discoverable. return putObject(object, "HashStoreNoPid", null, null, null, -1); } @@ -600,6 +599,9 @@ public ObjectMetadata storeObject(InputStream object, String pid, long objSize) public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException { + logFileHashStore.debug( + "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getId() + ); FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); @@ -614,12 +616,14 @@ public void verifyObject( validateTmpObject( true, checksum, checksumAlgorithm, objAbsPath, hexDigests, objSize, objInfoRetrievedSize ); + logFileHashStore.info( + "FileHashStore.verifyObject - Object with id: " + objId + " has been verified." + ); } @Override public void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException, FileNotFoundException, PidExistsInCidRefsFileException, - InterruptedException { + NoSuchAlgorithmException, FileNotFoundException, InterruptedException { logFileHashStore.debug( "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid ); @@ -661,28 +665,21 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi throw new PidRefsFileExistsException(errMsg); } else if (Files.exists(absCidRefsPath)) { - // Ensure that the pid is not already found in the file + // Only update cid refs file if pid is not in the file boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); - if (pidFoundInCidRefFiles) { - String errMsg = "FileHashStore.tagObject - cid refs file already contains pid: " - + pid + ". Refs file not created for both the given pid. Cid refs file (" - + absCidRefsPath + ") has not been updated."; - logFileHashStore.error(errMsg); - throw new PidExistsInCidRefsFileException(errMsg); + if (!pidFoundInCidRefFiles) { + updateCidRefsFiles(pid, absCidRefsPath); } - - // Write pid refs file to tmp file + // Get the pid refs file File pidRefsTmpFile = writePidRefsFile(cid); File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - // Now update cid refs file - updateCidRefsFiles(pid, absCidRefsPath); // Verify tagging process, this throws exceptions if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( "FileHashStore.tagObject - Object with cid: " + cid - + " has been updated successfully with pid: " + pid + + " has been updated and tagged successfully with pid: " + pid ); } else { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 530b9ec1..f1bb75b6 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -17,7 +17,6 @@ import java.util.Properties; import org.dataone.hashstore.ObjectMetadata; -import org.dataone.hashstore.exceptions.PidExistsInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -127,20 +126,14 @@ public void tagObject_cidRefsFileExists() throws Exception { // Check cid refs file Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - List lines = Files.readAllLines(cidRefsFilePath); - boolean pidFoundInCidRefFiles = false; - for (String line : lines) { - if (line.equals(pidAdditional)) { - pidFoundInCidRefFiles = true; - break; - } - } + boolean pidFoundInCidRefFiles = fileHashStore.isPidInCidRefsFile( + pidAdditional, cidRefsFilePath + ); assertTrue(pidFoundInCidRefFiles); } /** - * Check that tagObject throws an exception when calling to write a pid into a cid refs - * file that already contains the pid + * Check that tagObject creates pid refs file when pid already exists in cid refs file */ @Test public void tagObject_pidExistsInCidRefsFile() throws Exception { @@ -148,13 +141,19 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { String cid = "abcdef123456789"; File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); fileHashStore.move(cidRefsTmpFile, cidRefsFilePath.toFile(), "refs"); - assertThrows(PidExistsInCidRefsFileException.class, () -> { - fileHashStore.tagObject(pid, cid); - }); + fileHashStore.tagObject(pid, cid); + + Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); + assertTrue(Files.exists(pidRefsFilePath)); + + // Confirm that cid refs file only has 1 line + List lines = Files.readAllLines(cidRefsFilePath); + int numberOfLines = lines.size(); + assertEquals(numberOfLines, 1); + } /** From 92701cb8b0cc79057eff9650843244c11b7b7302 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 10:36:31 -0800 Subject: [PATCH 151/553] Add missing 'Override' declarations and update HashStore interface --- src/main/java/org/dataone/hashstore/HashStore.java | 12 ++++++++++++ .../hashstore/filehashstore/FileHashStore.java | 2 ++ 2 files changed, 14 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index d0cd848f..0d45b164 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -209,6 +209,12 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, InputStream retrieveMetadata(String pid, String formatId) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * @see #retrieveMetadata(String, String) + */ + InputStream retrieveMetadata(String pid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; + /** * Deletes an object (and its empty subdirectories) permanently from HashStore using a given * persistent identifier. @@ -240,6 +246,12 @@ void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundExcep void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * @see #deleteMetadata(String, String) + */ + void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, + IOException, NoSuchAlgorithmException; + /** * Calculates the hex digest of an object that exists in HashStore using a given persistent * identifier and hash algorithm. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f196158b..4ea236a5 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -927,6 +927,7 @@ public InputStream retrieveMetadata(String pid, String formatId) /** * Overload method for retrieveMetadata with default metadata namespace */ + @Override public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( @@ -1072,6 +1073,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx /** * Overload method for deleteMetadata with default metadata namespace */ + @Override public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { deleteMetadata(pid, DEFAULT_METADATA_NAMESPACE); From 38ae659227a737d08e78d529e6edf91b21ea740d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 10:50:18 -0800 Subject: [PATCH 152/553] Move 'getHierarchicalPathString' method to FileHashStoreUtility class and refactor FileHashStore --- .../filehashstore/FileHashStore.java | 46 ++++--------------- .../filehashstore/FileHashStoreUtility.java | 34 ++++++++++++++ .../FileHashStoreInterfaceTest.java | 6 +-- .../FileHashStoreProtectedTest.java | 6 +-- .../FileHashStoreReferencesTest.java | 2 +- 5 files changed, 49 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4ea236a5..6e9061ea 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -610,7 +610,9 @@ public void verifyObject( long objInfoRetrievedSize = objectInfo.getSize(); String objId = objectInfo.getId(); // Object is not tagged at this stage, so we must manually form the permanent address of the file - String cidShardString = getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, objId); + String cidShardString = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, objId + ); Path objAbsPath = OBJECT_STORE_DIRECTORY.resolve(cidShardString); validateTmpObject( @@ -1214,7 +1216,7 @@ protected ObjectMetadata putObject( // Gather the elements to form the permanent address String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); - String objShardString = getHierarchicalPathString( + String objShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); @@ -1406,38 +1408,6 @@ protected String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgo return DatatypeConverter.printHexBinary(stringMessageDigest.digest()).toLowerCase(); } - /** - * Generates a hierarchical path by dividing a given digest into tokens of fixed width, and - * concatenating them with '/' as the delimiter. - * - * @param dirDepth integer to represent number of directories - * @param dirWidth width of each directory - * @param digest value to shard - * @return String - */ - protected String getHierarchicalPathString(int dirDepth, int dirWidth, String digest) { - List tokens = new ArrayList<>(); - int digestLength = digest.length(); - for (int i = 0; i < dirDepth; i++) { - int start = i * dirWidth; - int end = Math.min((i + 1) * dirWidth, digestLength); - tokens.add(digest.substring(start, end)); - } - - if (dirDepth * dirWidth < digestLength) { - tokens.add(digest.substring(dirDepth * dirWidth)); - } - - List stringArray = new ArrayList<>(); - for (String str : tokens) { - if (!str.trim().isEmpty()) { - stringArray.add(str); - } - } - // stringShard - return String.join("/", stringArray); - } - /** * Creates an empty file in a given location * @@ -2064,14 +2034,14 @@ protected Path getRealPath(String abId, String entity, String formatId) if (entity.equalsIgnoreCase("object")) { // 'abId' is expected to be a pid String objectCid = findObject(abId); - String objShardString = getHierarchicalPathString( + String objShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); } else if (entity.equalsIgnoreCase("metadata")) { String objectCid = getPidHexDigest(abId + formatId, OBJECT_STORE_ALGORITHM); - String objShardString = getHierarchicalPathString( + String objShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); realPath = METADATA_STORE_DIRECTORY.resolve(objShardString); @@ -2079,12 +2049,12 @@ protected Path getRealPath(String abId, String entity, String formatId) } else if (entity.equalsIgnoreCase("refs")) { if (formatId.equalsIgnoreCase("pid")) { String pidRefId = getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); - String pidShardString = getHierarchicalPathString( + String pidShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidRefId ); realPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); } else if (formatId.equalsIgnoreCase("cid")) { - String cidShardString = getHierarchicalPathString( + String cidShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, abId ); realPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 4ddee363..f93a88c0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -6,6 +6,8 @@ import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; import java.util.stream.Stream; import javax.xml.bind.DatatypeConverter; @@ -122,4 +124,36 @@ public static void checkNotNegativeOrZero(long longInt, String method) } } + /** + * Generates a hierarchical path by dividing a given digest into tokens of fixed width, and + * concatenating them with '/' as the delimiter. + * + * @param depth integer to represent number of directories + * @param width width of each directory + * @param digest value to shard + * @return String + */ + public static String getHierarchicalPathString(int depth, int width, String digest) { + List tokens = new ArrayList<>(); + int digestLength = digest.length(); + for (int i = 0; i < depth; i++) { + int start = i * width; + int end = Math.min((i + 1) * width, digestLength); + tokens.add(digest.substring(start, end)); + } + + if (depth * width < digestLength) { + tokens.add(digest.substring(depth * width)); + } + + List stringArray = new ArrayList<>(); + for (String str : tokens) { + if (!str.trim().isEmpty()) { + stringArray.add(str); + } + } + // stringShard + return String.join("/", stringArray); + } + } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 5e37972a..e7a509c9 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -729,7 +729,7 @@ public void storeMetadata() throws Exception { String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, null); // Get relative path - String metadataCidShardString = fileHashStore.getHierarchicalPathString( + String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( 3, 2, metadataCid ); // Get absolute path @@ -759,7 +759,7 @@ public void storeMetadata_defaultFormatId_overload() throws Exception { String metadataCid = fileHashStore.storeMetadata(metadataStream, pid); // Get relative path - String metadataCidShardString = fileHashStore.getHierarchicalPathString( + String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( 3, 2, metadataCid ); // Get absolute path @@ -789,7 +789,7 @@ public void storeMetadata_fileSize() throws Exception { String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, null); // Get relative path - String metadataCidShardString = fileHashStore.getHierarchicalPathString( + String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( 3, 2, metadataCid ); // Get absolute path diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index ffe53164..fa42b474 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -166,7 +166,7 @@ public void generateTempFile() throws Exception { */ @Test public void getHierarchicalPathString() { - String shardedPath = fileHashStore.getHierarchicalPathString( + String shardedPath = FileHashStoreUtility.getHierarchicalPathString( 3, 2, "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a" ); String shardedPathExpected = @@ -281,7 +281,7 @@ public void putObject_validateChecksumValue() throws Exception { String objCid = address.getId(); // Get relative path - String objCidShardString = fileHashStore.getHierarchicalPathString(3, 2, objCid); + String objCidShardString = FileHashStoreUtility.getHierarchicalPathString(3, 2, objCid); // Get absolute path Path storePath = Paths.get(fhsProperties.getProperty("storePath")); Path objCidAbsPath = storePath.resolve("objects/" + objCidShardString); @@ -718,7 +718,7 @@ public void putMetadata() throws Exception { String metadataCid = fileHashStore.putMetadata(metadataStream, pid, null); // Get relative path - String metadataCidShardString = fileHashStore.getHierarchicalPathString( + String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( 3, 2, metadataCid ); // Get absolute path diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index f1bb75b6..8868c65e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -445,7 +445,7 @@ public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); String actualCid = objInfo.getId(); - String cidShardString = fileHashStore.getHierarchicalPathString( + String cidShardString = FileHashStoreUtility.getHierarchicalPathString( storeDepth, storeWidth, actualCid ); Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); From 2e8649adfbd1a813912f88026bd4be44396a4122 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 10:55:30 -0800 Subject: [PATCH 153/553] Move 'generateTmpFile' method to FileHashStoreUtility class and refactor FileHashStore and junit tests --- .../filehashstore/FileHashStore.java | 49 +++---------------- .../filehashstore/FileHashStoreUtility.java | 32 ++++++++++++ .../FileHashStoreProtectedTest.java | 2 +- 3 files changed, 39 insertions(+), 44 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6e9061ea..053c40a3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1184,7 +1184,7 @@ protected ObjectMetadata putObject( // Generate tmp file and write to it logFileHashStore.debug("FileHashStore.putObject - Generating tmpFile"); - File tmpFile = generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); + File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); Path tmpFilePath = tmpFile.toPath(); Map hexDigests; try { @@ -1408,45 +1408,6 @@ protected String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgo return DatatypeConverter.printHexBinary(stringMessageDigest.digest()).toLowerCase(); } - /** - * Creates an empty file in a given location - * - * @param prefix string to prepend before tmp file - * @param directory location to create tmp file - * @return Temporary file (File) ready to write into - * @throws IOException Issues with generating tmpFile - * @throws SecurityException Insufficient permissions to create tmpFile - */ - protected File generateTmpFile(String prefix, Path directory) throws IOException, - SecurityException { - Random rand = new Random(); - int randomNumber = rand.nextInt(1000000); - String newPrefix = prefix + "-" + System.currentTimeMillis() + randomNumber; - - try { - Path newPath = Files.createTempFile(directory, newPrefix, null); - File newFile = newPath.toFile(); - logFileHashStore.trace( - "FileHashStore.generateTmpFile - tmpFile generated: " + newFile.getAbsolutePath() - ); - newFile.deleteOnExit(); - return newFile; - - } catch (IOException ioe) { - String errMsg = "FileHashStore.generateTmpFile - Unable to generate tmpFile: " + ioe - .fillInStackTrace(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - - } catch (SecurityException se) { - String errMsg = "FileHashStore.generateTmpFile - Unable to generate tmpFile: " + se - .fillInStackTrace(); - logFileHashStore.error(errMsg); - throw new SecurityException(errMsg); - - } - } - /** * Write the input stream into a given file (tmpFile) and return a HashMap consisting of * algorithms and their respective hex digests. If an additional algorithm is supplied and @@ -1638,7 +1599,7 @@ protected void move(File source, File target, String entity) throws IOException, * @throws IOException Failure to write pid refs file */ protected File writeCidRefsFile(String pid) throws IOException { - File cidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + File cidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); String pidNewLine = pid + "\n"; try (BufferedWriter writer = new BufferedWriter( @@ -1668,7 +1629,7 @@ protected File writeCidRefsFile(String pid) throws IOException { * @throws IOException Failure to write pid refs file */ protected File writePidRefsFile(String cid) throws IOException { - File pidRefsTmpFile = generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + File pidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); try (BufferedWriter writer = new BufferedWriter( new OutputStreamWriter( Files.newOutputStream(pidRefsTmpFile.toPath()), StandardCharsets.UTF_8 @@ -1965,7 +1926,9 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) Path metadataCidPath = getRealPath(pid, "metadata", checkedFormatId); // Store metadata to tmpMetadataFile - File tmpMetadataFile = generateTmpFile("tmp", METADATA_TMP_FILE_DIRECTORY); + File tmpMetadataFile = FileHashStoreUtility.generateTmpFile( + "tmp", METADATA_TMP_FILE_DIRECTORY + ); boolean tmpMetadataWritten = writeToTmpMetadataFile(tmpMetadataFile, metadata); if (tmpMetadataWritten) { logFileHashStore.debug( diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index f93a88c0..1666d2c0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -1,5 +1,6 @@ package org.dataone.hashstore.filehashstore; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -8,6 +9,7 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; +import java.util.Random; import java.util.stream.Stream; import javax.xml.bind.DatatypeConverter; @@ -156,4 +158,34 @@ public static String getHierarchicalPathString(int depth, int width, String dige return String.join("/", stringArray); } + /** + * Creates a empty/temporary file in a given location. If this file is not moved, it will + * be deleted upon JVM gracefully exiting or shutting down. + * + * @param prefix string to prepend before tmp file + * @param directory location to create tmp file + * @return Temporary file ready to write into + * @throws IOException Issues with generating tmpFile + * @throws SecurityException Insufficient permissions to create tmpFile + */ + public static File generateTmpFile(String prefix, Path directory) throws IOException, + SecurityException { + Random rand = new Random(); + int randomNumber = rand.nextInt(1000000); + String newPrefix = prefix + "-" + System.currentTimeMillis() + randomNumber; + + try { + Path newPath = Files.createTempFile(directory, newPrefix, null); + File newFile = newPath.toFile(); + newFile.deleteOnExit(); + return newFile; + + } catch (IOException ioe) { + throw ioe; + + } catch (SecurityException se) { + throw se; + + } + } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index fa42b474..26f31416 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -70,7 +70,7 @@ public void initializeFileHashStore() { public File generateTemporaryFile() throws Exception { Path directory = tempFolder.resolve("metacat"); // newFile - return fileHashStore.generateTmpFile("testfile", directory); + return FileHashStoreUtility.generateTmpFile("testfile", directory); } /** From ba15322affd2b0a9549dcd37b63d7a84f6f6ac57 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 11:11:20 -0800 Subject: [PATCH 154/553] Clean up code, update javadocs and comments --- .../filehashstore/FileHashStore.java | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 053c40a3..744071cf 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1593,23 +1593,25 @@ protected void move(File source, File target, String entity) throws IOException, /** * Writes the given 'pid' into a file in the 'cid' refs file format, which consists of - * multiple pids that references a 'cid' on its own line/delimited by "\n". + * multiple pids that references a 'cid' each on its own line (delimited by "\n"). * * @param pid Authority-based or persistent identifier to write * @throws IOException Failure to write pid refs file */ protected File writeCidRefsFile(String pid) throws IOException { File cidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - String pidNewLine = pid + "\n"; - try (BufferedWriter writer = new BufferedWriter( new OutputStreamWriter( Files.newOutputStream(cidRefsTmpFile.toPath()), StandardCharsets.UTF_8 ) )) { + String pidNewLine = pid + "\n"; writer.write(pidNewLine); writer.close(); + logFileHashStore.debug( + "FileHashStore.writeCidRefsFile - cid refs file written for: " + pid + ); return cidRefsTmpFile; } catch (IOException ioe) { @@ -1638,6 +1640,9 @@ protected File writePidRefsFile(String cid) throws IOException { writer.write(cid); writer.close(); + logFileHashStore.debug( + "FileHashStore.writePidRefsFile - pid refs file written for: " + cid + ); return pidRefsTmpFile; } catch (IOException ioe) { @@ -1650,7 +1655,7 @@ protected File writePidRefsFile(String cid) throws IOException { } /** - * Checks a given cid refs file for a pid. + * Checks a given cid refs file for a pid. This is case sensitive. * * @param pid Authority-based or persistent identifier to search * @param absCidRefsPath Path to the cid refs file to check @@ -1707,7 +1712,6 @@ protected void verifyHashStoreRefsFiles( + cid; logFileHashStore.error(errMsg); throw new IOException(errMsg); - } boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { @@ -1715,7 +1719,6 @@ protected void verifyHashStoreRefsFiles( + pid + " in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); throw new IOException(errMsg); - } } catch (IOException ioe) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); @@ -1737,7 +1740,7 @@ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOExce // Obtain a lock on the file before updating it try (RandomAccessFile raf = new RandomAccessFile(absPathCidRefsFile, "rw"); FileChannel channel = raf.getChannel(); FileLock lock = channel.lock()) { - + // The boolean 'true' in new FileWriter()'s constructor sets it to append mode try (BufferedWriter writer = new BufferedWriter( new FileWriter(absPathCidRefsFile, true) )) { @@ -1771,7 +1774,6 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO FileHashStoreUtility.checkForEmptyString(pid, "pid", "deletePidRefsFile"); Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - // Check to see if pid refs file exists if (!Files.exists(absPidRefsPath)) { String errMsg = @@ -1804,7 +1806,6 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsPid"); Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - // Check to see if cid refs file exists if (!Files.exists(absCidRefsPath)) { String errMsg = @@ -1816,8 +1817,10 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx } else { if (isPidInCidRefsFile(pid, absCidRefsPath)) { try { + // Read all lines into a List List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.remove(pid); + // TRUNCATE_EXISTING reduces a file size to zero bytes Files.write( absCidRefsPath, lines, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING @@ -1850,7 +1853,7 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx /** - * Deletes a cid refs file if it is empty. + * Deletes a cid refs file only if it is empty. * * @param cid Content identifier * @throws IOException Unable to delete object cid refs file @@ -1860,7 +1863,6 @@ protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IO FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsFile"); Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - // Check to see if cid refs file exists if (!Files.exists(absCidRefsPath)) { String errMsg = From 2654f30a16d6b171a84349ee458ec2ba071f1d27 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 11:28:16 -0800 Subject: [PATCH 155/553] Refactor updating and deleting pid references in a cid refs file to be atomic --- .../filehashstore/FileHashStore.java | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 744071cf..07cdbd7c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1735,22 +1735,23 @@ protected void verifyHashStoreRefsFiles( * @throws IOException Issue with updating a cid refs file */ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOException { - File absPathCidRefsFile = absCidRefsPath.toFile(); try { // Obtain a lock on the file before updating it - try (RandomAccessFile raf = new RandomAccessFile(absPathCidRefsFile, "rw"); - FileChannel channel = raf.getChannel(); FileLock lock = channel.lock()) { - // The boolean 'true' in new FileWriter()'s constructor sets it to append mode - try (BufferedWriter writer = new BufferedWriter( - new FileWriter(absPathCidRefsFile, true) - )) { - // Adds the given pid on its own new line, without any other changes - writer.write(pid + "\n"); - writer.close(); - } + try (FileChannel channel = FileChannel.open( + absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE + ); FileLock lock = channel.lock()) { + String newPidReference = pid + "\n"; + List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); + lines.add(newPidReference); + // This update process is atomic, so we first write the updated content + // into a temporary file before overwriting it. + File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + Path tmpFilePath = tmpFile.toPath(); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absCidRefsPath.toFile(), "refs"); logFileHashStore.debug( "FileHashStore.updateCidRefsFiles - Pid: " + pid - + " has been added to cid refs file: " + absPathCidRefsFile + + " has been added to cid refs file: " + absCidRefsPath ); } // The lock is automatically released when the try block exits @@ -1816,15 +1817,20 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx } else { if (isPidInCidRefsFile(pid, absCidRefsPath)) { - try { + try (FileChannel channel = FileChannel.open( + absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE + ); FileLock lock = channel.lock()) { // Read all lines into a List List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.remove(pid); - // TRUNCATE_EXISTING reduces a file size to zero bytes - Files.write( - absCidRefsPath, lines, StandardOpenOption.WRITE, - StandardOpenOption.TRUNCATE_EXISTING + // This delete process is atomic, so we first write the updated content + // into a temporary file before overwriting it. + File tmpFile = FileHashStoreUtility.generateTmpFile( + "tmp", REFS_TMP_FILE_DIRECTORY ); + Path tmpFilePath = tmpFile.toPath(); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absCidRefsPath.toFile(), "refs"); logFileHashStore.debug( "FileHashStore.deleteCidRefsPid - Pid: " + pid + " removed from cid refs file: " + absCidRefsPath From 3c5ee0b52dafcf9a1e334c6e62bf47bb87f00739 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 11:34:44 -0800 Subject: [PATCH 156/553] Clean up code --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 07cdbd7c..a17e97fa 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -4,11 +4,9 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; -import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; -import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import java.nio.channels.FileLock; import java.nio.charset.StandardCharsets; @@ -28,7 +26,6 @@ import java.util.Map; import java.util.Objects; import java.util.Properties; -import java.util.Random; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -1739,7 +1736,7 @@ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOExce // Obtain a lock on the file before updating it try (FileChannel channel = FileChannel.open( absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE - ); FileLock lock = channel.lock()) { + ); FileLock ignored = channel.lock()) { String newPidReference = pid + "\n"; List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.add(newPidReference); @@ -1819,7 +1816,7 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx if (isPidInCidRefsFile(pid, absCidRefsPath)) { try (FileChannel channel = FileChannel.open( absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE - ); FileLock lock = channel.lock()) { + ); FileLock ignored = channel.lock()) { // Read all lines into a List List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.remove(pid); From 3404d7f73cc958a8276dae1390a3f9458015a271 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 21 Dec 2023 17:29:24 -0800 Subject: [PATCH 157/553] Update README.md with overview of HashStore, missing section on references --- README.md | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8630d089..d966b9df 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,43 @@ DataONE in general, and HashStore in particular, are open source, community proj Documentation is a work in progress, and can be found on the [Metacat repository](https://github.com/NCEAS/metacat/blob/feature-1436-storage-and-indexing/docs/user/metacat/source/storage-subsystem.rst#physical-file-layout) as part of the storage redesign planning. Future updates will include documentation here as the package matures. -## HashStore Summary +## HashStore Overview -HashStore is a content-addressable file management system that utilizes the content identifier of an object to address files. The system stores both objects, references (refs) and metadata in its respective directories and provides an API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of the system. +HashStore is a content-addressable file management system that utilizes the content identifier of an object to address files. The system stores both objects, references (refs) and metadata in its respective directories and provides an API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of HashStore. + +###### How do I create a HashStore? + +To create or interact with a HashStore, the client should first instantiate a HashStore object with the following set of properties: +- storePath +- storeDepth +- storeWidth +- storeAlgorithm +- storeMetadataNamespace + +```java +String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; +Path rootDirectory = tempFolder.resolve("metacat"); + +Properties storeProperties = new Properties(); +storeProperties.setProperty("storePath", rootDirectory.toString()); +storeProperties.setProperty("storeDepth", "3"); +storeProperties.setProperty("storeWidth", "2"); +storeProperties.setProperty("storeAlgorithm", "SHA-256"); +storeProperties.setProperty( + "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" +); + +// Instantiate a HashStore +HashStore hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + +// Store an object +hashStore.storeObject(stream, pid) +// ... +``` ###### Working with objects -As content identifiers are used to store objects (files) in HashStore, objects are stored once and only once. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identfiier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: +As content identifiers are used to store objects/files in HashStore, objects are stored once and only once. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identfiier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: ```java // All-in-one process which stores, validates and tags an object objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) @@ -35,16 +65,67 @@ verifyObject(objInfo, checksum, checksumAlgorithn, objSize) tagObject(pid, cid) ``` -To retrieve an object, the client calls `retrieveObject` which returns a stream if the object exists. To find the location of the object, the client is expected to call `findObject` which will return the content identifier of the object. This can then be used to locate the object on disk. +**How do I retrieve an object if I have the pid?** +- To retrieve an object, the client calls `retrieveObject(pid)` which returns a stream if the object exists. + +**How do I find an object or check that it exists if I have the pid?** +- To find the location of the object, the client calls `findObject` which will return the content identifier (cid) of the object. +- This cid can then be used to locate the object on disk by following HashStore's store configuration. -To delete an object, the client calls `deleteObject` which will delete the object and its associated references and reference files where relevant. Note, `deleteObject` and `tagObject` calls are synchronized so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. +**How do I delete an object if I have the pid?** +- To delete an object, the client calls `deleteObject` which will delete the object and its associated references and reference files where relevant. +- Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. ###### Working with metadata -Coming Soon +The metadata objects/files for an object are stored in HashStore's '/metadata' directory. By default, calling `storeMetadata` will use the HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). + +**How do I retrieve a metadata file?** +- To find a metadata object, the client calls `retrieveMetadata` which returns a stream to the metadata file that's been stored with the default metadata namespace if it exists. +- If there are multiple metadata objects, a 'formatId' must be specified when calling `retrieveMetadata` (ex. `retrieveMetadata(pid, formatId)`) + +**How do I delete a metadata file?** +- Like `retrieveMetadata`, the client calls `deleteMetadata` which will delete the metadata object associated with the given pid. +- If there are multiple metadata objects, a 'formatId' must be specified when calling `deleteMetadata` to ensure the expected metadata object is deleted. + +###### Working with references -###### Additional information Coming Soon +###### What does HashStore look like? + +``` +# Example layout in HashStore with a single file stored along with its metadata and reference files. +# This uses a store depth of 3, with a width of 2 and "SHA-256" as its default store algorithm +## Notes: +## - Objects are stored using their content identifier as the file address +## - The reference file for each pid contains a single cid +## - The reference file for each cid contains multiple pids each on its own line + +.../metacat/hashstore/ +└─ objects + └─ /d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 +└─ metadata + └─ /15/8d/7e/55c36a810d7c14479c9...b20d7df66768b04 +└─ refs + └─ pid/0d/55/5e/d77052d7e166017f779...7230bcf7abcef65e + └─ cid/d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 +hashstore.yaml +``` + +###### Public API Methods +- storeObject +- verifyObject +- tagObject +- findObject +- storeMetadata +- retrieveObject +- retrieveMetadata +- deleteObject +- deleteMetadata +- getHexDigest + +For details, please see the HashStore interface (HashStore.java) + ## Development build HashStore is a Java package, and built using the [Maven](https://maven.apache.org/) build tool. From 8a47dbad30b1b3a32a00d29611e1acb1ab129be6 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 22 Dec 2023 10:01:39 -0800 Subject: [PATCH 158/553] Clean up code --- .../java/org/dataone/hashstore/HashStore.java | 1 - .../filehashstore/FileHashStore.java | 4 ++-- .../filehashstore/FileHashStoreUtility.java | 19 +++++-------------- .../FileHashStoreReferencesTest.java | 4 ++-- 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 0d45b164..eb6307b0 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -106,7 +106,6 @@ ObjectMetadata storeObject(InputStream object, String pid, long objSize) * @param cid Content-identifier (hash identifier) * @throws IOException Failure to create tmp file * @throws PidRefsFileExistsException When pid refs file already exists - * @throws PidExistsInCidRefsFileException pid to write already exists in a cid refs file * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address * does not exist * @throws FileNotFoundException If refs file is missing during verification diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a17e97fa..73fd960d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1652,7 +1652,7 @@ protected File writePidRefsFile(String cid) throws IOException { } /** - * Checks a given cid refs file for a pid. This is case sensitive. + * Checks a given cid refs file for a pid. This is case-sensitive. * * @param pid Authority-based or persistent identifier to search * @param absCidRefsPath Path to the cid refs file to check @@ -1820,7 +1820,7 @@ protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmEx // Read all lines into a List List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.remove(pid); - // This delete process is atomic, so we first write the updated content + // This deletes process is atomic, so we first write the updated content // into a temporary file before overwriting it. File tmpFile = FileHashStoreUtility.generateTmpFile( "tmp", REFS_TMP_FILE_DIRECTORY diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 1666d2c0..4504778f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -159,7 +159,7 @@ public static String getHierarchicalPathString(int depth, int width, String dige } /** - * Creates a empty/temporary file in a given location. If this file is not moved, it will + * Creates an empty/temporary file in a given location. If this file is not moved, it will * be deleted upon JVM gracefully exiting or shutting down. * * @param prefix string to prepend before tmp file @@ -174,18 +174,9 @@ public static File generateTmpFile(String prefix, Path directory) throws IOExcep int randomNumber = rand.nextInt(1000000); String newPrefix = prefix + "-" + System.currentTimeMillis() + randomNumber; - try { - Path newPath = Files.createTempFile(directory, newPrefix, null); - File newFile = newPath.toFile(); - newFile.deleteOnExit(); - return newFile; - - } catch (IOException ioe) { - throw ioe; - - } catch (SecurityException se) { - throw se; - - } + Path newPath = Files.createTempFile(directory, newPrefix, null); + File newFile = newPath.toFile(); + newFile.deleteOnExit(); + return newFile; } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 8868c65e..9cde118e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -298,7 +298,7 @@ public void deletePidRefsFile_fileDeleted() throws Exception { * Check that deletePidRefsFile throws exception when there is no file to delete */ @Test - public void deletePidRefsFile_missingPidRefsFile() throws Exception { + public void deletePidRefsFile_missingPidRefsFile() { String pid = "dou.test.1"; assertThrows(FileNotFoundException.class, () -> { @@ -327,7 +327,7 @@ public void deleteCidRefsPid_pidRemoved() throws Exception { * Check that deleteCidRefsPid throws exception when there is no file to delete the pid from */ @Test - public void deleteCidRefsPid_missingCidRefsFile() throws Exception { + public void deleteCidRefsPid_missingCidRefsFile() { String pid = "dou.test.1"; String cid = "abc123456789"; From e50f55cecf5f9fbc8869b67bf8facf659e989c19 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 22 Dec 2023 10:49:54 -0800 Subject: [PATCH 159/553] Update README.md with new section on reference files and revise wording --- README.md | 75 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index d966b9df..a0d5493e 100644 --- a/README.md +++ b/README.md @@ -19,9 +19,24 @@ Documentation is a work in progress, and can be found on the [Metacat repository HashStore is a content-addressable file management system that utilizes the content identifier of an object to address files. The system stores both objects, references (refs) and metadata in its respective directories and provides an API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of HashStore. +###### Public API Methods +- storeObject +- verifyObject +- tagObject +- findObject +- storeMetadata +- retrieveObject +- retrieveMetadata +- deleteObject +- deleteMetadata +- getHexDigest + +For details, please see the HashStore interface (HashStore.java) + + ###### How do I create a HashStore? -To create or interact with a HashStore, the client should first instantiate a HashStore object with the following set of properties: +To create or interact with a HashStore, instantiate a HashStore object with the following set of properties: - storePath - storeDepth - storeWidth @@ -49,9 +64,12 @@ hashStore.storeObject(stream, pid) // ... ``` -###### Working with objects -As content identifiers are used to store objects/files in HashStore, objects are stored once and only once. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identfiier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: +###### Working with objects (store, retrieve, delete) + +In HashStore, objects are first saved as temporary files while their content identifiers are calculated. Once the default hash algorithm list and their hashes are generated, objects are stored in their permanent location using the store's algorithm's corresponding hash value, the store depth and the store width. Lastly, reference files are created for the object so that they can be found and retrieved given an identifier (ex. persistent identifier (pid)). Note: Objects are also stored once and only once. + +By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identfiier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: ```java // All-in-one process which stores, validates and tags an object objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) @@ -66,30 +84,50 @@ tagObject(pid, cid) ``` **How do I retrieve an object if I have the pid?** -- To retrieve an object, the client calls `retrieveObject(pid)` which returns a stream if the object exists. +- To retrieve an object, call the Public API method `retrieveObject` which opens a stream to the object if it exists. **How do I find an object or check that it exists if I have the pid?** -- To find the location of the object, the client calls `findObject` which will return the content identifier (cid) of the object. +- To find the location of the object, call the Public API method `findObject` which will return the content identifier (cid) of the object. - This cid can then be used to locate the object on disk by following HashStore's store configuration. **How do I delete an object if I have the pid?** -- To delete an object, the client calls `deleteObject` which will delete the object and its associated references and reference files where relevant. +- To delete an object, call the Public API method `deleteObject` which will delete the object and its associated references and reference files where relevant. - Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. -###### Working with metadata -The metadata objects/files for an object are stored in HashStore's '/metadata' directory. By default, calling `storeMetadata` will use the HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). + +###### Working with metadata (store, retrieve, delete) + +HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. To differentiate between metadata documents for a given object, HashStore includes the 'formatId' (format or namespace of the metadata) when generating the address of the metadata document to store (the hash of the 'pid' + 'formatId'). By default, calling `storeMetadata` will use HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). **How do I retrieve a metadata file?** -- To find a metadata object, the client calls `retrieveMetadata` which returns a stream to the metadata file that's been stored with the default metadata namespace if it exists. +- To find a metadata object, call the Public API method `retrieveMetadata` which returns a stream to the metadata file that's been stored with the default metadata namespace if it exists. - If there are multiple metadata objects, a 'formatId' must be specified when calling `retrieveMetadata` (ex. `retrieveMetadata(pid, formatId)`) **How do I delete a metadata file?** -- Like `retrieveMetadata`, the client calls `deleteMetadata` which will delete the metadata object associated with the given pid. +- Like `retrieveMetadata`, call the Public API method `deleteMetadata` which will delete the metadata object associated with the given pid. - If there are multiple metadata objects, a 'formatId' must be specified when calling `deleteMetadata` to ensure the expected metadata object is deleted. -###### Working with references -Coming Soon +###### What are HashStore reference files? + +HashStore assumes that every object to store has a respective identifier. This identifier is then used when storing, retrieving and deleting an object. In order to facilitate this process, we create two types of reference files: +- pid (persistent identifier) reference files +- cid (content identifier) reference files + +These reference files are implemented in HashStore underneath the hood with no expectation for modification from the calling app/client. The one and only exception to this process when the calling client/app does not have an identifier, and solely stores an objects raw bytes in HashStore (calling `storeObject(InputStream)`). + +**'pid' Reference Files** +- Pid (persistent identifier) reference files are created when storing an object with an identifier. +- Pid reference files are located in HashStores '/refs/pid' directory +- If an identifier is not available at the time of storing an object, the calling app/client must create this association between a pid and the object it represents by calling `tagObject` separately. +- Each pid reference file contains a string that represents the content identifier of the object it references +- Like how objects are stored once and only once, there is also only one pid reference file for each object. + +**'cid' Reference Files** +- Cid (content identifier) reference files are created at the same time as pid reference files when storing an object with an identifier. +- Cid reference files are located in HashStore's '/refs/cid' directory +- A cid reference file is a list of all the pids that reference a cid, delimited by a new line ("\n") character + ###### What does HashStore look like? @@ -112,19 +150,6 @@ Coming Soon hashstore.yaml ``` -###### Public API Methods -- storeObject -- verifyObject -- tagObject -- findObject -- storeMetadata -- retrieveObject -- retrieveMetadata -- deleteObject -- deleteMetadata -- getHexDigest - -For details, please see the HashStore interface (HashStore.java) ## Development build From e4b553b6b08658445d39bde4b814169ebe88b223 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 27 Dec 2023 11:37:16 -0800 Subject: [PATCH 160/553] Fix bug in HashStoreClient with incorrect boolean type for hasArg for option 'nobj' --- src/main/java/org/dataone/hashstore/HashStoreClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index d65b5a5e..98dd478b 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -316,7 +316,7 @@ private static Options addHashStoreClientOptions() { "knbvm", "knbvmtestadc", false, "(knbvm) Flag to specify testing with knbvm." ); options.addOption( - "nobj", "numberofobj", false, + "nobj", "numberofobj", true, "(knbvm) Option to specify number of objects to retrieve from a Metacat db." ); options.addOption( From 81dedaef803c0f467a5920d17abcf7f2843c7254 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 2 Jan 2024 15:01:01 -0800 Subject: [PATCH 161/553] Fix bug in 'deleteObject' where an object is deleted even if its cid references file contains pids and add new junit test --- .../filehashstore/FileHashStore.java | 12 ++++++-- .../FileHashStoreInterfaceTest.java | 30 +++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 73fd960d..e5d51dc3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1013,12 +1013,20 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou throw new FileNotFoundException(errMsg); } - // Proceed to delete - Files.delete(objRealPath); // Remove pid from the cid refs file + // If there are no more reference, 'deleteCidRefsPid()' will also delete the cid reference file deleteCidRefsPid(pid, cid); // Delete pid reference file deletePidRefsFile(pid); + // Proceed to delete object only if cid refs file is no longer present + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + if (!Files.exists(absCidRefsPath)) { + Files.delete(objRealPath); + } else { + String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (references exist for the cid). Skipping object deletion. "; + logFileHashStore.warn(warnMsg); + } logFileHashStore.info( "FileHashStore.deleteObject - File and references deleted for: " + pid diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index e7a509c9..9caad98b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -19,6 +19,7 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.concurrent.ExecutorService; @@ -1287,6 +1288,35 @@ public void deleteObject_referencesDeleted() throws Exception { } } + /** + * Confirm that cid refs file and object still exists when an object has more than one reference + * and client calls 'deleteObject' on a pid that references an object that still has references. + */ + @Test + public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String pidExtra = "dou.test" + pid; + String cid = objInfo.getId(); + fileHashStore.tagObject(pidExtra, cid); + + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + fileHashStore.deleteObject(pid); + + assertTrue(Files.exists(objCidAbsPath)); + assertFalse(Files.exists(absPathPidRefsPath)); + assertTrue(Files.exists(absPathCidRefsPath)); + } + } + /** * Confirm that deleteObject throws exception when associated pid obj not found */ From 2eae4079fb54a30f2a84f087e4456f0ca018f8f3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jan 2024 09:07:27 -0800 Subject: [PATCH 162/553] Refactor 'store_object' to only delete the associated tmp file when storing a duplicate object and update junit tests --- .../filehashstore/FileHashStore.java | 3 +- .../FileHashStoreInterfaceTest.java | 27 ++++++++++------- .../FileHashStoreProtectedTest.java | 30 +++++++++++-------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e5d51dc3..54150729 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1229,10 +1229,9 @@ protected ObjectMetadata putObject( // Confirm that the object does not yet exist, delete tmpFile if so if (Files.exists(objRealPath)) { String errMsg = "FileHashStore.putObject - File already exists for pid: " + pid - + ". Object address: " + objRealPath + ". Aborting request."; + + ". Object address: " + objRealPath + ". Deleting temporary file."; logFileHashStore.warn(errMsg); tmpFile.delete(); - throw new PidObjectExistsException(errMsg); } else { // Move object File permFile = objRealPath.toFile(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 9caad98b..0a8df3a4 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -19,7 +19,6 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.List; import java.util.Map; import java.util.Properties; import java.util.concurrent.ExecutorService; @@ -439,21 +438,27 @@ public void storeObject_invalidAlgorithm() { } /** - * Check that store object throws FileAlreadyExists error when storing duplicate object + * Check that store object tags cid refs file as expected when called + * to store a duplicate object (two pids that reference the same cid) */ @Test - public void storeObject_duplicate() { + public void storeObject_duplicate() throws Exception { for (String pid : testData.pidList) { - assertThrows(PidObjectExistsException.class, () -> { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - InputStream dataStreamDup = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStreamDup, pid, null, null, null, -1); - }); + String pidTwo = pid + ".test"; + InputStream dataStreamDup = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStreamDup, pidTwo, null, null, null, -1 + ); + + String cid = objInfo.getId(); + Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 26f31416..8203e506 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -21,7 +21,6 @@ import javax.xml.bind.DatatypeConverter; import org.dataone.hashstore.ObjectMetadata; -import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -427,22 +426,27 @@ public void putObject_objSizeIncorrect() { } /** - * Verify putObject throws exception when storing a duplicate object + * Verify putObject deletes temporary file written if called to store an object + * that already exists (duplicate) */ @Test - public void putObject_duplicateObject() { - assertThrows(PidObjectExistsException.class, () -> { - // Get test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + public void putObject_duplicateObject() throws Exception { + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, null, -1); + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStream, pid, null, null, null, -1); - // Try duplicate upload - InputStream dataStreamTwo = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStreamTwo, pid, null, null, null, -1); - }); + // Try duplicate upload + String pidTwo = pid + ".test"; + InputStream dataStreamTwo = Files.newInputStream(testDataFile); + fileHashStore.putObject(dataStreamTwo, pidTwo, null, null, null, -1); + + // Confirm there are no files in 'objects/tmp' directory + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + File[] files = storePath.resolve("objects/tmp").toFile().listFiles(); + assertEquals(0, files.length); } /** From 4efd1fbca9c131a0a728dd3213aa5e78379fb226 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jan 2024 15:06:15 -0800 Subject: [PATCH 163/553] Reorganize methods in FileHashStore --- .../filehashstore/FileHashStore.java | 105 +++++++++--------- 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 54150729..1b07c7d1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1595,6 +1595,58 @@ protected void move(File source, File target, String entity) throws IOException, } } + /** + * Verifies that the reference files for the given pid and cid exist and contain + * the expected values. + * + * @param pid Authority-based or persistent identifier + * @param cid Content identifier + * @param absPidRefsPath Path to where the pid refs file exists + * @param absCidRefsPath Path to where the cid refs file exists + * @throws FileNotFoundException Any refs files are missing + * @throws IOException Unable to read any of the refs files or if the refs content + * is not what is expected + */ + protected void verifyHashStoreRefsFiles( + String pid, String cid, Path absPidRefsPath, Path absCidRefsPath + ) throws FileNotFoundException, IOException { + // First confirm that the files were created + if (!Files.exists(absCidRefsPath)) { + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " + + absCidRefsPath + " for pid: " + pid; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); + } + if (!Files.exists(absPidRefsPath)) { + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - pid refs file is missing: " + + absPidRefsPath + " for cid: " + cid; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); + } + // Now verify the content + try { + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + if (!cidRead.equals(cid)) { + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Unexpected cid: " + + cidRead + " found in pid refs file: " + absPidRefsPath + ". Expected cid: " + + cid; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); + if (!pidFoundInCidRefFiles) { + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " + + pid + " in cid refs file: " + absCidRefsPath; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + } catch (IOException ioe) { + String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + } + /** * Writes the given 'pid' into a file in the 'cid' refs file format, which consists of * multiple pids that references a 'cid' each on its own line (delimited by "\n"). @@ -1678,59 +1730,6 @@ protected boolean isPidInCidRefsFile(String pid, Path absCidRefsPath) throws IOE return pidFoundInCidRefFiles; } - - /** - * Verifies that the reference files for the given pid and cid exist and contain - * the expected values. - * - * @param pid Authority-based or persistent identifier - * @param cid Content identifier - * @param absPidRefsPath Path to where the pid refs file exists - * @param absCidRefsPath Path to where the cid refs file exists - * @throws FileNotFoundException Any refs files are missing - * @throws IOException Unable to read any of the refs files or if the refs content - * is not what is expected - */ - protected void verifyHashStoreRefsFiles( - String pid, String cid, Path absPidRefsPath, Path absCidRefsPath - ) throws FileNotFoundException, IOException { - // First confirm that the files were created - if (!Files.exists(absCidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " - + absCidRefsPath + " for pid: " + pid; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - } - if (!Files.exists(absPidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - pid refs file is missing: " - + absPidRefsPath + " for cid: " + cid; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - } - // Now verify the content - try { - String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - if (!cidRead.equals(cid)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Unexpected cid: " - + cidRead + " found in pid refs file: " + absPidRefsPath + ". Expected cid: " - + cid; - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); - if (!pidFoundInCidRefFiles) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " - + pid + " in cid refs file: " + absCidRefsPath; - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - } catch (IOException ioe) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - } - /** * Updates a cid refs file with a pid that references the cid * From 79c18c87194b535a62d0f9d677ed4dc724d0608b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jan 2024 15:53:05 -0800 Subject: [PATCH 164/553] Remove redundant addition of new lines when writing refs related files --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 ++---- .../filehashstore/FileHashStoreReferencesTest.java | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1b07c7d1..5f44ca30 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1661,8 +1661,7 @@ protected File writeCidRefsFile(String pid) throws IOException { Files.newOutputStream(cidRefsTmpFile.toPath()), StandardCharsets.UTF_8 ) )) { - String pidNewLine = pid + "\n"; - writer.write(pidNewLine); + writer.write(pid); writer.close(); logFileHashStore.debug( @@ -1743,9 +1742,8 @@ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOExce try (FileChannel channel = FileChannel.open( absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE ); FileLock ignored = channel.lock()) { - String newPidReference = pid + "\n"; List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); - lines.add(newPidReference); + lines.add(pid); // This update process is atomic, so we first write the updated content // into a temporary file before overwriting it. File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 9cde118e..ed0b7702 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -202,7 +202,7 @@ public void writeCidRefsFile_content() throws Exception { File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pidToWrite); String pidRead = new String(Files.readAllBytes(cidRefsTmpFile.toPath())); - assertEquals(pidRead, pidToWrite + "\n"); + assertEquals(pidRead, pidToWrite); } /** From 87e15def463224e6bf7f618212087989d8e2d292 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jan 2024 15:58:08 -0800 Subject: [PATCH 165/553] Clean up code and comments --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 5f44ca30..8ccd4d4a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -335,7 +335,6 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { new OutputStreamWriter(Files.newOutputStream(hashstoreYaml), StandardCharsets.UTF_8) )) { writer.write(yamlString); - writer.close(); } catch (IOException ioe) { logFileHashStore.fatal( @@ -1021,6 +1020,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou // Proceed to delete object only if cid refs file is no longer present Path absCidRefsPath = getRealPath(cid, "refs", "cid"); if (!Files.exists(absCidRefsPath)) { + // Delete actual object Files.delete(objRealPath); } else { String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid From 5c8aa024cbb81f82ad6da8954a0051fafc4343e9 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jan 2024 14:08:15 -0800 Subject: [PATCH 166/553] Refactor 'deleteObject' to streamline the process, revise junit tests and add new exception class 'PidNotFoundInCidRefsFileException' --- .../PidNotFoundInCidRefsFileException.java | 13 ++ .../filehashstore/FileHashStore.java | 147 +++++++++--------- .../FileHashStoreInterfaceTest.java | 31 ++++ .../FileHashStoreReferencesTest.java | 35 +---- 4 files changed, 120 insertions(+), 106 deletions(-) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java new file mode 100644 index 00000000..2cd9d4b6 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java @@ -0,0 +1,13 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +/** + * Custom exception class for FileHashStore when a pid is not found in a cid refs file. + */ +public class PidNotFoundInCidRefsFileException extends IOException { + public PidNotFoundInCidRefsFileException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8ccd4d4a..4a17f030 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -36,6 +36,7 @@ import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -1003,36 +1004,53 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou try { // Get permanent address of the pid by calculating its sha-256 hex digest Path objRealPath = getRealPath(pid, "object", null); + // Get the path to the cid refs file to work with + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + // Check that the pid is found in the cid refs file + boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); - // Check to see if object exists if (!Files.exists(objRealPath)) { + // Throw exception if object doesn't exist String errMsg = "FileHashStore.deleteObject - File does not exist for pid: " + pid + " with object address: " + objRealPath; - logFileHashStore.warn(errMsg); + logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); - } - // Remove pid from the cid refs file - // If there are no more reference, 'deleteCidRefsPid()' will also delete the cid reference file - deleteCidRefsPid(pid, cid); - // Delete pid reference file - deletePidRefsFile(pid); - // Proceed to delete object only if cid refs file is no longer present - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - if (!Files.exists(absCidRefsPath)) { - // Delete actual object - Files.delete(objRealPath); - } else { - String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (references exist for the cid). Skipping object deletion. "; - logFileHashStore.warn(warnMsg); - } + } else if (!Files.exists(absCidRefsPath)) { + // Throw exception if the cid refs file doesn't exist + String errMsg = + "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid + + " with address" + absCidRefsPath; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath - ); + } else if (!pidFoundInCidRefFiles) { + // Throw exception if the given pid is not in the expected cid refs file + String errMsg = "FileHashStore.deleteObject - pid: " + pid + + " is not found in cid refs file: " + absCidRefsPath; + logFileHashStore.error(errMsg); + throw new PidNotFoundInCidRefsFileException(errMsg); + } else { + // Proceed to delete the reference files and object + // If there are no more reference, 'deleteCidRefsPid()' will also delete the cid reference file + deleteCidRefsPid(pid, absCidRefsPath); + // Delete pid reference file + deletePidRefsFile(pid); + // Proceed to delete object only if cid refs file is no longer present + if (!Files.exists(absCidRefsPath)) { + // Delete actual object + Files.delete(objRealPath); + } else { + String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (references exist for the cid). Skipping object deletion. "; + logFileHashStore.warn(warnMsg); + } + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath + ); + } } finally { // Release lock synchronized (referenceLockedCids) { @@ -1798,63 +1816,42 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO /** * Removes a pid from a cid refs file. * - * @param pid Authority-based or persistent identifier. - * @param cid Content identifier + * @param pid Authority-based or persistent identifier. + * @param absCidRefsPath Path to the cid refs file to remove the pid from * @throws IOException Unable to access cid refs file */ - protected void deleteCidRefsPid(String pid, String cid) throws NoSuchAlgorithmException, - IOException { - FileHashStoreUtility.ensureNotNull(cid, "pid", "deleteCidRefsPid"); - FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsPid"); + protected void deleteCidRefsPid(String pid, Path absCidRefsPath) + throws NoSuchAlgorithmException, IOException { + FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteCidRefsPid"); + FileHashStoreUtility.ensureNotNull(absCidRefsPath, "absCidRefsPath", "deleteCidRefsPid"); + + try (FileChannel channel = FileChannel.open( + absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE + ); FileLock ignored = channel.lock()) { + // Read all lines into a List + List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); + lines.remove(pid); + // This deletes process is atomic, so we first write the updated content + // into a temporary file before overwriting it. + File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + Path tmpFilePath = tmpFile.toPath(); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absCidRefsPath.toFile(), "refs"); + logFileHashStore.debug( + "FileHashStore.deleteCidRefsPid - Pid: " + pid + " removed from cid refs file: " + + absCidRefsPath + ); - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - // Check to see if cid refs file exists - if (!Files.exists(absCidRefsPath)) { - String errMsg = - "FileHashStore.deleteCidRefsPid - Cid refs file does not exist for cid: " + cid - + " with address" + absCidRefsPath; + } catch (IOException ioe) { + String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid + + "from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe + .getMessage(); logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - - } else { - if (isPidInCidRefsFile(pid, absCidRefsPath)) { - try (FileChannel channel = FileChannel.open( - absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE - ); FileLock ignored = channel.lock()) { - // Read all lines into a List - List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); - lines.remove(pid); - // This deletes process is atomic, so we first write the updated content - // into a temporary file before overwriting it. - File tmpFile = FileHashStoreUtility.generateTmpFile( - "tmp", REFS_TMP_FILE_DIRECTORY - ); - Path tmpFilePath = tmpFile.toPath(); - Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); - move(tmpFile, absCidRefsPath.toFile(), "refs"); - logFileHashStore.debug( - "FileHashStore.deleteCidRefsPid - Pid: " + pid - + " removed from cid refs file: " + absCidRefsPath - ); - - } catch (IOException ioe) { - String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid - + "from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe - .getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - // Perform clean up on cid refs file - if it is empty, delete it - if (Files.size(absCidRefsPath) == 0) { - Files.delete(absCidRefsPath); - } - - } else { - String errMsg = "FileHashStore.deleteCidRefsPid - pid: " + pid - + " not found in cid refs file: " + absCidRefsPath; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - } + throw new IOException(errMsg); + } + // Perform clean up on cid refs file - if it is empty, delete it + if (Files.size(absCidRefsPath) == 0) { + Files.delete(absCidRefsPath); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 0a8df3a4..acc0b363 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -30,6 +30,7 @@ import javax.xml.bind.DatatypeConverter; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -1322,6 +1323,36 @@ public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exceptio } } + /** + * Confirm that an exception is thrown when called to delete an object that exists + * and has a cid refs file, but does not have the expected pid to delete. + * + * @throws Exception + */ + @Test + public void deleteObject_pidNotFoundInCidRefsFile() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String pidExtra = "dou.test" + pid; + String cid = objInfo.getId(); + fileHashStore.tagObject(pidExtra, cid); + + // Manually remove the pid + Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + fileHashStore.deleteCidRefsPid(pidExtra, absPathCidRefsPath); + + assertThrows( + PidNotFoundInCidRefsFileException.class, () -> fileHashStore.deleteObject(pidExtra) + ); + } + } + /** * Confirm that deleteObject throws exception when associated pid obj not found */ diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index ed0b7702..b59f1272 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -317,37 +317,10 @@ public void deleteCidRefsPid_pidRemoved() throws Exception { String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); - fileHashStore.deleteCidRefsPid(pid, cid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - assertFalse(fileHashStore.isPidInCidRefsFile(pid, cidRefsFilePath)); - } - - /** - * Check that deleteCidRefsPid throws exception when there is no file to delete the pid from - */ - @Test - public void deleteCidRefsPid_missingCidRefsFile() { - String pid = "dou.test.1"; - String cid = "abc123456789"; - - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.deleteCidRefsPid(pid, cid); - }); - } + fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); - /** - * Check that deleteCidRefsPid throws exception when there is no file to delete the pid from - */ - @Test - public void deleteCidRefsPid_pidNotFoundInCidRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.deleteCidRefsPid("bad.pid", cid); - }); + assertFalse(fileHashStore.isPidInCidRefsFile(pid, cidRefsFilePath)); } /** @@ -360,11 +333,11 @@ public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { fileHashStore.tagObject(pid, cid); String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); + Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteCidRefsPid(pid, cid); + fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); fileHashStore.deleteCidRefsFile(cid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(cidRefsFilePath)); } From ce85fed3554eb252cd20c747f7e45a748ee60798 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jan 2024 14:36:13 -0800 Subject: [PATCH 167/553] Delete redundant exception class 'PidObjectExistsException' and revise affected tests and classes, clean up code --- .../java/org/dataone/hashstore/HashStore.java | 46 +++++++++---------- .../dataone/hashstore/HashStoreClient.java | 8 ++-- .../exceptions/PidObjectExistsException.java | 13 ------ .../filehashstore/FileHashStore.java | 31 ++++++------- .../FileHashStoreInterfaceTest.java | 30 ++++++++---- 5 files changed, 65 insertions(+), 63 deletions(-) delete mode 100644 src/main/java/org/dataone/hashstore/exceptions/PidObjectExistsException.java diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index eb6307b0..7bbb3e20 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -4,8 +4,6 @@ import java.io.IOException; import java.io.InputStream; import java.security.NoSuchAlgorithmException; - -import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; /** @@ -55,46 +53,48 @@ public interface HashStore { * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing * @return ObjectMetadata object encapsulating file information - * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is invalid - * @throws IOException I/O Error when writing file, generating checksums and/or - * moving file - * @throws PidObjectExistsException When duplicate pid object is found - * @throws RuntimeException Thrown when there is an issue with permissions, illegal - * arguments (ex. empty pid) or null pointers - * @throws InterruptedException When tagging pid and cid process is interrupted + * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is + * invalid + * @throws IOException I/O Error when writing file, generating checksums + * and/or moving file + * @throws PidRefsFileExistsException If a pid refs file already exists, meaning the pid is + * already referencing a file. + * @throws RuntimeException Thrown when there is an issue with permissions, + * illegal arguments (ex. empty pid) or null pointers + * @throws InterruptedException When tagging pid and cid process is interrupted */ ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, - InterruptedException; + ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, + RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, - PidObjectExistsException, RuntimeException, InterruptedException; + PidRefsFileExistsException, RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ ObjectMetadata storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, - InterruptedException; + ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, + RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) - throws NoSuchAlgorithmException, IOException, PidObjectExistsException, + throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ ObjectMetadata storeObject(InputStream object, String pid, long objSize) - throws NoSuchAlgorithmException, IOException, PidObjectExistsException, + throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; /** @@ -104,13 +104,13 @@ ObjectMetadata storeObject(InputStream object, String pid, long objSize) * * @param pid Authority-based identifier * @param cid Content-identifier (hash identifier) - * @throws IOException Failure to create tmp file - * @throws PidRefsFileExistsException When pid refs file already exists - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address - * does not exist - * @throws FileNotFoundException If refs file is missing during verification - * @throws InterruptedException When tagObject is waiting to execute but is - * interrupted + * @throws IOException Failure to create tmp file + * @throws PidRefsFileExistsException When pid refs file already exists + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address + * does not exist + * @throws FileNotFoundException If refs file is missing during verification + * @throws InterruptedException When tagObject is waiting to execute but is + * interrupted */ void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, InterruptedException; diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 98dd478b..ed66196f 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -29,7 +29,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.dataone.hashstore.exceptions.HashStoreFactoryException; -import org.dataone.hashstore.exceptions.PidObjectExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -558,10 +558,12 @@ private static void storeObjsWithChecksumFromDb(List> result System.out.println("Storing object for guid: " + guid); hashStore.storeObject(objStream, guid, checksum, algorithm); - } catch (PidObjectExistsException poee) { + } catch (PidRefsFileExistsException poee) { String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); try { - logExceptionToFile(guid, errMsg, "java/store_obj_errors/pidobjectexists"); + logExceptionToFile( + guid, errMsg, "java/store_obj_errors/PidRefsFileExistsException" + ); } catch (Exception e) { e.printStackTrace(); } diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidObjectExistsException.java b/src/main/java/org/dataone/hashstore/exceptions/PidObjectExistsException.java deleted file mode 100644 index a0f7b7f7..00000000 --- a/src/main/java/org/dataone/hashstore/exceptions/PidObjectExistsException.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.dataone.hashstore.exceptions; - -import java.io.IOException; - -/** - * Custom exception class for FileHashStore pidObjects - */ -public class PidObjectExistsException extends IOException { - public PidObjectExistsException(String message) { - super(message); - } - -} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4a17f030..df871182 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -37,7 +37,6 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; -import org.dataone.hashstore.exceptions.PidObjectExistsException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; /** @@ -419,8 +418,8 @@ protected String buildHashStoreYamlString( public ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, - InterruptedException { + ) throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException, + PidRefsFileExistsException { logFileHashStore.debug( "FileHashStore.storeObject - Called to store object for pid: " + pid ); @@ -457,7 +456,7 @@ public ObjectMetadata storeObject( private ObjectMetadata syncPutObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, PidObjectExistsException, IOException, RuntimeException, + ) throws NoSuchAlgorithmException, PidRefsFileExistsException, IOException, RuntimeException, InterruptedException { // Lock pid for thread safety, transaction control and atomic writing // A pid can only be stored once and only once, subsequent calls will @@ -501,11 +500,11 @@ private ObjectMetadata syncPutObject( logFileHashStore.error(errMsg); throw nsae; - } catch (PidObjectExistsException poee) { + } catch (PidRefsFileExistsException prfee) { String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". PidObjectExistsException: " + poee.getMessage(); + + ". PidRefsFileExistsException: " + prfee.getMessage(); logFileHashStore.error(errMsg); - throw poee; + throw prfee; } catch (IOException ioe) { // Covers AtomicMoveNotSupportedException, FileNotFoundException @@ -538,7 +537,7 @@ private ObjectMetadata syncPutObject( */ @Override public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, - IOException, PidObjectExistsException, RuntimeException { + IOException, PidRefsFileExistsException, RuntimeException { // 'putObject' is called directly to bypass the pid synchronization implemented to // efficiently handle duplicate object store requests. Since there is no pid, calling // 'storeObject' would unintentionally create a bottleneck for all requests without a @@ -557,7 +556,7 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce */ @Override public ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) - throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException { FileHashStoreUtility.ensureNotNull( additionalAlgorithm, "additionalAlgorithm", "storeObject" @@ -572,7 +571,7 @@ public ObjectMetadata storeObject(InputStream object, String pid, String additio @Override public ObjectMetadata storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException { FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "storeObject"); @@ -585,7 +584,7 @@ public ObjectMetadata storeObject( */ @Override public ObjectMetadata storeObject(InputStream object, String pid, long objSize) - throws NoSuchAlgorithmException, IOException, PidObjectExistsException, RuntimeException, + throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException { FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); @@ -971,7 +970,8 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, @Override public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException { + IOException, NoSuchAlgorithmException, InterruptedException, + PidNotFoundInCidRefsFileException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete object for pid: " + pid ); @@ -1171,7 +1171,7 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE * @throws SecurityException Insufficient permissions to read/access files or when * generating/writing to a file * @throws FileNotFoundException tmpFile not found during store - * @throws PidObjectExistsException Duplicate object in store exists + * @throws PidRefsFileExistsException If the given pid already references an object * @throws IllegalArgumentException When signature values are empty (checksum, pid, * etc.) * @throws NullPointerException Arguments are null for pid or object @@ -1181,7 +1181,7 @@ protected ObjectMetadata putObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, - PidObjectExistsException, IllegalArgumentException, NullPointerException, + PidRefsFileExistsException, IllegalArgumentException, NullPointerException, AtomicMoveNotSupportedException { logFileHashStore.debug("FileHashStore.putObject - Called to put object for pid: " + pid); @@ -1820,8 +1820,7 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO * @param absCidRefsPath Path to the cid refs file to remove the pid from * @throws IOException Unable to access cid refs file */ - protected void deleteCidRefsPid(String pid, Path absCidRefsPath) - throws NoSuchAlgorithmException, IOException { + protected void deleteCidRefsPid(String pid, Path absCidRefsPath) throws IOException { FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteCidRefsPid"); FileHashStoreUtility.ensureNotNull(absCidRefsPath, "absCidRefsPath", "deleteCidRefsPid"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index acc0b363..e2ba16ae 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -31,7 +31,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; -import org.dataone.hashstore.exceptions.PidObjectExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -579,7 +579,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } catch (Exception e) { System.out.println(e.getClass()); e.printStackTrace(); - assertTrue(e instanceof RuntimeException || e instanceof PidObjectExistsException); + assertTrue( + e instanceof RuntimeException || e instanceof PidRefsFileExistsException + ); } }); Future future2 = executorService.submit(() -> { @@ -596,7 +598,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } catch (Exception e) { System.out.println(e.getClass()); e.printStackTrace(); - assertTrue(e instanceof RuntimeException || e instanceof PidObjectExistsException); + assertTrue( + e instanceof RuntimeException || e instanceof PidRefsFileExistsException + ); } }); Future future3 = executorService.submit(() -> { @@ -613,7 +617,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } catch (Exception e) { System.out.println(e.getClass()); e.printStackTrace(); - assertTrue(e instanceof RuntimeException || e instanceof PidObjectExistsException); + assertTrue( + e instanceof RuntimeException || e instanceof PidRefsFileExistsException + ); } }); Future future4 = executorService.submit(() -> { @@ -630,7 +636,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } catch (Exception e) { System.out.println(e.getClass()); e.printStackTrace(); - assertTrue(e instanceof RuntimeException || e instanceof PidObjectExistsException); + assertTrue( + e instanceof RuntimeException || e instanceof PidRefsFileExistsException + ); } }); Future future5 = executorService.submit(() -> { @@ -647,7 +655,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } catch (Exception e) { System.out.println(e.getClass()); e.printStackTrace(); - assertTrue(e instanceof RuntimeException || e instanceof PidObjectExistsException); + assertTrue( + e instanceof RuntimeException || e instanceof PidRefsFileExistsException + ); } }); @@ -692,7 +702,9 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { } catch (Exception e) { System.out.println(e.getClass()); e.printStackTrace(); - assertTrue(e instanceof RuntimeException || e instanceof PidObjectExistsException); + assertTrue( + e instanceof RuntimeException || e instanceof PidRefsFileExistsException + ); } }); Future future2 = executorService.submit(() -> { @@ -709,7 +721,9 @@ public void storeObject_objectLockedIds_TwoThreads() throws Exception { } catch (Exception e) { System.out.println(e.getClass()); e.printStackTrace(); - assertTrue(e instanceof RuntimeException || e instanceof PidObjectExistsException); + assertTrue( + e instanceof RuntimeException || e instanceof PidRefsFileExistsException + ); } }); From 48a26eb3cef927d98bf844e78b76e5e42b631daf Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jan 2024 14:43:02 -0800 Subject: [PATCH 168/553] Add 'findobject' api option to HashStoreClient and new junit test --- .../dataone/hashstore/HashStoreClient.java | 11 +++++ .../hashstore/HashStoreClientTest.java | 40 ++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index ed66196f..9c66c63e 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -141,6 +141,13 @@ public static void main(String[] args) throws Exception { String hexDigest = hashStore.getHexDigest(pid, algo); System.out.println(hexDigest); + } else if (cmd.hasOption("findobject")) { + String pid = cmd.getOptionValue("pid"); + FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + + String cid = hashStore.findObject(pid); + System.out.println(cid); + } else if (cmd.hasOption("storeobject")) { System.out.println("Storing object"); String pid = cmd.getOptionValue("pid"); @@ -274,6 +281,10 @@ private static Options addHashStoreClientOptions() { "getchecksum", "client_getchecksum", false, "Flag to get the hex digest of a data object in a HashStore." ); + options.addOption( + "findobject", "client_findobject", false, + "Flag to get the hex digest of a data object in a HashStore." + ); options.addOption( "storeobject", "client_storeobject", false, "Flag to store objs to a HashStore." ); diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 3cd0c1b4..40bd443a 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -387,7 +387,7 @@ public void client_deleteMetadata() throws Exception { } /** - * Test hashStore client returns hex digest of object. + * Test hashStore client calculates the hex digest of object. */ @Test public void client_getHexDigest() throws Exception { @@ -427,4 +427,42 @@ public void client_getHexDigest() throws Exception { assertEquals(testDataChecksum, pidStdOut.trim()); } } + + /** + * Test hashStore client returns the content identifier (cid) of an object + */ + @Test + public void client_findObject() throws Exception { + for (String pid : testData.pidList) { + // Redirect stdout to capture output + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(outputStream); + PrintStream old = System.out; + System.setOut(ps); + + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + InputStream dataStream = Files.newInputStream(testDataFile); + hashStore.storeObject(dataStream, pid, null, null, null, -1); + + // Call client + String optFindObject = "-findobject"; + String optStore = "-store"; + String optStorePath = hsProperties.getProperty("storePath"); + String optPid = "-pid"; + String optPidValue = pid; + String[] args = {optFindObject, optStore, optStorePath, optPid, optPidValue}; + HashStoreClient.main(args); + + String contentIdentifier = testData.pidData.get(pid).get("sha256"); + + // Put things back + System.out.flush(); + System.setOut(old); + + // Confirm correct content identifier has been saved + String pidStdOut = outputStream.toString(); + assertEquals(contentIdentifier, pidStdOut.trim()); + } + } } From a431e724b9d9995aac8ad66f68312ff11c7832ed Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jan 2024 15:13:44 -0800 Subject: [PATCH 169/553] Add option 'gbskip' in HashStoreClient when testing in knbvm to control what size of files to skip --- .../dataone/hashstore/HashStoreClient.java | 74 ++++++++++++------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 9c66c63e..69827593 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -124,13 +124,14 @@ public static void main(String[] args) throws Exception { String objType = cmd.getOptionValue("stype"); String originDirectory = cmd.getOptionValue("sdir"); String numObjects = cmd.getOptionValue("nobj"); + String sizeOfFilesToSkip = cmd.getOptionValue("gbskip"); FileHashStoreUtility.ensureNotNull(objType, "-stype", "HashStoreClient"); FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir", "HashStoreClient"); FileHashStoreUtility.ensureNotNull( action, "-sts, -rav, -dfs", "HashStoreClient" ); - testWithKnbvm(action, objType, originDirectory, numObjects); + testWithKnbvm(action, objType, originDirectory, numObjects, sizeOfFilesToSkip); } else if (cmd.hasOption("getchecksum")) { String pid = cmd.getOptionValue("pid"); @@ -330,6 +331,9 @@ private static Options addHashStoreClientOptions() { "nobj", "numberofobj", true, "(knbvm) Option to specify number of objects to retrieve from a Metacat db." ); + options.addOption( + "gbskip", "gbsizetoskip", true, "(knbvm) Option to specify the size of objects to skip." + ); options.addOption( "sdir", "storedirectory", true, "(knbvm) Option to specify the directory of objects to convert." @@ -446,14 +450,17 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE /** * Entry point for working with test data found in knbvm (test.arcticdata.io) * - * @param actionFlag String representing a knbvm test-related method to call. - * @param objType "data" (objects) or "documents" (metadata). - * @param numObjects Number of rows to retrieve from metacat db, - * if null, will retrieve all rows. + * @param actionFlag String representing a knbvm test-related method to call. + * @param objType "data" (objects) or "documents" (metadata). + * @param originDir Directory path of given objType + * @param numObjects Number of rows to retrieve from metacat db, + * if null, will retrieve all rows. + * @param sizeOfFilesToSkip Size of files in GB to skip * @throws IOException Related to accessing config files or objects */ private static void testWithKnbvm( - String actionFlag, String objType, String originDir, String numObjects + String actionFlag, String objType, String originDir, String numObjects, + String sizeOfFilesToSkip ) throws IOException { // Load metacat db yaml // Note: In order to test with knbvm, you must manually create a `pgdb.yaml` file with the @@ -475,15 +482,22 @@ private static void testWithKnbvm( try { System.out.println("Connecting to metacat db."); + if (!objType.equals("object")) { + if (!objType.equals("metadata")) { + String errMsg = "HashStoreClient - objType must be 'object' or 'metadata'"; + throw new IllegalArgumentException(errMsg); + } + } + // Setup metacat db access Class.forName("org.postgresql.Driver"); // Force driver to register itself Connection connection = DriverManager.getConnection(url, user, password); Statement statement = connection.createStatement(); String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev," + " systemmetadata.object_format, systemmetadata.checksum," - + " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata" - + " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid" - + sqlLimitQuery + ";"; + + " systemmetadata.checksum_algorithm, systemmetadata.size FROM identifier" + + " INNER JOIN systemmetadata ON identifier.guid = systemmetadata.guid" + + " ORDER BY identifier.guid" + sqlLimitQuery + ";"; ResultSet resultSet = statement.executeQuery(sqlQuery); // For each row, get guid, docid, rev, checksum and checksum_algorithm @@ -497,26 +511,34 @@ private static void testWithKnbvm( String checksumAlgorithm = resultSet.getString("checksum_algorithm"); String formattedChecksumAlgo = formatAlgo(checksumAlgorithm); String formatId = resultSet.getString("object_format"); - - if (!objType.equals("object")) { - if (!objType.equals("metadata")) { - String errMsg = "HashStoreClient - objType must be 'object' or 'metadata'"; - throw new IllegalArgumentException(errMsg); + long setItemSize = resultSet.getLong("size"); + + boolean skipFile = false; + if (sizeOfFilesToSkip != null) { + // Calculate the size of requested gb to skip in bytes + long gbFilesToSkip = Integer.getInteger(sizeOfFilesToSkip) * (1024L * 1024 + * 1024); + if (setItemSize > gbFilesToSkip) { + skipFile = true; } } - Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev); - if (Files.exists(setItemFilePath)) { - System.out.println( - "File exists (" + setItemFilePath + ")! Adding to resultObjList." - ); - Map resultObj = new HashMap<>(); - resultObj.put("pid", guid); - resultObj.put("algorithm", formattedChecksumAlgo); - resultObj.put("checksum", checksum); - resultObj.put("path", setItemFilePath.toString()); - resultObj.put("namespace", formatId); - resultObjList.add(resultObj); + if (skipFile) { + continue; + } else { + Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev); + if (Files.exists(setItemFilePath)) { + System.out.println( + "File exists (" + setItemFilePath + ")! Adding to resultObjList." + ); + Map resultObj = new HashMap<>(); + resultObj.put("pid", guid); + resultObj.put("algorithm", formattedChecksumAlgo); + resultObj.put("checksum", checksum); + resultObj.put("path", setItemFilePath.toString()); + resultObj.put("namespace", formatId); + resultObjList.add(resultObj); + } } } From 449f0b9525f58766f1ad388df3bf22c59ac96400 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jan 2024 15:15:16 -0800 Subject: [PATCH 170/553] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index a0d5493e..3cac7b18 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,9 @@ $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreCl # Get the checksum of a data object $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 +# Find an object in HashStore (returns its content identifer if it exists) +$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -findobject -pid testpid1 + # Store a data object $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 From 1f88f68ba7f14a7acb6b86025a022167d24026ba Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jan 2024 15:25:20 -0800 Subject: [PATCH 171/553] Clean up logging statements --- .../hashstore/filehashstore/FileHashStore.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index df871182..f9bcc1bd 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1020,7 +1020,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou // Throw exception if the cid refs file doesn't exist String errMsg = "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid - + " with address" + absCidRefsPath; + + " with address: " + absCidRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); @@ -1043,7 +1043,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou Files.delete(objRealPath); } else { String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (references exist for the cid). Skipping object deletion. "; + + " is not empty (references exist for the cid). Skipping object deletion."; logFileHashStore.warn(warnMsg); } logFileHashStore.info( @@ -1289,7 +1289,7 @@ private void validateTmpObject( } catch (Exception ge) { String errMsg = "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize + ". storedObjFileSize:" + + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile; logFileHashStore.error(errMsg); throw new IOException(errMsg); @@ -1297,7 +1297,7 @@ private void validateTmpObject( String errMsg = "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize + ". storedObjFileSize:" + + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " + storedObjFileSize + ". Deleting tmpFile: " + tmpFile; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); @@ -1798,7 +1798,7 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO if (!Files.exists(absPidRefsPath)) { String errMsg = "FileHashStore.deletePidRefsFile - File refs file does not exist for pid: " + pid - + " with address" + absPidRefsPath; + + " with address: " + absPidRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); @@ -1843,7 +1843,7 @@ protected void deleteCidRefsPid(String pid, Path absCidRefsPath) throws IOExcept } catch (IOException ioe) { String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid - + "from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe + + " from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe .getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); @@ -1870,7 +1870,7 @@ protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IO if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.deleteCidRefsFile - Cid refs file does not exist for cid: " + cid - + " with address" + absCidRefsPath; + + " with address: " + absCidRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); From be0f4ffd9a953edf72dca8aa3768cca8103efc66 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jan 2024 15:26:02 -0800 Subject: [PATCH 172/553] Revise if statement in 'HashStoreClient' to determine whether an object from knbvm's metacatdb should be skipped or not --- src/main/java/org/dataone/hashstore/HashStoreClient.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 69827593..5b1c34df 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -523,9 +523,7 @@ private static void testWithKnbvm( } } - if (skipFile) { - continue; - } else { + if (!skipFile) { Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev); if (Files.exists(setItemFilePath)) { System.out.println( From 6b651d5242694c81e068e6a3dfe14e6c8d40de85 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 18 Jan 2024 10:39:41 -0800 Subject: [PATCH 173/553] Remove redundant method 'deleteCidRefsFile' and clean up code --- .../filehashstore/FileHashStore.java | 49 ++----------------- .../FileHashStoreReferencesTest.java | 8 +-- 2 files changed, 10 insertions(+), 47 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f9bcc1bd..4aace90d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1033,12 +1033,14 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou } else { // Proceed to delete the reference files and object - // If there are no more reference, 'deleteCidRefsPid()' will also delete the cid reference file + // Remove pid from cid refs file deleteCidRefsPid(pid, absCidRefsPath); // Delete pid reference file deletePidRefsFile(pid); - // Proceed to delete object only if cid refs file is no longer present - if (!Files.exists(absCidRefsPath)) { + // Delete obj and cid refs file only if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Delete empty cid refs file + Files.delete(absCidRefsPath); // Delete actual object Files.delete(objRealPath); } else { @@ -1848,47 +1850,6 @@ protected void deleteCidRefsPid(String pid, Path absCidRefsPath) throws IOExcept logFileHashStore.error(errMsg); throw new IOException(errMsg); } - // Perform clean up on cid refs file - if it is empty, delete it - if (Files.size(absCidRefsPath) == 0) { - Files.delete(absCidRefsPath); - } - } - - - /** - * Deletes a cid refs file only if it is empty. - * - * @param cid Content identifier - * @throws IOException Unable to delete object cid refs file - */ - protected void deleteCidRefsFile(String cid) throws NoSuchAlgorithmException, IOException { - FileHashStoreUtility.ensureNotNull(cid, "pid", "deleteCidRefsFile"); - FileHashStoreUtility.checkForEmptyString(cid, "pid", "deleteCidRefsFile"); - - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - // Check to see if cid refs file exists - if (!Files.exists(absCidRefsPath)) { - String errMsg = - "FileHashStore.deleteCidRefsFile - Cid refs file does not exist for cid: " + cid - + " with address: " + absCidRefsPath; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - - } else { - // A cid refs file is only deleted if it is empty. Client must remove pid(s) first - if (Files.size(absCidRefsPath) == 0) { - Files.delete(absCidRefsPath); - logFileHashStore.debug( - "FileHashStore.deleteCidRefsFile - Deleted cid refs file: " + absCidRefsPath - ); - - } else { - String errMsg = - "FileHashStore.deleteCidRefsFile - Unable to delete cid refs file, it is not empty: " - + absCidRefsPath; - logFileHashStore.warn(errMsg); - } - } } /** diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index b59f1272..0549b01f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -324,10 +324,11 @@ public void deleteCidRefsPid_pidRemoved() throws Exception { } /** - * Check that deleteCidRefsFile still exists if called and cid refs file is not empty + * Check that deleteCidRefsPid removes all pids as expected and leaves an + * empty file. */ @Test - public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { + public void deleteCidRefsPid_allPidsRemoved() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); @@ -336,9 +337,10 @@ public void deleteCidRefsFile_cidRefsFileNotEmpty() throws Exception { Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); - fileHashStore.deleteCidRefsFile(cid); + fileHashStore.deleteCidRefsPid(pidAdditional, cidRefsFilePath); assertTrue(Files.exists(cidRefsFilePath)); + assertTrue(Files.size(cidRefsFilePath) == 0); } /** From 0be692c4c5cdea81af5136a83f575f0e6157fa53 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 18 Jan 2024 10:49:58 -0800 Subject: [PATCH 174/553] Move 'getPidHexDigest' method to FileHashStoreUtility class and clean up code --- .../filehashstore/FileHashStore.java | 42 ++++++------------- .../filehashstore/FileHashStoreUtility.java | 24 +++++++++++ .../FileHashStoreProtectedTest.java | 25 ----------- 3 files changed, 37 insertions(+), 54 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4aace90d..62db0608 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1363,8 +1363,10 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); if (!algorithmSupported) { - String errMsg = "Algorithm not supported: " + algorithm + ". Supported algorithms: " - + Arrays.toString(SUPPORTED_HASH_ALGORITHMS); + String errMsg = "FileHashStore - validateAlgorithm: Algorithm not supported: " + + algorithm + ". Supported algorithms: " + Arrays.toString( + SUPPORTED_HASH_ALGORITHMS + ); logFileHashStore.error(errMsg); throw new NoSuchAlgorithmException(errMsg); } @@ -1408,30 +1410,6 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor return requestValidation; } - /** - * Given a string and supported algorithm returns the hex digest - * - * @param pid authority based identifier or persistent identifier - * @param algorithm string value (ex. SHA-256) - * @return Hex digest of the given string in lower-case - * @throws IllegalArgumentException String or algorithm cannot be null or empty - * @throws NoSuchAlgorithmException Algorithm not supported - */ - protected String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, - IllegalArgumentException { - FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "getPidHexDigest"); - FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "getPidHexDigest"); - validateAlgorithm(algorithm); - - MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); - byte[] bytes = pid.getBytes(StandardCharsets.UTF_8); - stringMessageDigest.update(bytes); - // stringDigest - return DatatypeConverter.printHexBinary(stringMessageDigest.digest()).toLowerCase(); - } - /** * Write the input stream into a given file (tmpFile) and return a HashMap consisting of * algorithms and their respective hex digests. If an additional algorithm is supplied and @@ -1888,7 +1866,9 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) } // Get permanent address for the given metadata document - String metadataCid = getPidHexDigest(pid + checkedFormatId, OBJECT_STORE_ALGORITHM); + String metadataCid = FileHashStoreUtility.getPidHexDigest( + pid + checkedFormatId, OBJECT_STORE_ALGORITHM + ); Path metadataCidPath = getRealPath(pid, "metadata", checkedFormatId); // Store metadata to tmpMetadataFile @@ -1969,7 +1949,9 @@ protected Path getRealPath(String abId, String entity, String formatId) realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); } else if (entity.equalsIgnoreCase("metadata")) { - String objectCid = getPidHexDigest(abId + formatId, OBJECT_STORE_ALGORITHM); + String objectCid = FileHashStoreUtility.getPidHexDigest( + abId + formatId, OBJECT_STORE_ALGORITHM + ); String objShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); @@ -1977,7 +1959,9 @@ protected Path getRealPath(String abId, String entity, String formatId) } else if (entity.equalsIgnoreCase("refs")) { if (formatId.equalsIgnoreCase("pid")) { - String pidRefId = getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); + String pidRefId = FileHashStoreUtility.getPidHexDigest( + abId, OBJECT_STORE_ALGORITHM + ); String pidShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidRefId ); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 4504778f..6b7ae759 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -3,6 +3,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.security.MessageDigest; @@ -71,6 +72,29 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); } + /** + * Given a string and supported algorithm returns the hex digest + * + * @param pid authority based identifier or persistent identifier + * @param algorithm string value (ex. SHA-256) + * @return Hex digest of the given string in lower-case + * @throws IllegalArgumentException String or algorithm cannot be null or empty + * @throws NoSuchAlgorithmException Algorithm not supported + */ + public static String getPidHexDigest(String pid, String algorithm) + throws NoSuchAlgorithmException, IllegalArgumentException { + FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "getPidHexDigest"); + + MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); + byte[] bytes = pid.getBytes(StandardCharsets.UTF_8); + stringMessageDigest.update(bytes); + // stringDigest + return DatatypeConverter.printHexBinary(stringMessageDigest.digest()).toLowerCase(); + } + /** * Checks whether a directory is empty or contains files. If a file is found, it returns true. * diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 8203e506..68e74e8d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -173,31 +173,6 @@ public void getHierarchicalPathString() { assertEquals(shardedPath, shardedPathExpected); } - /** - * Check getPidHexDigest calculates correct hex digest value - */ - @Test - public void getPidHexDigest() throws Exception { - for (String pid : testData.pidList) { - String abIdDigest = fileHashStore.getPidHexDigest(pid, "SHA-256"); - String abIdTestData = testData.pidData.get(pid).get("object_cid"); - assertEquals(abIdDigest, abIdTestData); - } - } - - /** - * Check that getPidHexDigest throws NoSuchAlgorithmException - */ - @Test - public void getPidHexDigest_badAlgorithm() { - for (String pid : testData.pidList) { - assertThrows( - NoSuchAlgorithmException.class, () -> fileHashStore.getPidHexDigest(pid, "SM2") - ); - - } - } - /** * Verify that putObject returns correct id */ From 913e90a3267aec26ca23d37e364ef220c96c85af Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 18 Jan 2024 11:50:49 -0800 Subject: [PATCH 175/553] Update javadocs --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 62db0608..0ff72eb0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -545,9 +545,9 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce // the object (ex. form data including the pid, checksum, checksum algorithm, etc.) is // unavailable. // - // Note: This method does not tag the object to make it discoverable, so the client must - // call 'verifyObject' and 'tagObject' separately to ensure that the object stored - // is what is expected and is discoverable. + // Note: This method does not tag the object to make it discoverable, so the client can + // call 'verifyObject' (optional) to check that the object is valid, and 'tagObject' + // (required) to create the reference files needed to associate the respective pids/cids. return putObject(object, "HashStoreNoPid", null, null, null, -1); } From 87bfa4f79f72e5b98066cfe13de07b1c4740bf13 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 18 Jan 2024 12:36:47 -0800 Subject: [PATCH 176/553] Add '.close()' statement to finally block on given stream in 'writeToTmpFileAndGenerateChecksums' --- .../filehashstore/FileHashStore.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0ff72eb0..87ead32b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -720,11 +720,21 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio Path absPidRefsPath = getRealPath(pid, "refs", "pid"); if (Files.exists(absPidRefsPath)) { - String cidFromPidRefsFile = new String(Files.readAllBytes(absPidRefsPath)); + String cid = new String(Files.readAllBytes(absPidRefsPath)); logFileHashStore.info( - "FileHashStore.findObject - Cid (" + cidFromPidRefsFile + ") found for pid:" + pid + "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid ); - return cidFromPidRefsFile; + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + if (!Files.exists(absCidRefsPath)) { + // Throw exception if the cid refs file doesn't exist + String errMsg = + "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid + + " with address: " + absCidRefsPath + ", but pid refs file exists."; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); + } else { + return cid; + } } else { String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid @@ -1016,14 +1026,6 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); - } else if (!Files.exists(absCidRefsPath)) { - // Throw exception if the cid refs file doesn't exist - String errMsg = - "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid - + " with address: " + absCidRefsPath; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - } else if (!pidFoundInCidRefFiles) { // Throw exception if the given pid is not in the expected cid refs file String errMsg = "FileHashStore.deleteObject - pid: " + pid @@ -1494,6 +1496,7 @@ protected Map writeToTmpFileAndGenerateChecksums( throw ioe; } finally { + dataStream.close(); os.flush(); os.close(); } From 762cfa174517d277d2765403bd7a3ade2ce6062e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 18 Jan 2024 12:38:01 -0800 Subject: [PATCH 177/553] Clean up/revise test classes '...InterfaceTest', '...ProtectedTest', and 'ReferencesTest' --- .../FileHashStoreInterfaceTest.java | 265 +++++++++--------- .../FileHashStoreProtectedTest.java | 62 ++++ .../FileHashStoreReferencesTest.java | 25 -- 3 files changed, 191 insertions(+), 161 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index e2ba16ae..f1c4d316 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -31,7 +31,6 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; -import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -39,7 +38,9 @@ /** - * Test class for FileHashStore HashStoreInterface override methods + * Test class for FileHashStore HashStore Interface methods. + * + * Note: `tagObject` & `verifyObject` tests can be found in the `FileHashStoreReferences` class */ public class FileHashStoreInterfaceTest { private FileHashStore fileHashStore; @@ -459,6 +460,7 @@ public void storeObject_duplicate() throws Exception { String cid = objInfo.getId(); Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertTrue(fileHashStore.isPidInCidRefsFile(pid, absCidRefsPath)); assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); } } @@ -466,7 +468,8 @@ public void storeObject_duplicate() throws Exception { /** * Test that storeObject successfully stores a 1GB file * - * Note, a 4GB successfully stored in approximately 1m30s + * Note 1: a 4GB successfully stored in approximately 1m30s + * Note 2: Successfully stores 250GB file confirmed from knbvm */ @Test public void storeObject_largeSparseFile() throws Exception { @@ -546,14 +549,9 @@ public void storeObject_interruptProcess() throws Exception { * will encounter an `ExecutionException`. The thread that does not encounter an exception will * store the given object, and verifies that the object is stored successfully. * - * The threads that run into exceptions will encounter a `RunTimeException` or a - * `PidObjectExistsException`. If a call is made to 'storeObject' for a pid that is already in - * progress of being stored, a `RunTimeException` will be thrown. - * - * If a call is made to 'storeObject' for a pid that has been stored, the thread will encounter - * a `PidObjectExistsException` - since `putObject` checks for the existence of a given data - * object before it attempts to generate a temp file (write to it, generate checksums, etc.). - * + * The threads that run into exceptions will encounter a `RunTimeException` since the expected + * object to store is already in progress (thrown by `syncPutObject` which coordinates + * `store_object` requests with a pid). */ @Test public void storeObject_objectLockedIds_FiveThreads() throws Exception { @@ -562,102 +560,106 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { Path testDataFile = testData.getTestFile(pid); // Create a thread pool with 3 threads - ExecutorService executorService = Executors.newFixedThreadPool(3); + ExecutorService executorService = Executors.newFixedThreadPool(5); - // Submit 3 threads, each calling storeObject + // Submit 5 futures to the thread pool, each calling storeObject Future future1 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 + dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String objCid = objInfo.getId(); - Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); + String cid = objInfo.getId(); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(pidRefsPath)); + assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { + System.out.println("Start Thread 1 Exception:"); System.out.println(e.getClass()); e.printStackTrace(); - assertTrue( - e instanceof RuntimeException || e instanceof PidRefsFileExistsException - ); + System.out.println("End Thread 1 Exception\n"); + assertTrue(e instanceof RuntimeException); } }); Future future2 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 + dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String objCid = objInfo.getId(); - Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); + String cid = objInfo.getId(); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(pidRefsPath)); + assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println(e.getClass()); - e.printStackTrace(); - assertTrue( - e instanceof RuntimeException || e instanceof PidRefsFileExistsException - ); + assertTrue(e instanceof RuntimeException); } }); Future future3 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 + dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String objCid = objInfo.getId(); - Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); + String cid = objInfo.getId(); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(pidRefsPath)); + assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println(e.getClass()); - e.printStackTrace(); - assertTrue( - e instanceof RuntimeException || e instanceof PidRefsFileExistsException - ); + assertTrue(e instanceof RuntimeException); } }); Future future4 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 + dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String objCid = objInfo.getId(); - Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); + String cid = objInfo.getId(); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(pidRefsPath)); + assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println(e.getClass()); - e.printStackTrace(); - assertTrue( - e instanceof RuntimeException || e instanceof PidRefsFileExistsException - ); + assertTrue(e instanceof RuntimeException); } }); Future future5 = executorService.submit(() -> { try { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 + dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String objCid = objInfo.getId(); - Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); + String cid = objInfo.getId(); + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(pidRefsPath)); + assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println(e.getClass()); - e.printStackTrace(); - assertTrue( - e instanceof RuntimeException || e instanceof PidRefsFileExistsException - ); + assertTrue(e instanceof RuntimeException); } }); @@ -672,69 +674,6 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { executorService.awaitTermination(1, TimeUnit.MINUTES); } - /** - * Tests that the `storeObject` method can store an object successfully with two threads. This - * test uses two futures (threads) that run concurrently, one of which will encounter an - * `ExecutionException`. The thread that does not encounter an exception will store the given - * object, and verifies that the object is stored successfully. - */ - @Test - public void storeObject_objectLockedIds_TwoThreads() throws Exception { - // Get single test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); - - // Create a thread pool with 3 threads - ExecutorService executorService = Executors.newFixedThreadPool(3); - - // Submit 3 threads, each calling storeObject - Future future1 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 - ); - if (objInfo != null) { - String objCid = objInfo.getId(); - Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); - assertTrue(Files.exists(objCidAbsPath)); - } - } catch (Exception e) { - System.out.println(e.getClass()); - e.printStackTrace(); - assertTrue( - e instanceof RuntimeException || e instanceof PidRefsFileExistsException - ); - } - }); - Future future2 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 0 - ); - if (objInfo != null) { - String objCid = objInfo.getId(); - Path objCidAbsPath = fileHashStore.getRealPath(objCid, "object", null); - assertTrue(Files.exists(objCidAbsPath)); - } - } catch (Exception e) { - System.out.println(e.getClass()); - e.printStackTrace(); - assertTrue( - e instanceof RuntimeException || e instanceof PidRefsFileExistsException - ); - } - }); - - // Wait for all tasks to complete and check results - // .get() on the future ensures that all tasks complete before the test ends - future1.get(); - future2.get(); - executorService.shutdown(); - executorService.awaitTermination(1, TimeUnit.MINUTES); - } - /** * Test storeMetadata stores metadata as expected */ @@ -894,9 +833,10 @@ public void storeMetadata_pidEmptySpaces() { /** * Tests that the `storeMetadata()` method can store metadata successfully with multiple threads - * (3). This test uses three futures (threads) that run concurrently, each of which will have to - * wait for the given `pid` to be released from metadataLockedIds before proceeding to store the - * given metadata content from its `storeMetadata()` request. + * (3) and does not throw any exceptions. This test uses three futures (threads) that run + * concurrently, each of which will have to wait for the given `pid` to be released from + * metadataLockedIds before proceeding to store the given metadata content from its + * `storeMetadata()` request. * * All requests to store the same metadata will be executed, and the existing metadata file will * be overwritten by each thread. No exceptions should be encountered during these tests. @@ -983,17 +923,28 @@ public void retrieveObject() throws Exception { // Retrieve object InputStream objectCidInputStream = fileHashStore.retrieveObject(pid); assertNotNull(objectCidInputStream); + objectCidInputStream.close(); } } + /** + * Check that retrieveObject throws exception when there is no object + * associated with a given pid + */ + @Test + public void retrieveObject_pidDoesNotExist() { + assertThrows(FileNotFoundException.class, () -> { + fileHashStore.retrieveObject("pid.whose.object.does.not.exist"); + }); + } + /** * Check that retrieveObject throws exception when pid is null */ @Test public void retrieveObject_pidNull() { assertThrows(IllegalArgumentException.class, () -> { - InputStream pidInputStream = fileHashStore.retrieveObject(null); - pidInputStream.close(); + fileHashStore.retrieveObject(null); }); } @@ -1003,8 +954,7 @@ public void retrieveObject_pidNull() { @Test public void retrieveObject_pidEmpty() { assertThrows(IllegalArgumentException.class, () -> { - InputStream pidInputStream = fileHashStore.retrieveObject(""); - pidInputStream.close(); + fileHashStore.retrieveObject(""); }); } @@ -1014,8 +964,7 @@ public void retrieveObject_pidEmpty() { @Test public void retrieveObject_pidEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { - InputStream pidInputStream = fileHashStore.retrieveObject(" "); - pidInputStream.close(); + fileHashStore.retrieveObject(" "); }); } @@ -1025,8 +974,7 @@ public void retrieveObject_pidEmptySpaces() { @Test public void retrieveObject_pidNotFound() { assertThrows(FileNotFoundException.class, () -> { - InputStream pidInputStream = fileHashStore.retrieveObject("dou.2023.hs.1"); - pidInputStream.close(); + fileHashStore.retrieveObject("dou.2023.hs.1"); }); } @@ -1066,15 +1014,15 @@ public void retrieveObject_verifyContent() throws Exception { ioe.printStackTrace(); throw ioe; + } finally { + // Close stream + objectCidInputStream.close(); } // Get hex digest String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); assertEquals(sha256Digest, sha256DigestFromTestData); - - // Close stream - objectCidInputStream.close(); } } @@ -1241,6 +1189,9 @@ public void retrieveMetadata_verifyContent() throws Exception { ioe.printStackTrace(); throw ioe; + } finally { + // Close stream + metadataCidInputStream.close(); } // Get hex digest @@ -1250,17 +1201,14 @@ public void retrieveMetadata_verifyContent() throws Exception { "metadata_sha256" ); assertEquals(sha256MetadataDigest, sha256MetadataDigestFromTestData); - - // Close stream - metadataCidInputStream.close(); } } /** - * Confirm that deleteObject deletes object and empty subdirectories + * Confirm that deleteObject deletes object */ @Test - public void deleteObject() throws Exception { + public void deleteObject_objectDeleted() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1309,8 +1257,9 @@ public void deleteObject_referencesDeleted() throws Exception { } /** - * Confirm that cid refs file and object still exists when an object has more than one reference - * and client calls 'deleteObject' on a pid that references an object that still has references. + * Confirm that cid refs file and object do not get deleted when an object has more than one + * reference (when the client calls 'deleteObject' on a pid that references an object that still + * has references). */ @Test public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exception { @@ -1331,8 +1280,8 @@ public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exceptio Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); fileHashStore.deleteObject(pid); - assertTrue(Files.exists(objCidAbsPath)); assertFalse(Files.exists(absPathPidRefsPath)); + assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(absPathCidRefsPath)); } } @@ -1625,4 +1574,48 @@ public void getHexDigest_badAlgo() { }); } } + + /** + * Confirm expected cid is returned + */ + @Test + public void findObject_cid() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + String cidRetrieved = fileHashStore.findObject(pid); + + assertEquals(cid, cidRetrieved); + } + + /** + * Confirm that findObject throws an exception when pid refs file found + * but cid refs file is missing. + */ + @Test + public void findObject_cidRefsFileMissing() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Files.delete(cidRefsPath); + + assertThrows(FileNotFoundException.class, () -> { + fileHashStore.findObject(pid); + }); + } + + /** + * Check that exception is thrown when pid refs file doesn't exist + */ + @Test + public void findObject_pidNotFound() { + String pid = "dou.test.1"; + assertThrows(FileNotFoundException.class, () -> { + fileHashStore.findObject(pid); + }); + } + } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 68e74e8d..538e56d5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -874,4 +874,66 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { metadataStoredStream.close(); } } + + /** + * Confirm that isPidInCidRefsFile returns true when pid is found + */ + @Test + public void isPidInCidRefsFile_pidFound() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + String pidTwo = pid + ".test"; + InputStream dataStreamDup = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStreamDup, pidTwo, null, null, null, -1 + ); + + String cid = objInfo.getId(); + Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); + } + } + + /** + * Confirm that isPidInCidRefsFile returns false when pid is found + */ + @Test + public void isPidInCidRefsFile_pidNotFound() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + + String cid = objInfo.getId(); + Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertFalse(fileHashStore.isPidInCidRefsFile("pid.not.found", absCidRefsPath)); + } + } + + @Test + public void getRealPath() throws Exception { + // Get single test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.getId(); + + Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(pidRefsPath)); + assertTrue(Files.exists(cidRefsPath)); + } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 0549b01f..8e5959da 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -156,31 +156,6 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { } - /** - * Confirm expected cid is returned - */ - @Test - public void findObject_content() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - String cidRetrieved = fileHashStore.findObject(pid); - - assertEquals(cid, cidRetrieved); - } - - /** - * Check that exception is thrown when pid refs file doesn't exist - */ - @Test - public void findObject_pidNotFound() { - String pid = "dou.test.1"; - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.findObject(pid); - }); - } - /** * Check that the cid supplied is written into the file given */ From e5a37013731145524216c2ec22dcd7be3c08d4d5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 18 Jan 2024 12:58:22 -0800 Subject: [PATCH 178/553] Fix bug in HashStoreClient where code incorrectly calls '.getInteger()' to convert a String object to an int object, now calls '.parseInt()' --- src/main/java/org/dataone/hashstore/HashStoreClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 5b1c34df..ceb31efb 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -516,7 +516,7 @@ private static void testWithKnbvm( boolean skipFile = false; if (sizeOfFilesToSkip != null) { // Calculate the size of requested gb to skip in bytes - long gbFilesToSkip = Integer.getInteger(sizeOfFilesToSkip) * (1024L * 1024 + long gbFilesToSkip = Integer.parseInt(sizeOfFilesToSkip) * (1024L * 1024 * 1024); if (setItemSize > gbFilesToSkip) { skipFile = true; From 074dd9dfb05ec42f05db5b90508830c6637142b7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 14:22:32 -0800 Subject: [PATCH 179/553] Update HashStore interface with missing access modifiers and add todo items --- .../java/org/dataone/hashstore/HashStore.java | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 7bbb3e20..6a62f19b 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -63,7 +63,7 @@ public interface HashStore { * illegal arguments (ex. empty pid) or null pointers * @throws InterruptedException When tagging pid and cid process is interrupted */ - ObjectMetadata storeObject( + public ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, @@ -72,13 +72,14 @@ ObjectMetadata storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, - PidRefsFileExistsException, RuntimeException, InterruptedException; + public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, + IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; + // TODO: Revise this overload method to take size, this is likely the default storeObject /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectMetadata storeObject( + public ObjectMetadata storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; @@ -86,14 +87,15 @@ ObjectMetadata storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) - throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, + public ObjectMetadata storeObject( + InputStream object, String pid, String additionalAlgorithm + ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) */ - ObjectMetadata storeObject(InputStream object, String pid, long objSize) + public ObjectMetadata storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; @@ -112,13 +114,13 @@ ObjectMetadata storeObject(InputStream object, String pid, long objSize) * @throws InterruptedException When tagObject is waiting to execute but is * interrupted */ - void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException, FileNotFoundException, InterruptedException; + public void tagObject(String pid, String cid) throws IOException, + PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, + InterruptedException; /** * Confirms that an ObjectMetadata's content is equal to the given values. If it is not - * equal, - * it will delete the object referenced by the ObjectMetadata object. + * equal, it will return False - otherwise True. * * @param objectInfo ObjectMetadata object with values * @param checksum Value of checksum to validate against @@ -130,7 +132,7 @@ void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsExce * not in objectInfo * @throws IllegalArgumentException An expected value does not match */ - void verifyObject( + public boolean verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException; @@ -138,13 +140,13 @@ void verifyObject( * Checks whether an object referenced by a pid exists and returns the content identifier. * * @param pid Authority-based identifier - * @return Content identifier + * @return Content identifier (cid) * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs file's * absolute address is not valid * @throws IOException Unable to read from a pid refs file or pid refs file * does not exist */ - String findObject(String pid) throws NoSuchAlgorithmException, IOException; + public String findObject(String pid) throws NoSuchAlgorithmException, IOException; /** * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a @@ -167,14 +169,14 @@ void verifyObject( * @throws NoSuchAlgorithmException Algorithm used to calculate permanent address is not * supported */ - String storeMetadata(InputStream metadata, String pid, String formatId) throws IOException, - IllegalArgumentException, FileNotFoundException, InterruptedException, - NoSuchAlgorithmException; + public String storeMetadata(InputStream metadata, String pid, String formatId) + throws IOException, IllegalArgumentException, FileNotFoundException, + InterruptedException, NoSuchAlgorithmException; /** * @see #storeMetadata(InputStream, String, String) */ - String storeMetadata(InputStream metadata, String pid) throws IOException, + public String storeMetadata(InputStream metadata, String pid) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException; @@ -189,7 +191,7 @@ String storeMetadata(InputStream metadata, String pid) throws IOException, * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - InputStream retrieveObject(String pid) throws IllegalArgumentException, + public InputStream retrieveObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** @@ -205,13 +207,14 @@ InputStream retrieveObject(String pid) throws IllegalArgumentException, * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not * supported */ - InputStream retrieveMetadata(String pid, String formatId) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + public InputStream retrieveMetadata(String pid, String formatId) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException; /** * @see #retrieveMetadata(String, String) */ - InputStream retrieveMetadata(String pid) throws IllegalArgumentException, + public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** @@ -227,9 +230,11 @@ InputStream retrieveMetadata(String pid) throws IllegalArgumentException, * supported * @throws InterruptedException When deletion synchronization is interrupted */ - void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, + public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException; + // TODO: Determine path to directly delete a cid that exists by itself (store data only) + /** * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given * persistent identifier and its respective metadata namespace. @@ -242,14 +247,14 @@ void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundExcep * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, + public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** * @see #deleteMetadata(String, String) */ - void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException; + public void deleteMetadata(String pid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; /** * Calculates the hex digest of an object that exists in HashStore using a given persistent @@ -264,6 +269,6 @@ void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundExc * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, + public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; } From 75865c727f074d2530ad8aef071327fe9967555c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 14:42:11 -0800 Subject: [PATCH 180/553] Optimize updating/removing pids from cid refs files and add todo items --- .../filehashstore/FileHashStore.java | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 87ead32b..9d5be3e1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -580,7 +580,7 @@ public ObjectMetadata storeObject( } /** - * Overload method for storeObject with size of object to validate + * Overload method for storeObject with just the size of object to validate */ @Override public ObjectMetadata storeObject(InputStream object, String pid, long objSize) @@ -592,7 +592,7 @@ public ObjectMetadata storeObject(InputStream object, String pid, long objSize) } @Override - public void verifyObject( + public boolean verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException { logFileHashStore.debug( @@ -611,12 +611,15 @@ public void verifyObject( ); Path objAbsPath = OBJECT_STORE_DIRECTORY.resolve(cidShardString); + // TODO: ValidateTmpObject should not delete the object, just return a boolean + // - Revise this, and also ensure other methods that call it are reviewed. validateTmpObject( true, checksum, checksumAlgorithm, objAbsPath, hexDigests, objSize, objInfoRetrievedSize ); logFileHashStore.info( "FileHashStore.verifyObject - Object with id: " + objId + " has been verified." ); + return true; } @Override @@ -725,13 +728,16 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid ); Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + // TODO: Check that pid is also found in the cid reference file if (!Files.exists(absCidRefsPath)) { // Throw exception if the cid refs file doesn't exist String errMsg = "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid + " with address: " + absCidRefsPath + ", but pid refs file exists."; logFileHashStore.error(errMsg); + // Create custom exception to handle it properly throw new FileNotFoundException(errMsg); + } else { return cid; } @@ -740,6 +746,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid + ". Pid refs file does not exist at: " + absPidRefsPath; logFileHashStore.error(errMsg); + // Create custom exception class throw new FileNotFoundException(errMsg); } } @@ -989,6 +996,13 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteObject"); + // TODO: First, determine that the pid, object and the related HashStore refs files exists + // - If the pid reference exists, but cid doesn't, just delete the reference file + // - Then log warning, be sure to catch the specific type of exception to handle and + // add clarity to this process (ex. in Line 734) + // - If the pid reference file exists, but it's not found in the cid reference file, + // check the cid reference file for the pid and remove it, then delete the orphaned file + String cid = findObject(pid); synchronized (referenceLockedCids) { @@ -1031,14 +1045,16 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou String errMsg = "FileHashStore.deleteObject - pid: " + pid + " is not found in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); + // TODO: Just delete the pid reference file in this scenario + // - Swallow exception and log warning, then return; throw new PidNotFoundInCidRefsFileException(errMsg); } else { // Proceed to delete the reference files and object - // Remove pid from cid refs file - deleteCidRefsPid(pid, absCidRefsPath); // Delete pid reference file deletePidRefsFile(pid); + // Remove pid from cid refs file + deleteCidRefsPid(pid, absCidRefsPath); // Delete obj and cid refs file only if the cid refs file is empty if (Files.size(absCidRefsPath) == 0) { // Delete empty cid refs file @@ -1054,6 +1070,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou "FileHashStore.deleteObject - File and references deleted for: " + pid + " with object address: " + objRealPath ); + // TODO: Discuss where deleteObject should also remove all default system metadata } } finally { // Release lock @@ -1738,6 +1755,10 @@ protected boolean isPidInCidRefsFile(String pid, Path absCidRefsPath) throws IOE * @throws IOException Issue with updating a cid refs file */ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOException { + // This update process is atomic, so we first write the updated content + // into a temporary file before overwriting it. + File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + Path tmpFilePath = tmpFile.toPath(); try { // Obtain a lock on the file before updating it try (FileChannel channel = FileChannel.open( @@ -1745,10 +1766,7 @@ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOExce ); FileLock ignored = channel.lock()) { List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.add(pid); - // This update process is atomic, so we first write the updated content - // into a temporary file before overwriting it. - File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - Path tmpFilePath = tmpFile.toPath(); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absCidRefsPath.toFile(), "refs"); logFileHashStore.debug( @@ -1757,7 +1775,6 @@ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOExce ); } // The lock is automatically released when the try block exits - } catch (IOException ioe) { String errMsg = "FileHashStore.updateCidRefsFiles - " + ioe.getMessage(); logFileHashStore.error(errMsg); @@ -1806,24 +1823,23 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO protected void deleteCidRefsPid(String pid, Path absCidRefsPath) throws IOException { FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteCidRefsPid"); FileHashStoreUtility.ensureNotNull(absCidRefsPath, "absCidRefsPath", "deleteCidRefsPid"); - + // This deletes process is atomic, so we first write the updated content + // into a temporary file before overwriting it. + File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); + Path tmpFilePath = tmpFile.toPath(); try (FileChannel channel = FileChannel.open( absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE ); FileLock ignored = channel.lock()) { // Read all lines into a List List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); lines.remove(pid); - // This deletes process is atomic, so we first write the updated content - // into a temporary file before overwriting it. - File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - Path tmpFilePath = tmpFile.toPath(); Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absCidRefsPath.toFile(), "refs"); logFileHashStore.debug( "FileHashStore.deleteCidRefsPid - Pid: " + pid + " removed from cid refs file: " + absCidRefsPath ); - + // The lock is automatically released when the try block exits } catch (IOException ioe) { String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe From 63e46b0b1d635c21ef5329ea3d136c46420429e5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 14:44:48 -0800 Subject: [PATCH 181/553] Rename 'ObjectMetadata' class's 'id' to 'cid' and update all affected code and junit tests --- .../org/dataone/hashstore/ObjectMetadata.java | 10 ++++----- .../filehashstore/FileHashStore.java | 6 ++--- .../org/dataone/hashstore/HashStoreTest.java | 2 +- .../dataone/hashstore/ObjectMetadataTest.java | 2 +- .../FileHashStoreInterfaceTest.java | 22 +++++++++---------- .../FileHashStoreProtectedTest.java | 10 ++++----- .../FileHashStoreReferencesTest.java | 2 +- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 22ea397f..3c7986c7 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -9,7 +9,7 @@ * without needing to know the underlying file system details. */ public class ObjectMetadata { - private final String id; + private final String cid; private final long size; private final Map hexDigests; @@ -21,8 +21,8 @@ public class ObjectMetadata { * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the * file */ - public ObjectMetadata(String id, long size, Map hexDigests) { - this.id = id; + public ObjectMetadata(String cid, long size, Map hexDigests) { + this.cid = cid; this.size = size; this.hexDigests = hexDigests; } @@ -32,8 +32,8 @@ public ObjectMetadata(String id, long size, Map hexDigests) { * * @return id */ - public String getId() { - return id; + public String getCid() { + return cid; } /** diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 9d5be3e1..8455bd34 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -486,7 +486,7 @@ private ObjectMetadata syncPutObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize ); // Tag object - String cid = objInfo.getId(); + String cid = objInfo.getCid(); tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid @@ -596,7 +596,7 @@ public boolean verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException { logFileHashStore.debug( - "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getId() + "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() ); FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); @@ -604,7 +604,7 @@ public boolean verifyObject( Map hexDigests = objectInfo.getHexDigests(); long objInfoRetrievedSize = objectInfo.getSize(); - String objId = objectInfo.getId(); + String objId = objectInfo.getCid(); // Object is not tagged at this stage, so we must manually form the permanent address of the file String cidShardString = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objId diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index ee06190e..1f34c70a 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -131,7 +131,7 @@ public void hashStore_storeObjects() throws Exception { // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, objInfo.getId()); + assertEquals(objContentId, objInfo.getCid()); } } } diff --git a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java index 4c0baf2e..8d97a0f3 100644 --- a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java @@ -55,7 +55,7 @@ public void testObjectMetadata() { @Test public void testObjectMetadataGetId() { ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); - String objId = objInfo.getId(); + String objId = objInfo.getCid(); assertEquals(objId, id); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index f1c4d316..a3bea232 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -98,7 +98,7 @@ public void storeObject() throws Exception { // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); - assertEquals(objectCid, objInfo.getId()); + assertEquals(objectCid, objInfo.getCid()); } } @@ -283,7 +283,7 @@ public void storeObject_inputStream_overload() throws Exception { Map hexDigests = objInfo.getHexDigests(); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - String cid = objInfo.getId(); + String cid = objInfo.getCid(); assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); @@ -458,7 +458,7 @@ public void storeObject_duplicate() throws Exception { dataStreamDup, pidTwo, null, null, null, -1 ); - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(fileHashStore.isPidInCidRefsFile(pid, absCidRefsPath)); assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); @@ -570,7 +570,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); @@ -593,7 +593,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); @@ -612,7 +612,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); @@ -631,7 +631,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); @@ -650,7 +650,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); @@ -1245,7 +1245,7 @@ public void deleteObject_referencesDeleted() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - String cid = objInfo.getId(); + String cid = objInfo.getCid(); // Path objAbsPath = fileHashStore.getRealPath(pid, "object", null); Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); @@ -1272,7 +1272,7 @@ public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exceptio dataStream, pid, null, null, null, -1 ); String pidExtra = "dou.test" + pid; - String cid = objInfo.getId(); + String cid = objInfo.getCid(); fileHashStore.tagObject(pidExtra, cid); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); @@ -1303,7 +1303,7 @@ public void deleteObject_pidNotFoundInCidRefsFile() throws Exception { dataStream, pid, null, null, null, -1 ); String pidExtra = "dou.test" + pid; - String cid = objInfo.getId(); + String cid = objInfo.getCid(); fileHashStore.tagObject(pidExtra, cid); // Manually remove the pid diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 538e56d5..3951f4d6 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -187,7 +187,7 @@ public void putObject_testHarness_id() throws Exception { // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, address.getId()); + assertEquals(objContentId, address.getCid()); } } @@ -253,7 +253,7 @@ public void putObject_validateChecksumValue() throws Exception { dataStream, pid, null, checksumCorrect, "MD2", -1 ); - String objCid = address.getId(); + String objCid = address.getCid(); // Get relative path String objCidShardString = FileHashStoreUtility.getHierarchicalPathString(3, 2, objCid); // Get absolute path @@ -893,7 +893,7 @@ public void isPidInCidRefsFile_pidFound() throws Exception { dataStreamDup, pidTwo, null, null, null, -1 ); - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); } @@ -913,7 +913,7 @@ public void isPidInCidRefsFile_pidNotFound() throws Exception { dataStream, pid, null, null, null, -1 ); - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); assertFalse(fileHashStore.isPidInCidRefsFile("pid.not.found", absCidRefsPath)); } @@ -927,7 +927,7 @@ public void getRealPath() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String cid = objInfo.getId(); + String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 8e5959da..ee2d9747 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -394,7 +394,7 @@ public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getId(); + String actualCid = objInfo.getCid(); String cidShardString = FileHashStoreUtility.getHierarchicalPathString( storeDepth, storeWidth, actualCid ); From da0d245b3781db7a361e73ffa3ec7483b539380e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 16:16:18 -0800 Subject: [PATCH 182/553] Refactor 'findObject' to throw custom exception classes and revise/add new junit tests --- .../java/org/dataone/hashstore/HashStore.java | 18 ++++++++---- .../OrphanPidRefsFileException.java | 14 ++++++++++ .../filehashstore/FileHashStore.java | 14 ++++++++-- .../FileHashStoreInterfaceTest.java | 28 ++++++++++++++++--- 4 files changed, 62 insertions(+), 12 deletions(-) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/OrphanPidRefsFileException.java diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 6a62f19b..0253da4d 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -4,6 +4,9 @@ import java.io.IOException; import java.io.InputStream; import java.security.NoSuchAlgorithmException; + +import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; +import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; /** @@ -141,12 +144,17 @@ public boolean verifyObject( * * @param pid Authority-based identifier * @return Content identifier (cid) - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs file's - * absolute address is not valid - * @throws IOException Unable to read from a pid refs file or pid refs file - * does not exist + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs + * file's absolute address is not valid + * @throws IOException Unable to read from a pid refs file or pid refs + * file does not exist + * @throws OrphanPidRefsFileException When pid refs file exists and the cid found + * inside does not exist. + * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the + * expected pid is not found in the cid refs file. */ - public String findObject(String pid) throws NoSuchAlgorithmException, IOException; + public String findObject(String pid) throws NoSuchAlgorithmException, IOException, + OrphanPidRefsFileException, PidNotFoundInCidRefsFileException; /** * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a diff --git a/src/main/java/org/dataone/hashstore/exceptions/OrphanPidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/OrphanPidRefsFileException.java new file mode 100644 index 00000000..dd42e99f --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/OrphanPidRefsFileException.java @@ -0,0 +1,14 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +/** + * Custom exception class for FileHashStore when a pid reference file is found and the + * cid refs file that it is referencing does not contain the pid. + */ +public class OrphanPidRefsFileException extends IOException { + public OrphanPidRefsFileException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8455bd34..841d8e9c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -36,6 +36,7 @@ import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -728,17 +729,24 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid ); Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - // TODO: Check that pid is also found in the cid reference file if (!Files.exists(absCidRefsPath)) { // Throw exception if the cid refs file doesn't exist String errMsg = "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid + " with address: " + absCidRefsPath + ", but pid refs file exists."; logFileHashStore.error(errMsg); - // Create custom exception to handle it properly - throw new FileNotFoundException(errMsg); + throw new OrphanPidRefsFileException(errMsg); + + } else if (!isPidInCidRefsFile(pid, absCidRefsPath)) { + // If pid is not in cid refs file, throw custom exception + String errMsg = + "FileHashStore.deleteObject - Pid not found in cid refs file for cid: " + cid + + " with address: " + absCidRefsPath; + logFileHashStore.error(errMsg); + throw new PidNotFoundInCidRefsFileException(errMsg); } else { + // The pid is found in its expected cid refs file, return the cid return cid; } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index a3bea232..24a1053d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -30,6 +30,7 @@ import javax.xml.bind.DatatypeConverter; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -1590,11 +1591,11 @@ public void findObject_cid() throws Exception { } /** - * Confirm that findObject throws an exception when pid refs file found - * but cid refs file is missing. + * Confirm that findObject throws OrphanPidRefsFileException exception when + * pid refs file found but cid refs file is missing. */ @Test - public void findObject_cidRefsFileMissing() throws Exception { + public void findObject_cidRefsFileNotFound() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); @@ -1602,7 +1603,26 @@ public void findObject_cidRefsFileMissing() throws Exception { Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); Files.delete(cidRefsPath); - assertThrows(FileNotFoundException.class, () -> { + assertThrows(OrphanPidRefsFileException.class, () -> { + fileHashStore.findObject(pid); + }); + } + + + /** + * Confirm that findObject throws PidNotFoundInCidRefsFileException exception when + * pid refs file found but cid refs file is missing. + */ + @Test + public void findObject_cidRefsFileMissingPid() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + fileHashStore.deleteCidRefsPid(pid, cidRefsPath); + + assertThrows(PidNotFoundInCidRefsFileException.class, () -> { fileHashStore.findObject(pid); }); } From 119a4b885801168ab04480da1847d3d0394b2cfc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 16:25:41 -0800 Subject: [PATCH 183/553] Refactor 'deleteObject' to handle orphaned pid refs files, revise 'findObject' process, and update junit tests --- .../filehashstore/FileHashStore.java | 68 ++++++++++--------- .../FileHashStoreInterfaceTest.java | 30 +++++--- 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 841d8e9c..ba9c8aa5 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -725,29 +725,29 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio if (Files.exists(absPidRefsPath)) { String cid = new String(Files.readAllBytes(absPidRefsPath)); - logFileHashStore.info( - "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid - ); Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + + // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { - // Throw exception if the cid refs file doesn't exist String errMsg = "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid + " with address: " + absCidRefsPath + ", but pid refs file exists."; logFileHashStore.error(errMsg); throw new OrphanPidRefsFileException(errMsg); + } + // If the pid is found in the expected cid refs file, return it + if (isPidInCidRefsFile(pid, absCidRefsPath)) { + logFileHashStore.info( + "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid + ); + return cid; - } else if (!isPidInCidRefsFile(pid, absCidRefsPath)) { - // If pid is not in cid refs file, throw custom exception - String errMsg = - "FileHashStore.deleteObject - Pid not found in cid refs file for cid: " + cid - + " with address: " + absCidRefsPath; + } else { + String errMsg = "FileHashStore.deleteObject - Pid refs file exists, but pid (" + pid + + ") not found in cid refs file for cid: " + cid + " with address: " + + absCidRefsPath; logFileHashStore.error(errMsg); throw new PidNotFoundInCidRefsFileException(errMsg); - - } else { - // The pid is found in its expected cid refs file, return the cid - return cid; } } else { @@ -1004,15 +1004,32 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteObject"); - // TODO: First, determine that the pid, object and the related HashStore refs files exists - // - If the pid reference exists, but cid doesn't, just delete the reference file - // - Then log warning, be sure to catch the specific type of exception to handle and - // add clarity to this process (ex. in Line 734) - // - If the pid reference file exists, but it's not found in the cid reference file, - // check the cid reference file for the pid and remove it, then delete the orphaned file + // First, find the object and evaluate its state + String cid; + try { + cid = findObject(pid); - String cid = findObject(pid); + } catch (OrphanPidRefsFileException oprfe) { + // Delete the pid refs file and return, nothing else to delete. + Path absPidRefsPath = getRealPath(pid, "refs", "pid"); + Files.delete(absPidRefsPath); + String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + + pid + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + return; + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // Delete pid refs file and return, nothing else to delete + Path absPidRefsPath = getRealPath(pid, "refs", "pid"); + Files.delete(absPidRefsPath); + String warnMsg = + "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " + + pid + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + return; + } + + // If cid has been retrieved without any errors, proceed with second stage of deletion. synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { try { @@ -1038,8 +1055,6 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou Path objRealPath = getRealPath(pid, "object", null); // Get the path to the cid refs file to work with Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - // Check that the pid is found in the cid refs file - boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); if (!Files.exists(objRealPath)) { // Throw exception if object doesn't exist @@ -1048,15 +1063,6 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); - } else if (!pidFoundInCidRefFiles) { - // Throw exception if the given pid is not in the expected cid refs file - String errMsg = "FileHashStore.deleteObject - pid: " + pid - + " is not found in cid refs file: " + absCidRefsPath; - logFileHashStore.error(errMsg); - // TODO: Just delete the pid reference file in this scenario - // - Swallow exception and log warning, then return; - throw new PidNotFoundInCidRefsFileException(errMsg); - } else { // Proceed to delete the reference files and object // Delete pid reference file diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 24a1053d..f15394df 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1288,13 +1288,13 @@ public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exceptio } /** - * Confirm that an exception is thrown when called to delete an object that exists - * and has a cid refs file, but does not have the expected pid to delete. + * Confirm that deleteObject removes an orphan pid reference file when the associated cid refs + * file does not contain the expected pid. * * @throws Exception */ @Test - public void deleteObject_pidNotFoundInCidRefsFile() throws Exception { + public void deleteObject_pidOrphan() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1303,17 +1303,27 @@ public void deleteObject_pidNotFoundInCidRefsFile() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - String pidExtra = "dou.test" + pid; String cid = objInfo.getCid(); - fileHashStore.tagObject(pidExtra, cid); + String pidExtra = "dou.test" + pid; + Path objRealPath = fileHashStore.getRealPath(pid, "object", null); - // Manually remove the pid + // Manually change the pid found in the cid refs file Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteCidRefsPid(pidExtra, absPathCidRefsPath); + fileHashStore.updateCidRefsFiles(pidExtra, absPathCidRefsPath); + // Create an orphaned pid refs file + fileHashStore.deleteCidRefsPid(pid, absPathCidRefsPath); - assertThrows( - PidNotFoundInCidRefsFileException.class, () -> fileHashStore.deleteObject(pidExtra) - ); + fileHashStore.deleteObject(pid); + + // Confirm cid refs file still exists + assertTrue(Files.exists(absPathCidRefsPath)); + // Confirm the original (and now orphaned) pid refs file is deleted + Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + assertFalse(Files.exists(absPathPidRefsPath)); + // Confirm the object has not been deleted + assertTrue(Files.exists(objRealPath)); + // Confirm the cid refs file still exists + assertTrue(Files.exists(absPathCidRefsPath)); } } From dac8a345404254ae44937a14d5dc7ba63e011978 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 16:44:15 -0800 Subject: [PATCH 184/553] Swallow unnecessary exceptions in 'move' and 'deleteMetadata' methods and replace with return statement, and update junit tests --- .../java/org/dataone/hashstore/HashStore.java | 1 - .../filehashstore/FileHashStore.java | 27 ++++++++++--------- .../FileHashStoreInterfaceTest.java | 13 +++++---- .../FileHashStoreProtectedTest.java | 19 ++++++------- 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 0253da4d..34400ce1 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -250,7 +250,6 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou * @param pid Authority-based identifier * @param formatId Metadata namespace/format * @throws IllegalArgumentException When pid or formatId is null or empty - * @throws FileNotFoundException When requested pid has no metadata * @throws IOException I/O error when deleting empty directories * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index ba9c8aa5..7b4cd0a6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1101,7 +1101,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou @Override public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { + IOException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid ); @@ -1114,20 +1114,20 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx // Get permanent address of the pid by calculating its sha-256 hex digest Path metadataCidPath = getRealPath(pid, "metadata", formatId); - // Check to see if object exists if (!Files.exists(metadataCidPath)) { String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid + " with metadata address: " + metadataCidPath; logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } + return; - // Proceed to delete - Files.delete(metadataCidPath); - logFileHashStore.info( - "FileHashStore.deleteMetadata - File deleted for: " + pid + " with metadata address: " - + metadataCidPath - ); + } else { + // Proceed to delete + Files.delete(metadataCidPath); + logFileHashStore.info( + "FileHashStore.deleteMetadata - File deleted for: " + pid + + " with metadata address: " + metadataCidPath + ); + } } /** @@ -1233,6 +1233,9 @@ protected ObjectMetadata putObject( ); validateAlgorithm(checksumAlgorithm); } + if (checksum != null) { + FileHashStoreUtility.checkForEmptyString(checksum, "checksum", "putObject"); + } if (objSize != -1) { FileHashStoreUtility.checkNotNegativeOrZero(objSize, "putObject"); } @@ -1588,8 +1591,8 @@ protected void move(File source, File target, String entity) throws IOException, // Entity is only used when checking for an existence of an object if (entity.equals("object") && target.exists()) { String errMsg = "FileHashStore.move - File already exists for target: " + target; - logFileHashStore.debug(errMsg); - throw new FileAlreadyExistsException(errMsg); + logFileHashStore.warn(errMsg); + return; } File destinationDirectory = new File(target.getParent()); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index f15394df..6c7b45d5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1424,14 +1424,13 @@ public void deleteMetadata_overload() throws Exception { } /** - * Confirm that deleteMetadata throws exception when associated pid obj not found + * Confirm that no exceptions are thrown when called to delete metadata + * that does not exist. */ @Test - public void deleteMetadata_pidNotFound() { - assertThrows(FileNotFoundException.class, () -> { - String formatId = "http://hashstore.tests/types/v1.0"; - fileHashStore.deleteMetadata("dou.2023.hashstore.1", formatId); - }); + public void deleteMetadata_pidNotFound() throws Exception { + String formatId = "http://hashstore.tests/types/v1.0"; + fileHashStore.deleteMetadata("dou.2023.hashstore.1", formatId); } /** @@ -1449,7 +1448,7 @@ public void deleteMetadata_pidNull() { * Confirm that deleteMetadata throws exception when pid is empty */ @Test - public void deleteMetadata_pidEmpty() { + public void deleteMetadata_pidEmpty() throws Exception { assertThrows(IllegalArgumentException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata("", formatId); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 3951f4d6..09a26f15 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -9,7 +9,6 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.nio.file.FileAlreadyExistsException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -628,19 +627,17 @@ public void testMove() throws Exception { } /** - * Confirm that FileAlreadyExistsException is thrown when target already exists + * Confirm that exceptions are not thrown when move is called on an object that already exists */ @Test - public void testMove_targetExists() { - assertThrows(FileAlreadyExistsException.class, () -> { - File newTmpFile = generateTemporaryFile(); - String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; - File targetFile = new File(targetString); - fileHashStore.move(newTmpFile, targetFile, "object"); + public void testMove_targetExists() throws Exception { + File newTmpFile = generateTemporaryFile(); + String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; + File targetFile = new File(targetString); + fileHashStore.move(newTmpFile, targetFile, "object"); - File newTmpFileTwo = generateTemporaryFile(); - fileHashStore.move(newTmpFileTwo, targetFile, "object"); - }); + File newTmpFileTwo = generateTemporaryFile(); + fileHashStore.move(newTmpFileTwo, targetFile, "object"); } /** From 16fa960884a1ef8771a74a179e75d44ac9948edf Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 17:12:43 -0800 Subject: [PATCH 185/553] Refactor 'validateTmpObject' to return true when object has validated, and 'verifyObject' to return a boolean value in 'FileHashStore' --- .../filehashstore/FileHashStore.java | 93 +++++++++++-------- .../FileHashStoreReferencesTest.java | 30 +++--- 2 files changed, 71 insertions(+), 52 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7b4cd0a6..92d58ef9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -612,15 +612,21 @@ public boolean verifyObject( ); Path objAbsPath = OBJECT_STORE_DIRECTORY.resolve(cidShardString); - // TODO: ValidateTmpObject should not delete the object, just return a boolean - // - Revise this, and also ensure other methods that call it are reviewed. - validateTmpObject( - true, checksum, checksumAlgorithm, objAbsPath, hexDigests, objSize, objInfoRetrievedSize - ); - logFileHashStore.info( - "FileHashStore.verifyObject - Object with id: " + objId + " has been verified." - ); - return true; + try { + validateTmpObject( + true, checksum, checksumAlgorithm, objAbsPath, hexDigests, objSize, + objInfoRetrievedSize, false + ); + logFileHashStore.info( + "FileHashStore.verifyObject - Object with id: " + objId + " has been verified." + ); + return true; + + } catch (IOException | IllegalArgumentException | NoSuchAlgorithmException e) { + String errMsg = "FileHashStore.verifyObject - " + e.getMessage(); + logFileHashStore.warn(errMsg); + return false; + } } @Override @@ -1272,7 +1278,7 @@ protected ObjectMetadata putObject( // Validate object if checksum and checksum algorithm is passed validateTmpObject( requestValidation, checksum, checksumAlgorithm, tmpFilePath, hexDigests, objSize, - storedObjFileSize + storedObjFileSize, true ); // Gather the elements to form the permanent address @@ -1311,26 +1317,35 @@ protected ObjectMetadata putObject( * @param checksumAlgorithm Hash algorithm of checksum value * @param tmpFile tmpFile that has been written * @param hexDigests Map of the hex digests available to check with - * @throws NoSuchAlgorithmException When algorithm supplied is not supported - * @throws IOException When tmpFile fails to be deleted + * @param tmpFile Path to the file that is being evaluated + * @param hexDigests Map of the hex digests to parse data from + * @param objSize Expected size of object + * @param storedObjFileSize Actual size of object stored + * @param deleteTmpFile Confirm whether to delete file being evaluated if invalid + * @return + * @throws NoSuchAlgorithmException + * @throws IOException */ - private void validateTmpObject( + private boolean validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, - Map hexDigests, long objSize, long storedObjFileSize + Map hexDigests, long objSize, long storedObjFileSize, boolean deleteTmpFile ) throws NoSuchAlgorithmException, IOException { if (objSize > 0) { if (objSize != storedObjFileSize) { - // Delete tmp File - try { - Files.delete(tmpFile); - - } catch (Exception ge) { - String errMsg = - "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " - + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile; - logFileHashStore.error(errMsg); - throw new IOException(errMsg); + if (deleteTmpFile) { + // Delete tmp File + try { + Files.delete(tmpFile); + + } catch (Exception ge) { + String errMsg = + "FileHashStore.validateTmpObject - objSize given is not equal to the" + + " stored object size. ObjSize: " + objSize + + ". storedObjFileSize: " + storedObjFileSize + + ". Failed to delete tmpFile: " + tmpFile; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } } String errMsg = @@ -1358,18 +1373,20 @@ private void validateTmpObject( } if (!checksum.equalsIgnoreCase(digestFromHexDigests)) { - // Delete tmp File - try { - Files.delete(tmpFile); - - } catch (Exception ge) { - String errMsg = - "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" - + " is not equal to the calculated hex digest: " + digestFromHexDigests - + ". Checksum" + " provided: " + checksum - + ". Failed to delete tmpFile: " + tmpFile; - logFileHashStore.error(errMsg); - throw new IOException(errMsg); + if (deleteTmpFile) { + // Delete tmp File + try { + Files.delete(tmpFile); + + } catch (Exception ge) { + String errMsg = + "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" + + " is not equal to the calculated hex digest: " + + digestFromHexDigests + ". Checksum" + " provided: " + checksum + + ". Failed to delete tmpFile: " + tmpFile; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } } String errMsg = @@ -1380,6 +1397,8 @@ private void validateTmpObject( throw new IllegalArgumentException(errMsg); } } + + return true; } /** diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index ee2d9747..3b48adc7 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -319,7 +319,7 @@ public void deleteCidRefsPid_allPidsRemoved() throws Exception { } /** - * Check that verifyObject verifies with good values + * Check that verifyObject returns true with good values */ @Test public void verifyObject_correctValues() throws Exception { @@ -336,14 +336,15 @@ public void verifyObject_correctValues() throws Exception { String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.verifyObject( + boolean isObjectValid = fileHashStore.verifyObject( objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize ); + assertTrue(isObjectValid); } } /** - * Check that verifyObject verifies with good values + * Check that verifyObject returns false with mismatched size value */ @Test public void verifyObject_mismatchedValuesBadSize() throws Exception { @@ -360,16 +361,16 @@ public void verifyObject_mismatchedValuesBadSize() throws Exception { String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - ); - }); + boolean isObjectValid = fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + ); + assertFalse(isObjectValid); } } /** - * Check that verifyObject deletes file when there is a mismatch + * Check that verifyObject returns false and does not delete the file when + * there is a mismatch */ @Test public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { @@ -386,11 +387,10 @@ public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - ); - }); + boolean isObjectValid = fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + ); + assertFalse(isObjectValid); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -399,7 +399,7 @@ public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { storeDepth, storeWidth, actualCid ); Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - assertFalse(Files.exists(objectStoreDirectory)); + assertTrue(Files.exists(objectStoreDirectory)); } } From 9e87484b99202ea8c70d04e10706b16b31b047c2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 17:27:01 -0800 Subject: [PATCH 186/553] Add and implement new 'storeObject' overload method for checksum, checksum algo and object size; and add/revise junit tests.' --- .../java/org/dataone/hashstore/HashStore.java | 10 ++++- .../filehashstore/FileHashStore.java | 30 ++++++++++---- .../FileHashStoreInterfaceTest.java | 40 +++++++++++++++---- 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 34400ce1..8ffd6f90 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -78,7 +78,15 @@ public ObjectMetadata storeObject( public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; - // TODO: Revise this overload method to take size, this is likely the default storeObject + /** + * @see #storeObject(InputStream, String, String, String, String, long) + */ + public ObjectMetadata storeObject( + InputStream object, String pid, String checksum, String checksumAlgorithm, + long objSize + ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, + RuntimeException, InterruptedException; + /** * @see #storeObject(InputStream, String, String, String, String, long) */ diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 92d58ef9..957d54c1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -552,18 +552,20 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce return putObject(object, "HashStoreNoPid", null, null, null, -1); } + /** - * Overload method for storeObject with an additionalAlgorithm + * Overload method for storeObject with size and a checksum & checksumAlgorithm. */ @Override - public ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) - throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, + public ObjectMetadata storeObject( + InputStream object, String pid, String checksum, String checksumAlgorithm, long objSize + ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException { - FileHashStoreUtility.ensureNotNull( - additionalAlgorithm, "additionalAlgorithm", "storeObject" - ); + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeObject"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "storeObject"); + FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); - return storeObject(object, pid, additionalAlgorithm, null, null, -1); + return storeObject(object, pid, null, checksum, checksumAlgorithm, objSize); } /** @@ -592,6 +594,20 @@ public ObjectMetadata storeObject(InputStream object, String pid, long objSize) return storeObject(object, pid, null, null, null, objSize); } + /** + * Overload method for storeObject with an additionalAlgorithm + */ + @Override + public ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) + throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, + InterruptedException { + FileHashStoreUtility.ensureNotNull( + additionalAlgorithm, "additionalAlgorithm", "storeObject" + ); + + return storeObject(object, pid, additionalAlgorithm, null, null, -1); + } + @Override public boolean verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 6c7b45d5..5df53b32 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -213,30 +213,34 @@ public void storeObject_zeroObjSize() { } /** - * Verify that storeObject generates an additional checksum with overloaded method + * Verify that storeObject stores and validates a given checksum and its expected size + * with overloaded method */ @Test - public void storeObject_additionalAlgorithm_overload() throws Exception { + public void storeObject_overloadChecksumCsAlgoAndSize() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); + String md2 = testData.pidData.get(pid).get("md2"); + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, "MD2"); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, md2, "MD2", objectSize + ); Map hexDigests = objInfo.getHexDigests(); // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); assertEquals(md2, hexDigests.get("MD2")); } } /** - * Verify that storeObject validates checksum with overloaded method + * Verify that storeObject stores and validates a given checksum with overloaded method */ @Test - public void storeObject_validateChecksum_overload() throws Exception { + public void storeObject_overloadChecksumAndChecksumAlgo() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -256,7 +260,7 @@ public void storeObject_validateChecksum_overload() throws Exception { * Check that store object returns the correct ObjectMetadata size with overloaded method */ @Test - public void storeObject_objSize_overload() throws Exception { + public void storeObject_overloadObjSize() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -274,7 +278,7 @@ public void storeObject_objSize_overload() throws Exception { * any reference files) */ @Test - public void storeObject_inputStream_overload() throws Exception { + public void storeObject_overloadInputStreamOnly() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -297,6 +301,26 @@ public void storeObject_inputStream_overload() throws Exception { } } + /** + * Verify that storeObject generates an additional checksum with overloaded method + */ + @Test + public void storeObject_overloadAdditionalAlgo() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, "MD2"); + + Map hexDigests = objInfo.getHexDigests(); + + // Validate checksum values + String md2 = testData.pidData.get(pid).get("md2"); + assertEquals(md2, hexDigests.get("MD2")); + } + } + /** * Verify that storeObject returns the expected checksum value */ From ed4da4a6ddafba88c9ad0575080a65c0d86b458e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 22 Jan 2024 18:12:53 -0800 Subject: [PATCH 187/553] Add new 'deleteObject(String, boolean)' overload method for deleting an object based on its cid and add new junit tests --- .../java/org/dataone/hashstore/HashStore.java | 13 ++- .../filehashstore/FileHashStore.java | 34 ++++++++ .../FileHashStoreInterfaceTest.java | 84 ++++++++++++++++++- 3 files changed, 129 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 8ffd6f90..98b6dd5c 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -249,7 +249,18 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException; - // TODO: Determine path to directly delete a cid that exists by itself (store data only) + /** + * Delete an object based on its content identifier, with a flag to confirm intention. + * + * Note: This overload method should only be called when an issue arises during the storage + * of an object without a pid, and after verifying (via `verifyObject`) that the object is + * not what is expected. + * + * @param cid Content identifier + * @param deleteCid Boolean to confirm + */ + public void deleteObject(String cid, boolean deleteCid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; /** * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 957d54c1..7cb1d7f4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1015,6 +1015,38 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, return metadataCidInputStream; } + @Override + public void deleteObject(String cid, boolean deleteCid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException { + logFileHashStore.debug( + "FileHashStore.deleteObject - Called to delete object with content identifeir: " + cid + ); + if (deleteCid) { + // Validate input parameters + FileHashStoreUtility.ensureNotNull(cid, "cid", "deleteObject"); + FileHashStoreUtility.checkForEmptyString(cid, "cid", "deleteObject"); + + // Confirm that the object called to delete does not have a cid reference file + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + if (Files.exists(absCidRefsPath)) { + // The cid is referenced by pids, do not delete. + return; + + } else { + // Get permanent address of the actual cid + String objShardString = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid + ); + Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + + // If file exists, delete it. + if (Files.exists(expectedRealPath)) { + Files.delete(expectedRealPath); + } + } + } + } + @Override public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException, @@ -1035,6 +1067,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou // Delete the pid refs file and return, nothing else to delete. Path absPidRefsPath = getRealPath(pid, "refs", "pid"); Files.delete(absPidRefsPath); + String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); @@ -1044,6 +1077,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou // Delete pid refs file and return, nothing else to delete Path absPidRefsPath = getRealPath(pid, "refs", "pid"); Files.delete(absPidRefsPath); + String warnMsg = "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 5df53b32..bdd6d2d2 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -46,6 +46,7 @@ public class FileHashStoreInterfaceTest { private FileHashStore fileHashStore; private Properties fhsProperties; + private Path rootDirectory; private static final TestDataHarness testData = new TestDataHarness(); /** @@ -53,7 +54,7 @@ public class FileHashStoreInterfaceTest { */ @BeforeEach public void initializeFileHashStore() { - Path rootDirectory = tempFolder.resolve("metacat"); + rootDirectory = tempFolder.resolve("metacat"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -1385,6 +1386,87 @@ public void deleteObject_pidEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(" ")); } + /** + * Confirm deleteObject overload method to delete a cid deletes cid with a true bool + */ + @Test + public void deleteObject_overloadCidDeleteTrue() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + String cid = objInfo.getCid(); + + // Set flag to true + fileHashStore.deleteObject(cid, true); + + // Get permanent address of the actual cid + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String actualCid = objInfo.getCid(); + String cidShardString = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, actualCid + ); + Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); + assertFalse(Files.exists(objectStoreDirectory)); + } + } + + /** + * Confirm deleteObject overload method does not delete an object with a true bool + * because a cid refs file exists + */ + @Test + public void deleteObject_overloadCidDeleteTrueButCidRefsExists() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); + + // Set flag to true + fileHashStore.deleteObject(cid, true); + + // Get permanent address of the actual cid + Path objRealPath = fileHashStore.getRealPath(pid, "object", null); + assertTrue(Files.exists(objRealPath)); + } + } + + /** + * Confirm deleteObject overload method does not delete an object with a false bool + */ + @Test + public void deleteObject_overloadCidDeleteFalse() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + String cid = objInfo.getCid(); + + // Set flag to true + fileHashStore.deleteObject(cid, false); + + // Get permanent address of the actual cid + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String actualCid = objInfo.getCid(); + String cidShardString = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, actualCid + ); + Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); + assertTrue(Files.exists(objectStoreDirectory)); + } + } + /** * Confirm that deleteMetadata deletes metadata and empty sub directories */ From 2e512013ab594f83914b1756b8d302a36fa94b23 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 23 Jan 2024 10:34:03 -0800 Subject: [PATCH 188/553] Refactor 'writeTo...Checksums' method to ensure we do not calculate redundant hashes --- .../filehashstore/FileHashStore.java | 36 +++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7cb1d7f4..85568c36 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1479,6 +1479,23 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio return true; } + /** + * Checks whether the algorithm supplied is included in the DefaultHashAlgorithms + * + * @param algorithm Algorithm to check + * @return True if it's included + */ + private boolean isDefaultAlgorithm(String algorithm) { + boolean isDefaultAlgorithm = false; + for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { + if (algorithm.equals(defAlgo.getName())) { + isDefaultAlgorithm = true; + break; + } + } + return isDefaultAlgorithm; + } + /** * Determines whether an object will be verified with a given checksum and checksumAlgorithm * @@ -1536,17 +1553,22 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor protected Map writeToTmpFileAndGenerateChecksums( File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm ) throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { + // Determine whether to calculate additional or checksum algorithms + boolean generateAddAlgo = false; if (additionalAlgorithm != null) { FileHashStoreUtility.checkForEmptyString( additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(additionalAlgorithm); + generateAddAlgo = !isDefaultAlgorithm(additionalAlgorithm); } - if (checksumAlgorithm != null) { + boolean generateCsAlgo = false; + if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { FileHashStoreUtility.checkForEmptyString( checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(checksumAlgorithm); + generateCsAlgo = !isDefaultAlgorithm(checksumAlgorithm); } FileOutputStream os = new FileOutputStream(tmpFile); @@ -1557,14 +1579,14 @@ protected Map writeToTmpFileAndGenerateChecksums( MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); MessageDigest additionalAlgo = null; MessageDigest checksumAlgo = null; - if (additionalAlgorithm != null) { + if (generateAddAlgo) { logFileHashStore.debug( "FileHashStore.writeToTmpFileAndGenerateChecksums - Adding additional algorithm" + " to hex digest map, algorithm: " + additionalAlgorithm ); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); } - if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { + if (generateCsAlgo) { logFileHashStore.debug( "FileHashStore.writeToTmpFileAndGenerateChecksums - Adding checksum algorithm" + " to hex digest map, algorithm: " + checksumAlgorithm @@ -1583,10 +1605,10 @@ protected Map writeToTmpFileAndGenerateChecksums( sha256.update(buffer, 0, bytesRead); sha384.update(buffer, 0, bytesRead); sha512.update(buffer, 0, bytesRead); - if (additionalAlgorithm != null) { + if (generateAddAlgo) { additionalAlgo.update(buffer, 0, bytesRead); } - if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { + if (generateCsAlgo) { checksumAlgo.update(buffer, 0, bytesRead); } } @@ -1616,12 +1638,12 @@ protected Map writeToTmpFileAndGenerateChecksums( hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); - if (additionalAlgorithm != null) { + if (generateAddAlgo) { String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()) .toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } - if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { + if (generateCsAlgo) { String extraChecksumDigest = DatatypeConverter.printHexBinary(checksumAlgo.digest()) .toLowerCase(); hexDigests.put(checksumAlgorithm, extraChecksumDigest); From 44002153a4bc12b3438ce5822c24a6ec53b53a8c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 09:51:10 -0800 Subject: [PATCH 189/553] Update 'ObjectMetadata' class javadocs --- .../org/dataone/hashstore/ObjectMetadata.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 3c7986c7..9347a7c7 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -4,9 +4,9 @@ /** * ObjectMetadata is a class that models a unique identifier for an object in the HashStore. It - * encapsulates information about a file's id, size, and associated hash digest values. By using - * ObjectMetadata objects, client code can easily obtain metadata of a store object in HashStore - * without needing to know the underlying file system details. + * encapsulates information about a file's content identifier (cid), size, and associated hash + * digest values. By using ObjectMetadata objects, client code can easily obtain metadata of a store + * object in HashStore without needing to know the underlying file system details. */ public class ObjectMetadata { private final String cid; @@ -16,7 +16,7 @@ public class ObjectMetadata { /** * Creates a new instance of ObjectMetadata with the given properties. * - * @param id Unique identifier for the file + * @param cid Unique identifier for the file * @param size Size of stored file * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the * file @@ -28,9 +28,9 @@ public ObjectMetadata(String cid, long size, Map hexDigests) { } /** - * Return the id (address) of the file + * Return the cid (content identifier) of the file * - * @return id + * @return cid */ public String getCid() { return cid; @@ -39,14 +39,14 @@ public String getCid() { /** * Return the size of the file * - * @return id + * @return size */ public long getSize() { return size; } /** - * Return a map of hex digests + * Return a map of hex digests (checksums) * * @return hexDigests */ From a075d2d14408e47c5323c42c7bf83e54ebc3fe07 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 10:36:54 -0800 Subject: [PATCH 190/553] Refactor 'verify_object' to directly compare values and revert changes to 'validateTmpobject' --- .../filehashstore/FileHashStore.java | 95 ++++++++++--------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 85568c36..a3afe905 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -618,30 +618,35 @@ public boolean verifyObject( FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); + FileHashStoreUtility.checkNotNegativeOrZero(objSize, "verifyObject"); Map hexDigests = objectInfo.getHexDigests(); + String digestFromHexDigests = hexDigests.get(checksumAlgorithm); long objInfoRetrievedSize = objectInfo.getSize(); - String objId = objectInfo.getCid(); - // Object is not tagged at this stage, so we must manually form the permanent address of the file - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objId - ); - Path objAbsPath = OBJECT_STORE_DIRECTORY.resolve(cidShardString); + String objCid = objectInfo.getCid(); - try { - validateTmpObject( - true, checksum, checksumAlgorithm, objAbsPath, hexDigests, objSize, - objInfoRetrievedSize, false - ); + if (objInfoRetrievedSize != objSize) { logFileHashStore.info( - "FileHashStore.verifyObject - Object with id: " + objId + " has been verified." + "FileHashStore.verifyObject - Object size invalid for cid: " + objCid + + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize ); - return true; + return false; - } catch (IOException | IllegalArgumentException | NoSuchAlgorithmException e) { - String errMsg = "FileHashStore.verifyObject - " + e.getMessage(); - logFileHashStore.warn(errMsg); + } else if (!digestFromHexDigests.equals(checksum)) { + logFileHashStore.info( + "FileHashStore.verifyObject - Object content invalid for cid: " + objCid + + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" + ); return false; + + } else { + logFileHashStore.info( + "FileHashStore.verifyObject - Object has been validated for cid: " + objCid + + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" + ); + return true; } } @@ -1328,7 +1333,7 @@ protected ObjectMetadata putObject( // Validate object if checksum and checksum algorithm is passed validateTmpObject( requestValidation, checksum, checksumAlgorithm, tmpFilePath, hexDigests, objSize, - storedObjFileSize, true + storedObjFileSize ); // Gather the elements to form the permanent address @@ -1371,31 +1376,28 @@ protected ObjectMetadata putObject( * @param hexDigests Map of the hex digests to parse data from * @param objSize Expected size of object * @param storedObjFileSize Actual size of object stored - * @param deleteTmpFile Confirm whether to delete file being evaluated if invalid * @return * @throws NoSuchAlgorithmException * @throws IOException */ private boolean validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, - Map hexDigests, long objSize, long storedObjFileSize, boolean deleteTmpFile + Map hexDigests, long objSize, long storedObjFileSize ) throws NoSuchAlgorithmException, IOException { if (objSize > 0) { if (objSize != storedObjFileSize) { - if (deleteTmpFile) { - // Delete tmp File - try { - Files.delete(tmpFile); - - } catch (Exception ge) { - String errMsg = - "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize - + ". storedObjFileSize: " + storedObjFileSize - + ". Failed to delete tmpFile: " + tmpFile; - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } + // Delete tmp File + try { + Files.delete(tmpFile); + + } catch (Exception ge) { + String errMsg = + "FileHashStore.validateTmpObject - objSize given is not equal to the" + + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " + + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile + ". " + + ge.getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); } String errMsg = @@ -1423,20 +1425,19 @@ private boolean validateTmpObject( } if (!checksum.equalsIgnoreCase(digestFromHexDigests)) { - if (deleteTmpFile) { - // Delete tmp File - try { - Files.delete(tmpFile); - - } catch (Exception ge) { - String errMsg = - "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" - + " is not equal to the calculated hex digest: " - + digestFromHexDigests + ". Checksum" + " provided: " + checksum - + ". Failed to delete tmpFile: " + tmpFile; - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } + // Delete tmp File + try { + Files.delete(tmpFile); + + } catch (Exception ge) { + String errMsg = + "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" + + " is not equal to the calculated hex digest: " + digestFromHexDigests + + ". Checksum" + " provided: " + checksum + + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); + ; + logFileHashStore.error(errMsg); + throw new IOException(errMsg); } String errMsg = From 767a6edad4c7705ad3d9e9cdbf7ee12e9c7f074d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 11:58:38 -0800 Subject: [PATCH 191/553] Update 'HashStore' interface for new Public API method 'deleteObjectAll', update javadocs, updated README.md and add TODO items in 'HashStoreClient' --- README.md | 2 ++ .../java/org/dataone/hashstore/HashStore.java | 34 ++++++++++++------- .../dataone/hashstore/HashStoreClient.java | 2 ++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 3cac7b18..2bb46774 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ HashStore is a content-addressable file management system that utilizes the cont - retrieveObject - retrieveMetadata - deleteObject +- deleteObjectAll - deleteMetadata - getHexDigest @@ -92,6 +93,7 @@ tagObject(pid, cid) **How do I delete an object if I have the pid?** - To delete an object, call the Public API method `deleteObject` which will delete the object and its associated references and reference files where relevant. +- To delete an object and all its related data (reference files and system metadata), call the Public API method `deleteObjectAll` - Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 98b6dd5c..9e434d62 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -235,9 +235,13 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, /** * Deletes an object (and its empty subdirectories) permanently from HashStore using a given - * persistent identifier. + * persistent identifier. If the `idType` is 'pid', the object associated with the pid will + * be deleted if it is not referenced by any other pids, along with its reference files. + * If the `idType` is 'cid', only the object will be deleted if it is not referenced by + * other pids. * - * @param pid Authority-based identifier + * @param idType 'pid' or 'cid' + * @param id Authority-based identifier or content identifier * @throws IllegalArgumentException When pid is null or empty * @throws FileNotFoundException When requested pid has no associated object * @throws IOException I/O error when deleting empty directories, @@ -246,21 +250,25 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, * supported * @throws InterruptedException When deletion synchronization is interrupted */ - public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException; + public void deleteObject(String idType, String id) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException; /** - * Delete an object based on its content identifier, with a flag to confirm intention. - * - * Note: This overload method should only be called when an issue arises during the storage - * of an object without a pid, and after verifying (via `verifyObject`) that the object is - * not what is expected. + * Deletes an object and all relevant associated files (ex. system metadata, reference + * files, etc.) based on a given pid. If other pids still reference the object, the object + * will not be deleted. * - * @param cid Content identifier - * @param deleteCid Boolean to confirm + * @param pid Authority-based identifier + * @throws IllegalArgumentException When pid is null or empty + * @throws FileNotFoundException When requested pid has no associated object + * @throws IOException I/O error when deleting empty directories, + * modifying/deleting reference files + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + * @throws InterruptedException When deletion synchronization is interrupted */ - public void deleteObject(String cid, boolean deleteCid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + public void deleteObjectAll(String pid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException; /** * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index ceb31efb..614da676 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -230,6 +230,7 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("deleteobject")) { String pid = cmd.getOptionValue("pid"); FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + // TODO: Use correct overload method when ready hashStore.deleteObject(pid); System.out.println("Object for pid (" + pid + ") has been deleted."); @@ -701,6 +702,7 @@ private static void deleteObjectsFromStore(List> resultObjLi // Delete object System.out.println("Deleting object for guid: " + guid); + // TODO: Use correct overload method when ready hashStore.deleteObject(guid); } catch (FileNotFoundException fnfe) { From d0e2b62f38bff6e91367ad3dfd944c22f04c14f7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 13:44:00 -0800 Subject: [PATCH 192/553] Refactor 'deleteObject(pid)' to 'deleteObject(idType, id)', add new method 'tryDeleteCidObject' and add/update junit tests --- .../dataone/hashstore/HashStoreClient.java | 11 +- .../filehashstore/FileHashStore.java | 271 ++++++++++-------- .../FileHashStoreInterfaceTest.java | 111 +++---- .../FileHashStoreProtectedTest.java | 56 ++++ 4 files changed, 276 insertions(+), 173 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 614da676..25a85ed2 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -21,7 +21,7 @@ import java.sql.Statement; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; - +import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -230,8 +230,9 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("deleteobject")) { String pid = cmd.getOptionValue("pid"); FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - // TODO: Use correct overload method when ready - hashStore.deleteObject(pid); + + String deleteIdType = HashStoreIdTypes.pid.getName("pid"); + hashStore.deleteObject(deleteIdType, pid); System.out.println("Object for pid (" + pid + ") has been deleted."); } else if (cmd.hasOption("deletemetadata")) { @@ -702,8 +703,8 @@ private static void deleteObjectsFromStore(List> resultObjLi // Delete object System.out.println("Deleting object for guid: " + guid); - // TODO: Use correct overload method when ready - hashStore.deleteObject(guid); + String deleteIdType = HashStoreIdTypes.pid.getName("pid"); + hashStore.deleteObject(deleteIdType, guid); } catch (FileNotFoundException fnfe) { String errMsg = "Unexpected Error: " + fnfe.fillInStackTrace(); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a3afe905..f8766f65 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -70,6 +70,21 @@ public class FileHashStore implements HashStore { public static final String[] SUPPORTED_HASH_ALGORITHMS = {"MD2", "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; + public enum HashStoreIdTypes { + + cid("cid"), pid("pid"); + + final String identifierType; + + HashStoreIdTypes(String idType) { + identifierType = idType; + } + + public String getName(String string) { + return identifierType; + } + } + enum DefaultHashAlgorithms { MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); @@ -1021,143 +1036,136 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, } @Override - public void deleteObject(String cid, boolean deleteCid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.deleteObject - Called to delete object with content identifeir: " + cid - ); - if (deleteCid) { - // Validate input parameters - FileHashStoreUtility.ensureNotNull(cid, "cid", "deleteObject"); - FileHashStoreUtility.checkForEmptyString(cid, "cid", "deleteObject"); - - // Confirm that the object called to delete does not have a cid reference file - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - if (Files.exists(absCidRefsPath)) { - // The cid is referenced by pids, do not delete. - return; - - } else { - // Get permanent address of the actual cid - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid - ); - Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); - - // If file exists, delete it. - if (Files.exists(expectedRealPath)) { - Files.delete(expectedRealPath); - } - } - } - } - - @Override - public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException, + public void deleteObject(String idType, String id) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException, PidNotFoundInCidRefsFileException { logFileHashStore.debug( - "FileHashStore.deleteObject - Called to delete object for pid: " + pid + "FileHashStore.deleteObject - Called to delete object for id: " + id + "(" + idType + + ")" ); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteObject"); + FileHashStoreUtility.ensureNotNull(id, "id", "deleteObject"); + FileHashStoreUtility.checkForEmptyString(id, "id", "deleteObject"); + if (!idType.equals(HashStoreIdTypes.pid.getName("pid")) && !idType.equals( + HashStoreIdTypes.cid.getName("cid") - // First, find the object and evaluate its state - String cid; - try { - cid = findObject(pid); - - } catch (OrphanPidRefsFileException oprfe) { - // Delete the pid refs file and return, nothing else to delete. - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - Files.delete(absPidRefsPath); + )) { + String errMsg = "FileHashStore.deleteObject - 'idType' must be 'pid' or 'cid'"; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + } - String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " - + pid + ". Deleted orphan pid refs file."; - logFileHashStore.warn(warnMsg); - return; + // If 'idType' is cid, attempt to delete the object + if (idType.equals(HashStoreIdTypes.cid.getName("cid"))) { + tryDeleteCidObject(id); - } catch (PidNotFoundInCidRefsFileException pnficrfe) { - // Delete pid refs file and return, nothing else to delete - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - Files.delete(absPidRefsPath); + } else { + // Else 'idType' is pid + String cid; + String pid = id; + try { + // Begin by looking for the cid and confirming state + cid = findObject(id); + + } catch (OrphanPidRefsFileException oprfe) { + // Delete the pid refs file and return, nothing else to delete. + Path absPidRefsPath = getRealPath(id, "refs", "pid"); + Files.delete(absPidRefsPath); + + String warnMsg = + "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + id + + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + return; - String warnMsg = - "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " - + pid + ". Deleted orphan pid refs file."; - logFileHashStore.warn(warnMsg); - return; - } + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // Delete pid refs file and return, nothing else to delete + Path absPidRefsPath = getRealPath(pid, "refs", "pid"); + Files.delete(absPidRefsPath); - // If cid has been retrieved without any errors, proceed with second stage of deletion. - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); + String warnMsg = + "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " + + pid + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + return; + } - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" - + " waiting to delete object with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); + // Proceed with next steps - cid has been retrieved without any errors + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" + + " waiting to delete object with cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.add(cid); } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.add(cid); - } - try { - // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getRealPath(pid, "object", null); - // Get the path to the cid refs file to work with - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + try { + // Get permanent address of the pid by calculating its sha-256 hex digest + Path objRealPath = getRealPath(pid, "object", null); + // Get the path to the cid refs file to work with + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - if (!Files.exists(objRealPath)) { - // Throw exception if object doesn't exist - String errMsg = "FileHashStore.deleteObject - File does not exist for pid: " + pid - + " with object address: " + objRealPath; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); + if (!Files.exists(objRealPath)) { + // Throw exception if object doesn't exist + String errMsg = "FileHashStore.deleteObject - File does not exist for pid: " + + pid + " with object address: " + objRealPath; + logFileHashStore.error(errMsg); + throw new FileNotFoundException(errMsg); - } else { - // Proceed to delete the reference files and object - // Delete pid reference file - deletePidRefsFile(pid); - // Remove pid from cid refs file - deleteCidRefsPid(pid, absCidRefsPath); - // Delete obj and cid refs file only if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Delete empty cid refs file - Files.delete(absCidRefsPath); - // Delete actual object - Files.delete(objRealPath); } else { - String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (references exist for the cid). Skipping object deletion."; - logFileHashStore.warn(warnMsg); + // Proceed to delete the reference files and object + // Delete pid reference file + deletePidRefsFile(pid); + // Remove pid from cid refs file + deleteCidRefsPid(pid, absCidRefsPath); + // Delete obj and cid refs file only if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Delete empty cid refs file + Files.delete(absCidRefsPath); + // Delete actual object + Files.delete(objRealPath); + } else { + String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + + pid + + " is not empty (references exist for the cid). Skipping object deletion."; + logFileHashStore.warn(warnMsg); + } + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath + ); + } + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.remove(cid); + referenceLockedCids.notifyAll(); } - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath - ); - // TODO: Discuss where deleteObject should also remove all default system metadata - } - } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll(); } } + } + @Override + public void deleteObjectAll(String pid) throws IllegalArgumentException, FileNotFoundException, + IOException, NoSuchAlgorithmException, InterruptedException, + PidNotFoundInCidRefsFileException { + // TODO + return; } @Override @@ -1722,6 +1730,35 @@ protected void move(File source, File target, String entity) throws IOException, } } + /** + * Attempt to delete an object based on the given content identifier (cid). If the object + * has pids that references it and/or a cid refs file exists, the object will not be deleted. + * + * @param cid Content identifier + * @throws IOException + * @throws NoSuchAlgorithmException + */ + protected void tryDeleteCidObject(String cid) throws IOException, NoSuchAlgorithmException { + Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + if (Files.exists(absCidRefsPath)) { + // The cid is referenced by pids, do not delete. + return; + + } else { + // Get permanent address of the actual cid + String objShardString = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid + ); + Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + + // If file exists, delete it. + if (Files.exists(expectedRealPath)) { + Files.delete(expectedRealPath); + } + return; + } + } + /** * Verifies that the reference files for the given pid and cid exist and contain * the expected values. diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index bdd6d2d2..93f1622e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -32,6 +32,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -48,6 +49,8 @@ public class FileHashStoreInterfaceTest { private Properties fhsProperties; private Path rootDirectory; private static final TestDataHarness testData = new TestDataHarness(); + private String fhsDeleteTypePid; + private String fhsDeleteTypeCid; /** * Initialize FileHashStore before each test to creates tmp directories @@ -55,6 +58,8 @@ public class FileHashStoreInterfaceTest { @BeforeEach public void initializeFileHashStore() { rootDirectory = tempFolder.resolve("metacat"); + fhsDeleteTypePid = HashStoreIdTypes.pid.getName("pid"); + fhsDeleteTypeCid = HashStoreIdTypes.cid.getName("cid"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -1230,11 +1235,33 @@ public void retrieveMetadata_verifyContent() throws Exception { } } + /** + * Confirm that deleteObject throws exceptions when not using HashStoreIdTypes + */ + @Test + public void deleteObject_invalidIdTypes() { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.deleteObject( + "PID", "dou.2023.hashstore.1" + ) + ); + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.deleteObject( + "CID", "dou.2023.hashstore.1" + ) + ); + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.deleteObject( + "bad.value", "dou.2023.hashstore.1" + ) + ); + } + /** * Confirm that deleteObject deletes object */ @Test - public void deleteObject_objectDeleted() throws Exception { + public void deleteObject_Pid_objectDeleted() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1243,7 +1270,7 @@ public void deleteObject_objectDeleted() throws Exception { fileHashStore.storeObject(dataStream, pid, null, null, null, -1); Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - fileHashStore.deleteObject(pid); + fileHashStore.deleteObject(fhsDeleteTypePid, pid); // Check that file doesn't exist assertFalse(Files.exists(objCidAbsPath)); @@ -1262,7 +1289,7 @@ public void deleteObject_objectDeleted() throws Exception { * Confirm that deleteObject deletes reference files */ @Test - public void deleteObject_referencesDeleted() throws Exception { + public void deleteObject_Pid_referencesDeleted() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1276,7 +1303,7 @@ public void deleteObject_referencesDeleted() throws Exception { // Path objAbsPath = fileHashStore.getRealPath(pid, "object", null); Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteObject(pid); + fileHashStore.deleteObject(fhsDeleteTypePid, pid); assertFalse(Files.exists(absPathPidRefsPath)); assertFalse(Files.exists(absPathCidRefsPath)); } @@ -1288,7 +1315,7 @@ public void deleteObject_referencesDeleted() throws Exception { * has references). */ @Test - public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exception { + public void deleteObject_Pid_CidRefsFileNotEmptyObjectExistsStill() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1304,7 +1331,7 @@ public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exceptio Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteObject(pid); + fileHashStore.deleteObject(fhsDeleteTypePid, pid); assertFalse(Files.exists(absPathPidRefsPath)); assertTrue(Files.exists(objCidAbsPath)); @@ -1319,7 +1346,7 @@ public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exceptio * @throws Exception */ @Test - public void deleteObject_pidOrphan() throws Exception { + public void deleteObject_Pid_pidOrphan() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1338,7 +1365,7 @@ public void deleteObject_pidOrphan() throws Exception { // Create an orphaned pid refs file fileHashStore.deleteCidRefsPid(pid, absPathCidRefsPath); - fileHashStore.deleteObject(pid); + fileHashStore.deleteObject(fhsDeleteTypePid, pid); // Confirm cid refs file still exists assertTrue(Files.exists(absPathCidRefsPath)); @@ -1356,9 +1383,11 @@ public void deleteObject_pidOrphan() throws Exception { * Confirm that deleteObject throws exception when associated pid obj not found */ @Test - public void deleteObject_pidNotFound() { + public void deleteObject_Pid_NotFoundPid() { assertThrows( - FileNotFoundException.class, () -> fileHashStore.deleteObject("dou.2023.hashstore.1") + FileNotFoundException.class, () -> fileHashStore.deleteObject( + fhsDeleteTypePid, "dou.2023.hashstore.1" + ) ); } @@ -1366,31 +1395,39 @@ public void deleteObject_pidNotFound() { * Confirm that deleteObject throws exception when pid is null */ @Test - public void deleteObject_pidNull() { - assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(null)); + public void deleteObject_idNull() { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.deleteObject(fhsDeleteTypePid, null) + ); } /** * Confirm that deleteObject throws exception when pid is empty */ @Test - public void deleteObject_pidEmpty() { - assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject("")); + public void deleteObject_idEmpty() { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.deleteObject(fhsDeleteTypePid, "") + ); } /** * Confirm that deleteObject throws exception when pid is empty spaces */ @Test - public void deleteObject_pidEmptySpaces() { - assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(" ")); + public void deleteObject_idEmptySpaces() { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.deleteObject( + fhsDeleteTypePid, " " + ) + ); } /** - * Confirm deleteObject overload method to delete a cid deletes cid with a true bool + * Confirm deleteObject with idType 'cid' deletes cid object */ @Test - public void deleteObject_overloadCidDeleteTrue() throws Exception { + public void deleteObject_Cid_idType() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1400,7 +1437,7 @@ public void deleteObject_overloadCidDeleteTrue() throws Exception { String cid = objInfo.getCid(); // Set flag to true - fileHashStore.deleteObject(cid, true); + fileHashStore.deleteObject(fhsDeleteTypeCid, cid); // Get permanent address of the actual cid int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -1415,11 +1452,11 @@ public void deleteObject_overloadCidDeleteTrue() throws Exception { } /** - * Confirm deleteObject overload method does not delete an object with a true bool - * because a cid refs file exists + * Confirm deleteObject with idType 'cid' does not delete an object because a cid refs file + * exists (there are still pids referencing the object) */ @Test - public void deleteObject_overloadCidDeleteTrueButCidRefsExists() throws Exception { + public void deleteObject_Cid_AndCidRefsExists() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1431,7 +1468,7 @@ public void deleteObject_overloadCidDeleteTrueButCidRefsExists() throws Exceptio String cid = objInfo.getCid(); // Set flag to true - fileHashStore.deleteObject(cid, true); + fileHashStore.deleteObject(fhsDeleteTypeCid, cid); // Get permanent address of the actual cid Path objRealPath = fileHashStore.getRealPath(pid, "object", null); @@ -1439,34 +1476,6 @@ public void deleteObject_overloadCidDeleteTrueButCidRefsExists() throws Exceptio } } - /** - * Confirm deleteObject overload method does not delete an object with a false bool - */ - @Test - public void deleteObject_overloadCidDeleteFalse() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String cid = objInfo.getCid(); - - // Set flag to true - fileHashStore.deleteObject(cid, false); - - // Get permanent address of the actual cid - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getCid(); - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, actualCid - ); - Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - assertTrue(Files.exists(objectStoreDirectory)); - } - } - /** * Confirm that deleteMetadata deletes metadata and empty sub directories */ diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 09a26f15..f17fa63e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -916,6 +916,62 @@ public void isPidInCidRefsFile_pidNotFound() throws Exception { } } + /** + * Confirm tryDeleteCidObject overload method does not delete an object if pid and cid + * refs files exist. + */ + @Test + public void tryDeleteCidObject_pidRefsExists() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + // Store object only + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + String cid = objInfo.getCid(); + + // Set flag to true + fileHashStore.tryDeleteCidObject(cid); + + // Get permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String objShardString = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, cid + ); + + Path objRealPath = storePath.resolve("objects").resolve(objShardString); + assertFalse(Files.exists(objRealPath)); + } + } + + /** + * Confirm tryDeleteCidObject overload method does not delete an object if a cid refs file + * exists (pids still referencing it). + */ + @Test + public void tryDeleteCidObject_cidRefsFileContainsPids() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); + + // Set flag to true + fileHashStore.tryDeleteCidObject(cid); + + // Get permanent address of the actual cid + Path objRealPath = fileHashStore.getRealPath(pid, "object", null); + assertTrue(Files.exists(objRealPath)); + } + } + @Test public void getRealPath() throws Exception { // Get single test file to "upload" From b03c40c5e033732f9ef8c8b1895dfd9cf124855d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 14:17:56 -0800 Subject: [PATCH 193/553] Rename 'getRealPath' to 'getExpectedPath' and update junit tests --- .../filehashstore/FileHashStore.java | 40 +++++----- .../FileHashStoreInterfaceTest.java | 74 +++++++++---------- .../FileHashStoreProtectedTest.java | 14 ++-- .../FileHashStoreReferencesTest.java | 24 +++--- 4 files changed, 76 insertions(+), 76 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f8766f65..d349534a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -506,7 +506,7 @@ private ObjectMetadata syncPutObject( tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid - + ". Permanent address: " + getRealPath(pid, "object", null) + + ". Permanent address: " + getExpectedPath(pid, "object", null) ); return objInfo; @@ -698,8 +698,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } try { - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); + Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); // Check that pid refs file doesn't exist yet if (Files.exists(absPidRefsPath)) { @@ -763,11 +763,11 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); // Get path of the pid references file - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); + Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); if (Files.exists(absPidRefsPath)) { String cid = new String(Files.readAllBytes(absPidRefsPath)); - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { @@ -917,7 +917,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveObject"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getRealPath(pid, "object", null); + Path objRealPath = getExpectedPath(pid, "object", null); // Check to see if object exists if (!Files.exists(objRealPath)) { @@ -960,7 +960,7 @@ public InputStream retrieveMetadata(String pid, String formatId) FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", formatId); + Path metadataCidPath = getExpectedPath(pid, "metadata", formatId); // Check to see if metadata exists if (!Files.exists(metadataCidPath)) { @@ -1004,7 +1004,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", DEFAULT_METADATA_NAMESPACE); + Path metadataCidPath = getExpectedPath(pid, "metadata", DEFAULT_METADATA_NAMESPACE); // Check to see if metadata exists if (!Files.exists(metadataCidPath)) { @@ -1069,7 +1069,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanPidRefsFileException oprfe) { // Delete the pid refs file and return, nothing else to delete. - Path absPidRefsPath = getRealPath(id, "refs", "pid"); + Path absPidRefsPath = getExpectedPath(id, "refs", "pid"); Files.delete(absPidRefsPath); String warnMsg = @@ -1080,7 +1080,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (PidNotFoundInCidRefsFileException pnficrfe) { // Delete pid refs file and return, nothing else to delete - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); + Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); Files.delete(absPidRefsPath); String warnMsg = @@ -1113,9 +1113,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti try { // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getRealPath(pid, "object", null); + Path objRealPath = getExpectedPath(pid, "object", null); // Get the path to the cid refs file to work with - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); if (!Files.exists(objRealPath)) { // Throw exception if object doesn't exist @@ -1181,7 +1181,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", formatId); + Path metadataCidPath = getExpectedPath(pid, "metadata", formatId); if (!Files.exists(metadataCidPath)) { String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid @@ -1226,7 +1226,7 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE } else { // Get permanent address of the pid - Path objRealPath = getRealPath(pid, "object", null); + Path objRealPath = getExpectedPath(pid, "object", null); // Check to see if object exists if (!Files.exists(objRealPath)) { @@ -1739,7 +1739,7 @@ protected void move(File source, File target, String entity) throws IOException, * @throws NoSuchAlgorithmException */ protected void tryDeleteCidObject(String cid) throws IOException, NoSuchAlgorithmException { - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); if (Files.exists(absCidRefsPath)) { // The cid is referenced by pids, do not delete. return; @@ -1939,7 +1939,7 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO FileHashStoreUtility.ensureNotNull(pid, "pid", "deletePidRefsFile"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deletePidRefsFile"); - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); + Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); // Check to see if pid refs file exists if (!Files.exists(absPidRefsPath)) { String errMsg = @@ -2034,7 +2034,7 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) String metadataCid = FileHashStoreUtility.getPidHexDigest( pid + checkedFormatId, OBJECT_STORE_ALGORITHM ); - Path metadataCidPath = getRealPath(pid, "metadata", checkedFormatId); + Path metadataCidPath = getExpectedPath(pid, "metadata", checkedFormatId); // Store metadata to tmpMetadataFile File tmpMetadataFile = FileHashStoreUtility.generateTmpFile( @@ -2102,7 +2102,7 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea * @throws NoSuchAlgorithmException If store algorithm is not supported * @throws IOException If unable to retrieve cid */ - protected Path getRealPath(String abId, String entity, String formatId) + protected Path getExpectedPath(String abId, String entity, String formatId) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { Path realPath; if (entity.equalsIgnoreCase("object")) { @@ -2138,14 +2138,14 @@ protected Path getRealPath(String abId, String entity, String formatId) realPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); } else { String errMsg = - "FileHashStore.getRealPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; + "FileHashStore.getExpectedPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } } else { throw new IllegalArgumentException( - "FileHashStore.getRealPath - entity must be 'object' or 'metadata'" + "FileHashStore.getExpectedPath - entity must be 'object' or 'metadata'" ); } return realPath; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 93f1622e..ca679138 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -302,7 +302,7 @@ public void storeObject_overloadInputStreamOnly() throws Exception { fileHashStore.findObject(pid); }); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertFalse(Files.exists(cidRefsFilePath)); } } @@ -341,7 +341,7 @@ public void storeObject_validateChecksumValue() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } @@ -490,7 +490,7 @@ public void storeObject_duplicate() throws Exception { ); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(fileHashStore.isPidInCidRefsFile(pid, absCidRefsPath)); assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); } @@ -525,7 +525,7 @@ public void storeObject_largeSparseFile() throws Exception { String pid = "dou.sparsefile.1"; fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); assertTrue(Files.exists(objCidAbsPath)); } @@ -577,10 +577,10 @@ public void storeObject_interruptProcess() throws Exception { /** * Tests that the `storeObject` method can store an object successfully with multiple threads * (5). This test uses five futures (threads) that run concurrently, all except one of which - * will encounter an `ExecutionException`. The thread that does not encounter an exception will + * will encounter an `RunTimeException`. The thread that does not encounter an exception will * store the given object, and verifies that the object is stored successfully. * - * The threads that run into exceptions will encounter a `RunTimeException` since the expected + * The threads are expected to encounter a `RunTimeException` since the expected * object to store is already in progress (thrown by `syncPutObject` which coordinates * `store_object` requests with a pid). */ @@ -602,9 +602,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -625,9 +625,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -644,9 +644,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -663,9 +663,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -682,9 +682,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -928,7 +928,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { // Confirm metadata file is written Path storePath = Paths.get(fhsProperties.getProperty("storePath")); String formatId = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataCidAbsPath = fileHashStore.getRealPath(pid, "metadata", formatId); + Path metadataCidAbsPath = fileHashStore.getExpectedPath(pid, "metadata", formatId); assertTrue(Files.exists(metadataCidAbsPath)); // Confirm there are only two files in HashStore - 'hashstore.yaml' and the @@ -1269,7 +1269,7 @@ public void deleteObject_Pid_objectDeleted() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); fileHashStore.deleteObject(fhsDeleteTypePid, pid); // Check that file doesn't exist @@ -1300,9 +1300,9 @@ public void deleteObject_Pid_referencesDeleted() throws Exception { ); String cid = objInfo.getCid(); - // Path objAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path absPathPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.deleteObject(fhsDeleteTypePid, pid); assertFalse(Files.exists(absPathPidRefsPath)); assertFalse(Files.exists(absPathCidRefsPath)); @@ -1328,9 +1328,9 @@ public void deleteObject_Pid_CidRefsFileNotEmptyObjectExistsStill() throws Excep String cid = objInfo.getCid(); fileHashStore.tagObject(pidExtra, cid); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path absPathPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.deleteObject(fhsDeleteTypePid, pid); assertFalse(Files.exists(absPathPidRefsPath)); @@ -1357,10 +1357,10 @@ public void deleteObject_Pid_pidOrphan() throws Exception { ); String cid = objInfo.getCid(); String pidExtra = "dou.test" + pid; - Path objRealPath = fileHashStore.getRealPath(pid, "object", null); + Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); // Manually change the pid found in the cid refs file - Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.updateCidRefsFiles(pidExtra, absPathCidRefsPath); // Create an orphaned pid refs file fileHashStore.deleteCidRefsPid(pid, absPathCidRefsPath); @@ -1370,7 +1370,7 @@ public void deleteObject_Pid_pidOrphan() throws Exception { // Confirm cid refs file still exists assertTrue(Files.exists(absPathCidRefsPath)); // Confirm the original (and now orphaned) pid refs file is deleted - Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path absPathPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); assertFalse(Files.exists(absPathPidRefsPath)); // Confirm the object has not been deleted assertTrue(Files.exists(objRealPath)); @@ -1471,7 +1471,7 @@ public void deleteObject_Cid_AndCidRefsExists() throws Exception { fileHashStore.deleteObject(fhsDeleteTypeCid, cid); // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getRealPath(pid, "object", null); + Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); assertTrue(Files.exists(objRealPath)); } } @@ -1494,7 +1494,7 @@ public void deleteMetadata() throws Exception { fileHashStore.deleteMetadata(pid, storeFormatId); // Check that file doesn't exist - Path metadataCidPath = fileHashStore.getRealPath(pid, "metadata", storeFormatId); + Path metadataCidPath = fileHashStore.getExpectedPath(pid, "metadata", storeFormatId); assertFalse(Files.exists(metadataCidPath)); // Check that parent directories are not deleted @@ -1525,7 +1525,7 @@ public void deleteMetadata_overload() throws Exception { // Check that file doesn't exist String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - Path metadataCidPath = fileHashStore.getRealPath(pid, "metadata", storeFormatId); + Path metadataCidPath = fileHashStore.getExpectedPath(pid, "metadata", storeFormatId); assertFalse(Files.exists(metadataCidPath)); // Check that parent directories are not deleted @@ -1724,7 +1724,7 @@ public void findObject_cidRefsFileNotFound() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); Files.delete(cidRefsPath); assertThrows(OrphanPidRefsFileException.class, () -> { @@ -1743,7 +1743,7 @@ public void findObject_cidRefsFileMissingPid() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.deleteCidRefsPid(pid, cidRefsPath); assertThrows(PidNotFoundInCidRefsFileException.class, () -> { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index f17fa63e..9a951f62 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -891,7 +891,7 @@ public void isPidInCidRefsFile_pidFound() throws Exception { ); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); } } @@ -911,7 +911,7 @@ public void isPidInCidRefsFile_pidNotFound() throws Exception { ); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertFalse(fileHashStore.isPidInCidRefsFile("pid.not.found", absCidRefsPath)); } } @@ -967,13 +967,13 @@ public void tryDeleteCidObject_cidRefsFileContainsPids() throws Exception { fileHashStore.tryDeleteCidObject(cid); // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getRealPath(pid, "object", null); + Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); assertTrue(Files.exists(objRealPath)); } } @Test - public void getRealPath() throws Exception { + public void getExpectedPath() throws Exception { // Get single test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); @@ -982,9 +982,9 @@ public void getRealPath() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 3b48adc7..64e26aff 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -76,7 +76,7 @@ public void tagObject_pidRefsFile() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); assertTrue(Files.exists(pidRefsFilePath)); } @@ -89,7 +89,7 @@ public void tagObject_cidRefsFile() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(cidRefsFilePath)); } @@ -120,12 +120,12 @@ public void tagObject_cidRefsFileExists() throws Exception { String pidAdditional = "another.pid.2"; fileHashStore.tagObject(pidAdditional, cid); - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); assertTrue(Files.exists(pidRefsFilePath)); // Check cid refs file - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); boolean pidFoundInCidRefFiles = fileHashStore.isPidInCidRefsFile( pidAdditional, cidRefsFilePath ); @@ -141,12 +141,12 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { String cid = "abcdef123456789"; File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.move(cidRefsTmpFile, cidRefsFilePath.toFile(), "refs"); fileHashStore.tagObject(pid, cid); - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); assertTrue(Files.exists(pidRefsFilePath)); // Confirm that cid refs file only has 1 line @@ -195,7 +195,7 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertThrows(IOException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, cidRefsFilePath); @@ -217,7 +217,7 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); // Get path of the pid refs file - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); assertThrows(IOException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsTmpFilePath); @@ -234,7 +234,7 @@ public void updateCidRefsFiles_content() throws Exception { fileHashStore.tagObject(pid, cid); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); String pidAdditional = "dou.test.2"; fileHashStore.updateCidRefsFiles("dou.test.2", cidRefsFilePath); @@ -265,7 +265,7 @@ public void deletePidRefsFile_fileDeleted() throws Exception { fileHashStore.deletePidRefsFile(pid); - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); assertFalse(Files.exists(pidRefsFilePath)); } @@ -292,7 +292,7 @@ public void deleteCidRefsPid_pidRemoved() throws Exception { String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); assertFalse(fileHashStore.isPidInCidRefsFile(pid, cidRefsFilePath)); @@ -309,7 +309,7 @@ public void deleteCidRefsPid_allPidsRemoved() throws Exception { fileHashStore.tagObject(pid, cid); String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); fileHashStore.deleteCidRefsPid(pidAdditional, cidRefsFilePath); From 3a2e16e6039374111b751e4d3e2b475156a5b2f0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 16:19:08 -0800 Subject: [PATCH 194/553] Refactor 'writePidRefsFile' and 'writeCidRefsFile' into 'writeRefsFile' and update junit tests --- .../filehashstore/FileHashStore.java | 68 +++++++------------ .../FileHashStoreReferencesTest.java | 24 ++----- 2 files changed, 29 insertions(+), 63 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d349534a..6dde2c7c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -174,6 +174,7 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep Files.createDirectories(REFS_TMP_FILE_DIRECTORY); Files.createDirectories(REFS_PID_FILE_DIRECTORY); Files.createDirectories(REFS_CID_FILE_DIRECTORY); + // TODO: Create formatId-namespace tracking document logFileHashStore.debug("FileHashStore - Created store and store tmp directories."); } catch (IOException ioe) { @@ -715,7 +716,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi updateCidRefsFiles(pid, absCidRefsPath); } // Get the pid refs file - File pidRefsTmpFile = writePidRefsFile(cid); + File pidRefsTmpFile = writeRefsFile(cid, "pid"); File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); // Verify tagging process, this throws exceptions if there's an issue @@ -728,8 +729,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } else { // Get pid and cid refs files - File pidRefsTmpFile = writePidRefsFile(cid); - File cidRefsTmpFile = writeCidRefsFile(pid); + File pidRefsTmpFile = writeRefsFile(cid, "pid"); + File cidRefsTmpFile = writeRefsFile(pid, "cid"); // Move refs files to permanent location File absPathPidRefsFile = absPidRefsPath.toFile(); File absPathCidRefsFile = absCidRefsPath.toFile(); @@ -868,6 +869,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF "FileHashStore.storeMetadata - Metadata stored for pid: " + pid + ". Metadata Content Identifier (metadataCid): " + metadataCid ); + // TODO: Save formatId if it doesn't already exist return metadataCid; } catch (IOException ioe) { @@ -1164,7 +1166,12 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti public void deleteObjectAll(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException, PidNotFoundInCidRefsFileException { - // TODO + // First, delete object as expected normally + deleteObject("pid", pid); + // TODO: + // Then look for and remove all related sysmeta + // Open metadata reference file, read all the format types + // Call 'delete_metadata(pid, formatId)' for all types return; } @@ -1811,63 +1818,34 @@ protected void verifyHashStoreRefsFiles( } } + /** - * Writes the given 'pid' into a file in the 'cid' refs file format, which consists of - * multiple pids that references a 'cid' each on its own line (delimited by "\n"). + * Writes the given string into a temporary file. The client must explicitly move this file to + * where belongs otherwise it will be removed during garbage collection. * - * @param pid Authority-based or persistent identifier to write - * @throws IOException Failure to write pid refs file + * @param ref Authority-based or persistent identifier to write + * @param refType Type of reference 'pid', 'cid' or 'sysmeta' + * @throws IOException Failure to write refs file + * @return File object with single reference */ - protected File writeCidRefsFile(String pid) throws IOException { + protected File writeRefsFile(String ref, String refType) throws IOException { File cidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); try (BufferedWriter writer = new BufferedWriter( new OutputStreamWriter( Files.newOutputStream(cidRefsTmpFile.toPath()), StandardCharsets.UTF_8 ) )) { - writer.write(pid); + writer.write(ref); writer.close(); logFileHashStore.debug( - "FileHashStore.writeCidRefsFile - cid refs file written for: " + pid + "FileHashStore.writeRefsFile - " + refType + " refs file written for: " + ref ); return cidRefsTmpFile; } catch (IOException ioe) { - logFileHashStore.error( - "FileHashStore.writeCidRefsFile - Unable to write cid refs file for pid: " + pid - + " IOException: " + ioe.getMessage() - ); - throw ioe; - } - } - - /** - * Writes the given 'cid' into a file in the 'pid' refs file format. A pid refs file - * contains a single 'cid'. Note, a 'pid' can only ever reference one 'cid'. - * - * @param cid Content identifier to write - * @throws IOException Failure to write pid refs file - */ - protected File writePidRefsFile(String cid) throws IOException { - File pidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(pidRefsTmpFile.toPath()), StandardCharsets.UTF_8 - ) - )) { - writer.write(cid); - writer.close(); - - logFileHashStore.debug( - "FileHashStore.writePidRefsFile - pid refs file written for: " + cid - ); - return pidRefsTmpFile; - - } catch (IOException ioe) { - String errMsg = - "FileHashStore.writePidRefsFile - Unable to write pid refs file for cid: " + cid - + " IOException: " + ioe.getMessage(); + String errMsg = "FileHashStore.writeRefsFile - Unable to write refs file for ref: " + + refType + " IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 64e26aff..b2f6a4c8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -140,7 +140,7 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pid); + File cidRefsTmpFile = fileHashStore.writeRefsFile(pid, "cid"); Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.move(cidRefsTmpFile, cidRefsFilePath.toFile(), "refs"); @@ -160,26 +160,14 @@ public void tagObject_pidExistsInCidRefsFile() throws Exception { * Check that the cid supplied is written into the file given */ @Test - public void writePidRefsFile_content() throws Exception { + public void writeRefsFile_content() throws Exception { String cidToWrite = "test_cid_123"; - File pidRefsTmpFile = fileHashStore.writePidRefsFile(cidToWrite); + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); assertEquals(cidRead, cidToWrite); } - /** - * Check that the pid supplied is written into the file given with a new line - */ - @Test - public void writeCidRefsFile_content() throws Exception { - String pidToWrite = "dou.test.123"; - File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pidToWrite); - - String pidRead = new String(Files.readAllBytes(cidRefsTmpFile.toPath())); - assertEquals(pidRead, pidToWrite); - } - /** * Check that exception is thrown when incorrect cid in a pid refs file. */ @@ -191,7 +179,7 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { // Create a pid refs file with the incorrect cid String cidToWrite = "123456789abcdef"; - File pidRefsTmpFile = fileHashStore.writePidRefsFile(cidToWrite); + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); // Get path of the cid refs file @@ -212,8 +200,8 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception fileHashStore.tagObject(pid, cid); // Create a cid refs file with a different pid from the one that is expected - String cidToWrite = "dou.test.2"; - File cidRefsTmpFile = fileHashStore.writeCidRefsFile(cidToWrite); + String pidToWrite = "dou.test.2"; + File cidRefsTmpFile = fileHashStore.writeRefsFile(pidToWrite, "cid"); Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); // Get path of the pid refs file From cbf8dd53e80b590a9fe921fd776e4f0fa317f244 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 16:23:22 -0800 Subject: [PATCH 195/553] Rename 'isPidInCidRefsFile' method to 'isStringInRefsFile' and update junit tests --- .../filehashstore/FileHashStore.java | 22 +++++++++---------- .../FileHashStoreInterfaceTest.java | 4 ++-- .../FileHashStoreProtectedTest.java | 12 +++++----- .../FileHashStoreReferencesTest.java | 4 ++-- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6dde2c7c..e99930a8 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -711,7 +711,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } else if (Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file - boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); + boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { updateCidRefsFiles(pid, absCidRefsPath); } @@ -779,7 +779,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio throw new OrphanPidRefsFileException(errMsg); } // If the pid is found in the expected cid refs file, return it - if (isPidInCidRefsFile(pid, absCidRefsPath)) { + if (isStringInRefsFile(pid, absCidRefsPath)) { logFileHashStore.info( "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid ); @@ -1804,7 +1804,7 @@ protected void verifyHashStoreRefsFiles( logFileHashStore.error(errMsg); throw new IOException(errMsg); } - boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); + boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " + pid + " in cid refs file: " + absCidRefsPath; @@ -1854,21 +1854,21 @@ protected File writeRefsFile(String ref, String refType) throws IOException { /** * Checks a given cid refs file for a pid. This is case-sensitive. * - * @param pid Authority-based or persistent identifier to search - * @param absCidRefsPath Path to the cid refs file to check + * @param ref Authority-based or persistent identifier to search + * @param absRefsPath Path to the refs file to check * @return True if cid is found, false otherwise * @throws IOException If unable to read the cid refs file. */ - protected boolean isPidInCidRefsFile(String pid, Path absCidRefsPath) throws IOException { - List lines = Files.readAllLines(absCidRefsPath); - boolean pidFoundInCidRefFiles = false; + protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOException { + List lines = Files.readAllLines(absRefsPath); + boolean refFoundInCidRefFiles = false; for (String line : lines) { - if (line.equals(pid)) { - pidFoundInCidRefFiles = true; + if (line.equals(ref)) { + refFoundInCidRefFiles = true; break; } } - return pidFoundInCidRefFiles; + return refFoundInCidRefFiles; } /** diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index ca679138..de66202f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -491,8 +491,8 @@ public void storeObject_duplicate() throws Exception { String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - assertTrue(fileHashStore.isPidInCidRefsFile(pid, absCidRefsPath)); - assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); + assertTrue(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); + assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 9a951f62..2d470f6b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -873,10 +873,10 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { } /** - * Confirm that isPidInCidRefsFile returns true when pid is found + * Confirm that isStringInRefsFile returns true when pid is found */ @Test - public void isPidInCidRefsFile_pidFound() throws Exception { + public void isStringInRefsFile_pidFound() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -892,15 +892,15 @@ public void isPidInCidRefsFile_pidFound() throws Exception { String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); + assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); } } /** - * Confirm that isPidInCidRefsFile returns false when pid is found + * Confirm that isStringInRefsFile returns false when pid is found */ @Test - public void isPidInCidRefsFile_pidNotFound() throws Exception { + public void isStringInRefsFile_pidNotFound() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -912,7 +912,7 @@ public void isPidInCidRefsFile_pidNotFound() throws Exception { String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - assertFalse(fileHashStore.isPidInCidRefsFile("pid.not.found", absCidRefsPath)); + assertFalse(fileHashStore.isStringInRefsFile("pid.not.found", absCidRefsPath)); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index b2f6a4c8..83db6a6b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -126,7 +126,7 @@ public void tagObject_cidRefsFileExists() throws Exception { // Check cid refs file Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - boolean pidFoundInCidRefFiles = fileHashStore.isPidInCidRefsFile( + boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( pidAdditional, cidRefsFilePath ); assertTrue(pidFoundInCidRefFiles); @@ -283,7 +283,7 @@ public void deleteCidRefsPid_pidRemoved() throws Exception { Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); - assertFalse(fileHashStore.isPidInCidRefsFile(pid, cidRefsFilePath)); + assertFalse(fileHashStore.isStringInRefsFile(pid, cidRefsFilePath)); } /** From f45f691ebc55b09f3f80313ec42ca9804ed01d4f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 16:27:44 -0800 Subject: [PATCH 196/553] Rename 'updateCidRefsFiles' method to 'updateRefsFile' and update junit tests --- .../filehashstore/FileHashStore.java | 26 +++++++++---------- .../FileHashStoreInterfaceTest.java | 2 +- .../FileHashStoreReferencesTest.java | 4 +-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e99930a8..8e8a4c5d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -713,7 +713,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // Only update cid refs file if pid is not in the file boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { - updateCidRefsFiles(pid, absCidRefsPath); + updateRefsFile(pid, absCidRefsPath); } // Get the pid refs file File pidRefsTmpFile = writeRefsFile(cid, "pid"); @@ -1852,7 +1852,7 @@ protected File writeRefsFile(String ref, String refType) throws IOException { } /** - * Checks a given cid refs file for a pid. This is case-sensitive. + * Checks a given refs file for a ref. This is case-sensitive. * * @param ref Authority-based or persistent identifier to search * @param absRefsPath Path to the refs file to check @@ -1872,13 +1872,13 @@ protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOExce } /** - * Updates a cid refs file with a pid that references the cid + * Updates a refs file with a pid that references the cid * - * @param pid Authority-based or persistent identifier - * @param absCidRefsPath Path to the cid refs file to update + * @param ref Authority-based or persistent identifier + * @param absCidRefsPath Path to the refs file to update * @throws IOException Issue with updating a cid refs file */ - protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOException { + protected void updateRefsFile(String ref, Path absRefsPath) throws IOException { // This update process is atomic, so we first write the updated content // into a temporary file before overwriting it. File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); @@ -1886,21 +1886,21 @@ protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOExce try { // Obtain a lock on the file before updating it try (FileChannel channel = FileChannel.open( - absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE + absRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE ); FileLock ignored = channel.lock()) { - List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); - lines.add(pid); + List lines = new ArrayList<>(Files.readAllLines(absRefsPath)); + lines.add(ref); Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); - move(tmpFile, absCidRefsPath.toFile(), "refs"); + move(tmpFile, absRefsPath.toFile(), "refs"); logFileHashStore.debug( - "FileHashStore.updateCidRefsFiles - Pid: " + pid - + " has been added to cid refs file: " + absCidRefsPath + "FileHashStore.updateRefsFile - Pid: " + ref + + " has been added to cid refs file: " + absRefsPath ); } // The lock is automatically released when the try block exits } catch (IOException ioe) { - String errMsg = "FileHashStore.updateCidRefsFiles - " + ioe.getMessage(); + String errMsg = "FileHashStore.updateRefsFile - " + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index de66202f..66117ce5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1361,7 +1361,7 @@ public void deleteObject_Pid_pidOrphan() throws Exception { // Manually change the pid found in the cid refs file Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - fileHashStore.updateCidRefsFiles(pidExtra, absPathCidRefsPath); + fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath); // Create an orphaned pid refs file fileHashStore.deleteCidRefsPid(pid, absPathCidRefsPath); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 83db6a6b..8d747777 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -216,7 +216,7 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception * Confirm that cid refs file has been updated successfully */ @Test - public void updateCidRefsFiles_content() throws Exception { + public void updateRefsFile_content() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); @@ -225,7 +225,7 @@ public void updateCidRefsFiles_content() throws Exception { Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); String pidAdditional = "dou.test.2"; - fileHashStore.updateCidRefsFiles("dou.test.2", cidRefsFilePath); + fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath); List lines = Files.readAllLines(cidRefsFilePath); boolean pidOriginal_foundInCidRefFiles = false; From 7b230e2cfe96258a588b51a44434676441cc0cd2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 16:38:51 -0800 Subject: [PATCH 197/553] Absorb 'deleteCidRefsPid' method functionality into 'updateRefsFile' method and update junit tests --- .../filehashstore/FileHashStore.java | 72 +++++++------------ .../FileHashStoreInterfaceTest.java | 6 +- .../FileHashStoreReferencesTest.java | 8 +-- 3 files changed, 32 insertions(+), 54 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8e8a4c5d..59f51aac 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -713,7 +713,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // Only update cid refs file if pid is not in the file boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { - updateRefsFile(pid, absCidRefsPath); + updateRefsFile(pid, absCidRefsPath, "add"); } // Get the pid refs file File pidRefsTmpFile = writeRefsFile(cid, "pid"); @@ -1131,7 +1131,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Delete pid reference file deletePidRefsFile(pid); // Remove pid from cid refs file - deleteCidRefsPid(pid, absCidRefsPath); + updateRefsFile(pid, absCidRefsPath, "remove"); // Delete obj and cid refs file only if the cid refs file is empty if (Files.size(absCidRefsPath) == 0) { // Delete empty cid refs file @@ -1878,25 +1878,40 @@ protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOExce * @param absCidRefsPath Path to the refs file to update * @throws IOException Issue with updating a cid refs file */ - protected void updateRefsFile(String ref, Path absRefsPath) throws IOException { + protected void updateRefsFile(String ref, Path absRefsPath, String updateType) + throws IOException { // This update process is atomic, so we first write the updated content // into a temporary file before overwriting it. File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); Path tmpFilePath = tmpFile.toPath(); + try { // Obtain a lock on the file before updating it try (FileChannel channel = FileChannel.open( absRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE ); FileLock ignored = channel.lock()) { List lines = new ArrayList<>(Files.readAllLines(absRefsPath)); - lines.add(ref); - Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); - move(tmpFile, absRefsPath.toFile(), "refs"); - logFileHashStore.debug( - "FileHashStore.updateRefsFile - Pid: " + ref - + " has been added to cid refs file: " + absRefsPath - ); + if (updateType.equals("add")) { + lines.add(ref); + + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absRefsPath.toFile(), "refs"); + logFileHashStore.debug( + "FileHashStore.updateRefsFile - Ref: " + ref + + " has been added to refs file: " + absRefsPath + ); + } + + if (updateType.equals("remove")) { + lines.remove(ref); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absRefsPath.toFile(), "refs"); + logFileHashStore.debug( + "FileHashStore.updateRefsFile - Ref: " + ref + + " has been removed from refs file: " + absRefsPath + ); + } } // The lock is automatically released when the try block exits } catch (IOException ioe) { @@ -1936,43 +1951,6 @@ protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IO } } - - /** - * Removes a pid from a cid refs file. - * - * @param pid Authority-based or persistent identifier. - * @param absCidRefsPath Path to the cid refs file to remove the pid from - * @throws IOException Unable to access cid refs file - */ - protected void deleteCidRefsPid(String pid, Path absCidRefsPath) throws IOException { - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteCidRefsPid"); - FileHashStoreUtility.ensureNotNull(absCidRefsPath, "absCidRefsPath", "deleteCidRefsPid"); - // This deletes process is atomic, so we first write the updated content - // into a temporary file before overwriting it. - File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - Path tmpFilePath = tmpFile.toPath(); - try (FileChannel channel = FileChannel.open( - absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE - ); FileLock ignored = channel.lock()) { - // Read all lines into a List - List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); - lines.remove(pid); - Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); - move(tmpFile, absCidRefsPath.toFile(), "refs"); - logFileHashStore.debug( - "FileHashStore.deleteCidRefsPid - Pid: " + pid + " removed from cid refs file: " - + absCidRefsPath - ); - // The lock is automatically released when the try block exits - } catch (IOException ioe) { - String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid - + " from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe - .getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - } - /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 66117ce5..6d74d8d1 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1361,9 +1361,9 @@ public void deleteObject_Pid_pidOrphan() throws Exception { // Manually change the pid found in the cid refs file Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath); + fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath, "add"); // Create an orphaned pid refs file - fileHashStore.deleteCidRefsPid(pid, absPathCidRefsPath); + fileHashStore.updateRefsFile(pid, absPathCidRefsPath, "remove"); fileHashStore.deleteObject(fhsDeleteTypePid, pid); @@ -1744,7 +1744,7 @@ public void findObject_cidRefsFileMissingPid() throws Exception { fileHashStore.tagObject(pid, cid); Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - fileHashStore.deleteCidRefsPid(pid, cidRefsPath); + fileHashStore.updateRefsFile(pid, cidRefsPath, "remove"); assertThrows(PidNotFoundInCidRefsFileException.class, () -> { fileHashStore.findObject(pid); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 8d747777..3cb217fc 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -225,7 +225,7 @@ public void updateRefsFile_content() throws Exception { Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); String pidAdditional = "dou.test.2"; - fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath); + fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath, "add"); List lines = Files.readAllLines(cidRefsFilePath); boolean pidOriginal_foundInCidRefFiles = false; @@ -281,7 +281,7 @@ public void deleteCidRefsPid_pidRemoved() throws Exception { fileHashStore.tagObject(pidAdditional, cid); Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); assertFalse(fileHashStore.isStringInRefsFile(pid, cidRefsFilePath)); } @@ -299,8 +299,8 @@ public void deleteCidRefsPid_allPidsRemoved() throws Exception { fileHashStore.tagObject(pidAdditional, cid); Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); - fileHashStore.deleteCidRefsPid(pidAdditional, cidRefsFilePath); + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "remove"); assertTrue(Files.exists(cidRefsFilePath)); assertTrue(Files.size(cidRefsFilePath) == 0); From 59b6b42aeeed3da30193878e6ed9d24a7b5e1f74 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 16:49:22 -0800 Subject: [PATCH 198/553] Rename 'deletePidRefsFile' to 'deleteRefsFile', refactor where relevant and update junit tests --- .../filehashstore/FileHashStore.java | 36 +++++++++---------- .../FileHashStoreReferencesTest.java | 11 +++--- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 59f51aac..08cef9e6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1129,7 +1129,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } else { // Proceed to delete the reference files and object // Delete pid reference file - deletePidRefsFile(pid); + Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); + deleteRefsFile(absPidRefsPath); // Remove pid from cid refs file updateRefsFile(pid, absCidRefsPath, "remove"); // Delete obj and cid refs file only if the cid refs file is empty @@ -1820,8 +1821,8 @@ protected void verifyHashStoreRefsFiles( /** - * Writes the given string into a temporary file. The client must explicitly move this file to - * where belongs otherwise it will be removed during garbage collection. + * Writes the given ref into a temporary file. The client must explicitly move this file to + * where it belongs otherwise it will be removed during garbage collection. * * @param ref Authority-based or persistent identifier to write * @param refType Type of reference 'pid', 'cid' or 'sysmeta' @@ -1872,10 +1873,11 @@ protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOExce } /** - * Updates a refs file with a pid that references the cid + * Adds or removes a ref value from a refs file given an 'updateType' * - * @param ref Authority-based or persistent identifier - * @param absCidRefsPath Path to the refs file to update + * @param ref Authority-based or persistent identifier + * @param absRefsPath Path to the refs file to update + * @param updateType "add" or "remove" * @throws IOException Issue with updating a cid refs file */ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) @@ -1922,31 +1924,25 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) } /** - * Deletes a pid references file + * Deletes a references file * - * @param pid Authority-based or persistent identifier + * @param absRefsPath Path to the refs file to delete * @throws NoSuchAlgorithmException Incompatible algorithm used to find pid refs file * @throws IOException Unable to delete object or open pid refs file */ - protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IOException { - FileHashStoreUtility.ensureNotNull(pid, "pid", "deletePidRefsFile"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deletePidRefsFile"); - - Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); + protected void deleteRefsFile(Path absRefsPath) throws NoSuchAlgorithmException, IOException { // Check to see if pid refs file exists - if (!Files.exists(absPidRefsPath)) { - String errMsg = - "FileHashStore.deletePidRefsFile - File refs file does not exist for pid: " + pid - + " with address: " + absPidRefsPath; + if (!Files.exists(absRefsPath)) { + String errMsg = "FileHashStore.deleteRefsFile - Refs file does not exist at: " + + absRefsPath; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } else { // Proceed to delete - Files.delete(absPidRefsPath); + Files.delete(absRefsPath); logFileHashStore.debug( - "FileHashStore.deletePidRefsFile - Pid refs file deleted for: " + pid - + " with address: " + absPidRefsPath + "FileHashStore.deleteRefsFile - Refs file deleted at: " + absRefsPath ); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 3cb217fc..21e723d9 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -243,17 +243,17 @@ public void updateRefsFile_content() throws Exception { } /** - * Check that deletePidRefsFile deletes file + * Check that deleteRefsFile deletes file */ @Test - public void deletePidRefsFile_fileDeleted() throws Exception { + public void deleteRefsFile_fileDeleted() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - fileHashStore.deletePidRefsFile(pid); - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + fileHashStore.deleteRefsFile(pidRefsFilePath); + assertFalse(Files.exists(pidRefsFilePath)); } @@ -265,7 +265,8 @@ public void deletePidRefsFile_missingPidRefsFile() { String pid = "dou.test.1"; assertThrows(FileNotFoundException.class, () -> { - fileHashStore.deletePidRefsFile(pid); + Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + fileHashStore.deleteRefsFile(pidRefsFilePath); }); } From 4dd1bb00e59a43166f1f1c89d001c92240b02455 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 16:58:18 -0800 Subject: [PATCH 199/553] Rename 'tryDeleteCidObject' method to 'deleteObjectByCid', add missing logging statements and update junit tests for clarity --- .../filehashstore/FileHashStore.java | 10 ++++++++-- .../FileHashStoreProtectedTest.java | 19 +++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 08cef9e6..33767905 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1059,7 +1059,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // If 'idType' is cid, attempt to delete the object if (idType.equals(HashStoreIdTypes.cid.getName("cid"))) { - tryDeleteCidObject(id); + deleteObjectByCid(id); } else { // Else 'idType' is pid @@ -1746,9 +1746,12 @@ protected void move(File source, File target, String entity) throws IOException, * @throws IOException * @throws NoSuchAlgorithmException */ - protected void tryDeleteCidObject(String cid) throws IOException, NoSuchAlgorithmException { + protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException { Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); if (Files.exists(absCidRefsPath)) { + String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" + + " references, skipping deletion."; + logFileHashStore.warn(warnMsg); // The cid is referenced by pids, do not delete. return; @@ -1763,6 +1766,9 @@ protected void tryDeleteCidObject(String cid) throws IOException, NoSuchAlgorith if (Files.exists(expectedRealPath)) { Files.delete(expectedRealPath); } + String debugMsg = "FileHashStore - deleteObjectByCid: object deleted at" + + expectedRealPath; + logFileHashStore.debug(debugMsg); return; } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 2d470f6b..a56a6559 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -917,11 +917,10 @@ public void isStringInRefsFile_pidNotFound() throws Exception { } /** - * Confirm tryDeleteCidObject overload method does not delete an object if pid and cid - * refs files exist. + * Confirm deleteObjectByCid method deletes object when there are no references. */ @Test - public void tryDeleteCidObject_pidRefsExists() throws Exception { + public void deleteObjectByCid() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -931,8 +930,8 @@ public void tryDeleteCidObject_pidRefsExists() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); String cid = objInfo.getCid(); - // Set flag to true - fileHashStore.tryDeleteCidObject(cid); + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); // Get permanent address of the actual cid Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -948,11 +947,11 @@ public void tryDeleteCidObject_pidRefsExists() throws Exception { } /** - * Confirm tryDeleteCidObject overload method does not delete an object if a cid refs file - * exists (pids still referencing it). + * Confirm deleteObjectByCid method does not delete an object if a cid refs file + * exists (pids still referencing the cid). */ @Test - public void tryDeleteCidObject_cidRefsFileContainsPids() throws Exception { + public void tryDeleteObjectByCid_cidRefsFileContainsPids() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -963,8 +962,8 @@ public void tryDeleteCidObject_cidRefsFileContainsPids() throws Exception { ); String cid = objInfo.getCid(); - // Set flag to true - fileHashStore.tryDeleteCidObject(cid); + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); // Get permanent address of the actual cid Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); From ae349c4d170926fec19a344422f212af457d28db Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jan 2024 17:02:11 -0800 Subject: [PATCH 200/553] Clean up javadocs for revised refs related methods --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 33767905..1da54789 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1884,7 +1884,7 @@ protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOExce * @param ref Authority-based or persistent identifier * @param absRefsPath Path to the refs file to update * @param updateType "add" or "remove" - * @throws IOException Issue with updating a cid refs file + * @throws IOException Issue with updating or accessing a refs file */ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) throws IOException { @@ -1902,7 +1902,6 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) if (updateType.equals("add")) { lines.add(ref); - Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); logFileHashStore.debug( @@ -1930,13 +1929,12 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) } /** - * Deletes a references file + * Deletes a references file at the given path * * @param absRefsPath Path to the refs file to delete - * @throws NoSuchAlgorithmException Incompatible algorithm used to find pid refs file - * @throws IOException Unable to delete object or open pid refs file + * @throws IOException Unable to delete object or open pid refs file */ - protected void deleteRefsFile(Path absRefsPath) throws NoSuchAlgorithmException, IOException { + protected void deleteRefsFile(Path absRefsPath) throws IOException { // Check to see if pid refs file exists if (!Files.exists(absRefsPath)) { String errMsg = "FileHashStore.deleteRefsFile - Refs file does not exist at: " From c6a0eaaaf95a2bb60fef4ecb84eb310026ce5bfc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 25 Jan 2024 12:12:26 -0800 Subject: [PATCH 201/553] Update README.md --- README.md | 6 +++--- .../FileHashStoreInterfaceTest.java | 6 +++--- .../filehashstore/FileHashStorePublicTest.java | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2bb46774..7acf99f8 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,8 @@ tagObject(pid, cid) - To retrieve an object, call the Public API method `retrieveObject` which opens a stream to the object if it exists. **How do I find an object or check that it exists if I have the pid?** -- To find the location of the object, call the Public API method `findObject` which will return the content identifier (cid) of the object. -- This cid can then be used to locate the object on disk by following HashStore's store configuration. +- To check if an object exists, call the Public API method `findObject` which will return the content identifier (cid) of the object if it exists. +- If desired, this cid can then be used to locate the object on disk by following HashStore's store configuration. **How do I delete an object if I have the pid?** - To delete an object, call the Public API method `deleteObject` which will delete the object and its associated references and reference files where relevant. @@ -133,7 +133,7 @@ These reference files are implemented in HashStore underneath the hood with no e ###### What does HashStore look like? -``` +```sh # Example layout in HashStore with a single file stored along with its metadata and reference files. # This uses a store depth of 3, with a width of 2 and "SHA-256" as its default store algorithm ## Notes: diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 6d74d8d1..d701b10c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -931,11 +931,11 @@ public void storeMetadata_metadataLockedIds() throws Exception { Path metadataCidAbsPath = fileHashStore.getExpectedPath(pid, "metadata", formatId); assertTrue(Files.exists(metadataCidAbsPath)); - // Confirm there are only two files in HashStore - 'hashstore.yaml' and the - // metadata file written + // Confirm there are only three files in HashStore - 'hashstore.yaml', the metadata file written + // and the metadata refs file that contains namespaces used try (Stream walk = Files.walk(storePath)) { long fileCount = walk.filter(Files::isRegularFile).count(); - assertEquals(fileCount, 2); + assertEquals(fileCount, 3); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 05b0fa42..2fb32e60 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -283,6 +283,22 @@ public void initRefsDirectories() { assertTrue(Files.isDirectory(refsCidPath)); } + /** + * Check metadata namespace refs file has been created and contains expected default namespace + */ + @Test + public void initMetadataRefsFile() throws Exception { + Path metadataRefsPath = rootDirectory.resolve("refs").resolve( + "metadata/formatid_namespaces" + ); + assertTrue(Files.exists(metadataRefsPath)); + + HashMap hsProperties = fileHashStore.loadHashStoreYaml(rootDirectory); + String defaultMetadataNamespace = (String) hsProperties.get("storeMetadataNamespace"); + assertTrue(fileHashStore.isStringInRefsFile(defaultMetadataNamespace, metadataRefsPath)); + + } + /** * Check that a HashStore configuration file is written and exists */ From a5718d50927eeb72bbbdf29094d6f569569f147c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 25 Jan 2024 12:23:48 -0800 Subject: [PATCH 202/553] Remove junit tests for rolled back metadata features --- .../FileHashStoreInterfaceTest.java | 2 +- .../filehashstore/FileHashStorePublicTest.java | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index d701b10c..5be3be7d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -935,7 +935,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { // and the metadata refs file that contains namespaces used try (Stream walk = Files.walk(storePath)) { long fileCount = walk.filter(Files::isRegularFile).count(); - assertEquals(fileCount, 3); + assertEquals(fileCount, 2); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 2fb32e60..05b0fa42 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -283,22 +283,6 @@ public void initRefsDirectories() { assertTrue(Files.isDirectory(refsCidPath)); } - /** - * Check metadata namespace refs file has been created and contains expected default namespace - */ - @Test - public void initMetadataRefsFile() throws Exception { - Path metadataRefsPath = rootDirectory.resolve("refs").resolve( - "metadata/formatid_namespaces" - ); - assertTrue(Files.exists(metadataRefsPath)); - - HashMap hsProperties = fileHashStore.loadHashStoreYaml(rootDirectory); - String defaultMetadataNamespace = (String) hsProperties.get("storeMetadataNamespace"); - assertTrue(fileHashStore.isStringInRefsFile(defaultMetadataNamespace, metadataRefsPath)); - - } - /** * Check that a HashStore configuration file is written and exists */ From c45f9840eea3866e4a85407e8c2f1fe7cceb30b5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jan 2024 11:12:27 -0800 Subject: [PATCH 203/553] Add new custom exception class 'OrphanRefsFilesException', refactor 'findObject' and 'deleteObject' method and update junit tests --- .../exceptions/OrphanRefsFilesException.java | 14 ++++++ .../filehashstore/FileHashStore.java | 43 +++++++++++++++-- .../FileHashStoreInterfaceTest.java | 46 +++++++++++++++++-- 3 files changed, 94 insertions(+), 9 deletions(-) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/OrphanRefsFilesException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/OrphanRefsFilesException.java b/src/main/java/org/dataone/hashstore/exceptions/OrphanRefsFilesException.java new file mode 100644 index 00000000..e8d4cb6b --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/OrphanRefsFilesException.java @@ -0,0 +1,14 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +/** + * Custom exception class for FileHashStore when both a pid and cid reference file is found + * but object does not exist. + */ +public class OrphanRefsFilesException extends IOException { + public OrphanRefsFilesException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1da54789..cf94a974 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -37,6 +37,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; +import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -778,12 +779,25 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio logFileHashStore.error(errMsg); throw new OrphanPidRefsFileException(errMsg); } - // If the pid is found in the expected cid refs file, return it + // If the pid is found in the expected cid refs file, and the object exists, return it if (isStringInRefsFile(pid, absCidRefsPath)) { logFileHashStore.info( "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid ); - return cid; + + String objShardString = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid + ); + Path realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + if (Files.exists(realPath)) { + return cid; + + } else { + String errMsg = "FileHashStore.findObject - Object with cid: " + cid + + " does not exist, but pid and cid reference file found for pid: " + pid; + logFileHashStore.error(errMsg); + throw new OrphanRefsFilesException(errMsg); + } } else { String errMsg = "FileHashStore.deleteObject - Pid refs file exists, but pid (" + pid @@ -797,7 +811,6 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid + ". Pid refs file does not exist at: " + absPidRefsPath; logFileHashStore.error(errMsg); - // Create custom exception class throw new FileNotFoundException(errMsg); } } @@ -1057,7 +1070,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti throw new IllegalArgumentException(errMsg); } - // If 'idType' is cid, attempt to delete the object + // If 'idType' is cid, attempt to delete the object directly if (idType.equals(HashStoreIdTypes.cid.getName("cid"))) { deleteObjectByCid(id); @@ -1070,7 +1083,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti cid = findObject(id); } catch (OrphanPidRefsFileException oprfe) { - // Delete the pid refs file and return, nothing else to delete. + // Delete the pid refs file and return, nothing else to delete Path absPidRefsPath = getExpectedPath(id, "refs", "pid"); Files.delete(absPidRefsPath); @@ -1080,6 +1093,26 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti logFileHashStore.warn(warnMsg); return; + } catch (OrphanRefsFilesException orfe) { + // Object does not exist, attempt to remove orphan files + // Remove pid refs file + Path absPidRefsPath = getExpectedPath(id, "refs", "pid"); + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + Files.delete(absPidRefsPath); + + // Remove the pid from the cid refs file + Path absCidRefsPath = getExpectedPath(cidRead, "refs", "cid"); + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete the cid reference file if it's now empty + if (Files.size(absCidRefsPath) == 0) { + Files.delete(absCidRefsPath); + } + String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted orphan files."; + logFileHashStore.warn(warnMsg); + return; + } catch (PidNotFoundInCidRefsFileException pnficrfe) { // Delete pid refs file and return, nothing else to delete Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 5be3be7d..2d96e07a 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -31,6 +31,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; +import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; @@ -1379,6 +1380,23 @@ public void deleteObject_Pid_pidOrphan() throws Exception { } } + /** + * Confirm deleteObject removes pid and cid refs orphan files + */ + @Test + public void deleteObject_orphanRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path absPathCidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path absPathPidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + + fileHashStore.deleteObject("pid", pid); + assertFalse(Files.exists(absPathCidRefsPath)); + assertFalse(Files.exists(absPathPidRefsPath)); + } + /** * Confirm that deleteObject throws exception when associated pid obj not found */ @@ -1701,17 +1719,37 @@ public void getHexDigest_badAlgo() { } /** - * Confirm expected cid is returned + * Check that findObject returns cid as expected. */ @Test public void findObject_cid() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + + String cidRetrieved = fileHashStore.findObject(pid); + assertEquals(cidRetrieved, objInfo.getCid()); + } + } + + /** + * Confirm findObject throws exception when cid object does not exist but reference + * files exist. + */ + @Test + public void findObject_refsFileExistButObjectDoesNot() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - String cidRetrieved = fileHashStore.findObject(pid); - - assertEquals(cid, cidRetrieved); + assertThrows(OrphanRefsFilesException.class, () -> { + fileHashStore.findObject(pid); + }); } /** From 3f890defbf8a90f5e2b75dc90ac43c4e58e515a7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 30 Jan 2024 14:42:18 -0800 Subject: [PATCH 204/553] Refactor 'storeMetadata' to store metadata docs in a dir formed by calculating the hash of the 'pid' - with the doc name being the hash of the 'formatId', and update junit tests and documentation --- README.md | 4 +- .../java/org/dataone/hashstore/HashStore.java | 2 +- .../filehashstore/FileHashStore.java | 41 ++++++----- .../hashstore/HashStoreClientTest.java | 23 +++++- .../FileHashStoreInterfaceTest.java | 73 ++++++++++--------- .../FileHashStoreProtectedTest.java | 15 ++-- 6 files changed, 91 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 7acf99f8..22ce822e 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ tagObject(pid, cid) ###### Working with metadata (store, retrieve, delete) -HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. To differentiate between metadata documents for a given object, HashStore includes the 'formatId' (format or namespace of the metadata) when generating the address of the metadata document to store (the hash of the 'pid' + 'formatId'). By default, calling `storeMetadata` will use HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). +HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. All metadata documents related to a 'pid' are stored in a directory determined by calculating the hash of the pid (based on the store's algorithm). Each specific metadata document is then stored by calculating the hash of its associated `formatId`. By default, calling `storeMetadata` will use HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). **How do I retrieve a metadata file?** - To find a metadata object, call the Public API method `retrieveMetadata` which returns a stream to the metadata file that's been stored with the default metadata namespace if it exists. @@ -145,7 +145,7 @@ These reference files are implemented in HashStore underneath the hood with no e └─ objects └─ /d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 └─ metadata - └─ /15/8d/7e/55c36a810d7c14479c9...b20d7df66768b04 + └─ /d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2/affe1b6dd20659c63e99e63a29c...579c2d688880adc └─ refs └─ pid/0d/55/5e/d77052d7e166017f779...7230bcf7abcef65e └─ cid/d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 9e434d62..2c26696c 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -176,7 +176,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio * @param metadata Input stream to metadata document * @param pid Authority-based identifier * @param formatId Metadata namespace/format - * @return Metadata content identifier (string representing metadata address) + * @return Path to metadata content identifier (string representing metadata address) * @throws IOException When there is an error writing the metadata document * @throws IllegalArgumentException Invalid values like null for metadata, or empty pids and * formatIds diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index cf94a974..093190de 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -877,13 +877,12 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF + ". formatId: " + checkedFormatId ); // Store metadata - String metadataCid = putMetadata(metadata, pid, checkedFormatId); + String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( - "FileHashStore.storeMetadata - Metadata stored for pid: " + pid - + ". Metadata Content Identifier (metadataCid): " + metadataCid + "FileHashStore.storeMetadata - Metadata stored for pid: " + pid + " at: " + + pathToStoredMetadata ); - // TODO: Save formatId if it doesn't already exist - return metadataCid; + return pathToStoredMetadata; } catch (IOException ioe) { // Covers FileNotFoundException @@ -2020,10 +2019,9 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) } // Get permanent address for the given metadata document - String metadataCid = FileHashStoreUtility.getPidHexDigest( - pid + checkedFormatId, OBJECT_STORE_ALGORITHM - ); - Path metadataCidPath = getExpectedPath(pid, "metadata", checkedFormatId); + // All metadata documents for a pid are stored in a directory that is formed + // by using the hash of the 'pid', with the file name being the hash of the 'formatId' + Path pathToStoredMetadata = getExpectedPath(pid, "metadata", checkedFormatId); // Store metadata to tmpMetadataFile File tmpMetadataFile = FileHashStoreUtility.generateTmpFile( @@ -2033,16 +2031,16 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) if (tmpMetadataWritten) { logFileHashStore.debug( "FileHashStore.putMetadata - tmp metadata file has been written, moving to" - + " permanent location: " + metadataCidPath + + " permanent location: " + pathToStoredMetadata ); - File permMetadataFile = metadataCidPath.toFile(); + File permMetadataFile = pathToStoredMetadata.toFile(); move(tmpMetadataFile, permMetadataFile, "metadata"); } logFileHashStore.debug( "FileHashStore.putMetadata - Move metadata success, permanent address: " - + metadataCidPath + + pathToStoredMetadata ); - return metadataCid; + return pathToStoredMetadata.toString(); } /** @@ -2103,13 +2101,20 @@ protected Path getExpectedPath(String abId, String entity, String formatId) realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); } else if (entity.equalsIgnoreCase("metadata")) { - String objectCid = FileHashStoreUtility.getPidHexDigest( - abId + formatId, OBJECT_STORE_ALGORITHM + // Get the pid metadata directory + String metadataCidPartOne = FileHashStoreUtility.getPidHexDigest( + abId, OBJECT_STORE_ALGORITHM ); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid + String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, metadataCidPartOne + ); + // The file name for the metadata document is the hash of the supplied 'formatId' + String metadataCidPartTwo = FileHashStoreUtility.getPidHexDigest( + formatId, OBJECT_STORE_ALGORITHM + ); + realPath = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirectory).resolve( + metadataCidPartTwo ); - realPath = METADATA_STORE_DIRECTORY.resolve(objShardString); } else if (entity.equalsIgnoreCase("refs")) { if (formatId.equalsIgnoreCase("pid")) { diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 40bd443a..deca87a4 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -15,6 +15,8 @@ import java.util.List; import java.util.Properties; +import org.dataone.hashstore.filehashstore.FileHashStore; +import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -211,10 +213,25 @@ public void client_storeMetadata() throws Exception { HashStoreClient.main(args); // Confirm metadata was stored - Path absPath = getObjectAbsPath( - testData.pidData.get(pid).get("metadata_cid"), "metadata" + // Calculate absolute path + String storeAlgorithm = hsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(hsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(hsProperties.getProperty("storeWidth")); + Path metadataDirectory = Paths.get(hsProperties.getProperty("storePath")).resolve( + "metadata" ); - assertTrue(Files.exists(absPath)); + String metadataCidPartOne = FileHashStoreUtility.getPidHexDigest(pid, storeAlgorithm); + String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, metadataCidPartOne + ); + // The file name for the metadata document is the hash of the supplied 'formatId' + String metadataCidPartTwo = FileHashStoreUtility.getPidHexDigest( + optFormatIdValue, storeAlgorithm + ); + Path expectedMetadataPath = metadataDirectory.resolve(pidMetadataDirectory).resolve( + metadataCidPartTwo + ); + assertTrue(Files.exists(expectedMetadataPath)); // Put things back System.out.flush(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 2d96e07a..9d529331 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -718,20 +718,19 @@ public void storeMetadata() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, null); + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); - // Get relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid + // Calculate absolute path + Path metadataPidExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", testFormatId ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); - assertTrue(Files.exists(metadataCidAbsPath)); + assertEquals(metadataPidExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataPidExpectedPath)); long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataCidAbsPath); + long originalMetadataFie = Files.size(metadataPidExpectedPath); assertEquals(writtenMetadataFile, originalMetadataFie); } } @@ -748,20 +747,19 @@ public void storeMetadata_defaultFormatId_overload() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); - // Get relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); - assertTrue(Files.exists(metadataCidAbsPath)); + assertEquals(metadataPidExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataPidExpectedPath)); long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataCidAbsPath); + long originalMetadataFie = Files.size(metadataPidExpectedPath); assertEquals(writtenMetadataFile, originalMetadataFie); } } @@ -778,18 +776,10 @@ public void storeMetadata_fileSize() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, null); - - // Get relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid - ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, null); long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataCidAbsPath); + long originalMetadataFie = Files.size(Paths.get(metadataPath)); assertEquals(writtenMetadataFile, originalMetadataFie); } } @@ -891,8 +881,13 @@ public void storeMetadata_metadataLockedIds() throws Exception { try { String formatId = "http://ns.dataone.org/service/types/v2.0"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, formatId); - assertEquals(metadataCid, pidFormatHexDigest); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace + ); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (IOException | NoSuchAlgorithmException | InterruptedException e) { e.printStackTrace(); } @@ -901,8 +896,13 @@ public void storeMetadata_metadataLockedIds() throws Exception { try { String formatId = "http://ns.dataone.org/service/types/v2.0"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, formatId); - assertEquals(metadataCid, pidFormatHexDigest); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace + ); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (Exception e) { e.printStackTrace(); } @@ -911,8 +911,13 @@ public void storeMetadata_metadataLockedIds() throws Exception { try { String formatId = "http://ns.dataone.org/service/types/v2.0"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, formatId); - assertEquals(metadataCid, pidFormatHexDigest); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace + ); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (Exception e) { e.printStackTrace(); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index a56a6559..a1ecd963 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -691,17 +691,14 @@ public void putMetadata() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.putMetadata(metadataStream, pid, null); + String metadataPath = fileHashStore.putMetadata(metadataStream, pid, null); - // Get relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); - - assertTrue(Files.exists(metadataCidAbsPath)); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); } } From 8fead5854fb5a3680a67348de37ab71dbddf95c0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 30 Jan 2024 16:16:18 -0800 Subject: [PATCH 205/553] Revise existing 'storeMetadata' junit tests and add new junit test to confirm pid hash is a directory --- .../FileHashStoreInterfaceTest.java | 67 +++++++++++++++++-- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 9d529331..fe0d07be 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -728,10 +728,6 @@ public void storeMetadata() throws Exception { assertEquals(metadataPidExpectedPath.toString(), metadataPath); assertTrue(Files.exists(metadataPidExpectedPath)); - - long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataPidExpectedPath); - assertEquals(writtenMetadataFile, originalMetadataFie); } } @@ -757,10 +753,67 @@ public void storeMetadata_defaultFormatId_overload() throws Exception { assertEquals(metadataPidExpectedPath.toString(), metadataPath); assertTrue(Files.exists(metadataPidExpectedPath)); + } + } - long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataPidExpectedPath); - assertEquals(writtenMetadataFile, originalMetadataFie); + /** + * Test storeMetadata creates appropriate directory for metadata documents with the given pid + */ + @Test + public void storeMetadata_pidHashIsDirectory() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + fileHashStore.storeMetadata(metadataStream, pid); + + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String metadataPidhash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, metadataPidhash + ); + Path expectedPidMetadataDirectory = rootDirectory.resolve("metadata").resolve( + pidMetadataDirectory + ); + + assertTrue(Files.isDirectory(expectedPidMetadataDirectory)); + } + } + + /** + * Test storeMetadata stores different metadata for a given pid in its expected directory + */ + @Test + public void storeMetadata_multipleFormatIds() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + String metadataDefaultPath = fileHashStore.storeMetadata(metadataStream, pid); + + // Calculate absolute path + Path metadataTestFormatIdExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", testFormatId + ); + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataDefaultExpectedPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace + ); + + assertEquals(metadataTestFormatIdExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataTestFormatIdExpectedPath)); + assertEquals(metadataDefaultExpectedPath.toString(), metadataDefaultPath); + assertTrue(Files.exists(metadataDefaultExpectedPath)); } } From 4eea5f23ef2b3eda9ed6b7f213936db1efe89b9d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 30 Jan 2024 16:21:56 -0800 Subject: [PATCH 206/553] Revise 'HashStore' interface for renaming of 'deleteObjectAll' to 'deleteObject', clean up code and revise/update redundant TODO items --- src/main/java/org/dataone/hashstore/HashStore.java | 12 +++--------- .../hashstore/filehashstore/FileHashStore.java | 8 +++----- .../org/dataone/hashstore/HashStoreClientTest.java | 1 - .../filehashstore/FileHashStoreInterfaceTest.java | 2 -- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 2c26696c..87624295 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -259,16 +259,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti * will not be deleted. * * @param pid Authority-based identifier - * @throws IllegalArgumentException When pid is null or empty - * @throws FileNotFoundException When requested pid has no associated object - * @throws IOException I/O error when deleting empty directories, - * modifying/deleting reference files - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - * @throws InterruptedException When deletion synchronization is interrupted + * @see #deleteObject(String, String) for more details. */ - public void deleteObjectAll(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException; + public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, + IOException, NoSuchAlgorithmException, InterruptedException; /** * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 093190de..38d6165a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -175,7 +175,6 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep Files.createDirectories(REFS_TMP_FILE_DIRECTORY); Files.createDirectories(REFS_PID_FILE_DIRECTORY); Files.createDirectories(REFS_CID_FILE_DIRECTORY); - // TODO: Create formatId-namespace tracking document logFileHashStore.debug("FileHashStore - Created store and store tmp directories."); } catch (IOException ioe) { @@ -1196,15 +1195,14 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } @Override - public void deleteObjectAll(String pid) throws IllegalArgumentException, FileNotFoundException, + public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException, PidNotFoundInCidRefsFileException { // First, delete object as expected normally deleteObject("pid", pid); // TODO: - // Then look for and remove all related sysmeta - // Open metadata reference file, read all the format types - // Call 'delete_metadata(pid, formatId)' for all types + // Calculate pid metadata directory + // Walk this folder, delete all files found return; } diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index deca87a4..dcd151e0 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -15,7 +15,6 @@ import java.util.List; import java.util.Properties; -import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index fe0d07be..182548f8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -923,8 +923,6 @@ public void storeMetadata_metadataLockedIds() throws Exception { String pidFormatted = pid.replace("/", "_"); // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - String pidFormatHexDigest = - "ddf07952ef28efc099d10d8b682480f7d2da60015f5d8873b6e1ea75b4baf689"; // Create a thread pool with 3 threads ExecutorService executorService = Executors.newFixedThreadPool(3); From 93a2644aead9c49b273572bb671ec0c9f275bf89 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 30 Jan 2024 17:06:28 -0800 Subject: [PATCH 207/553] Implement 'deleteObject(String pid)' overload method without synchronization and add new junit test --- .../filehashstore/FileHashStore.java | 17 +++++-- .../FileHashStoreInterfaceTest.java | 45 +++++++++++++++++-- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 38d6165a..b03b068f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -21,11 +21,13 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Properties; +import java.util.stream.Stream; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -1200,9 +1202,18 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou PidNotFoundInCidRefsFileException { // First, delete object as expected normally deleteObject("pid", pid); - // TODO: - // Calculate pid metadata directory - // Walk this folder, delete all files found + + // Second, delete all metadata documents in the pid metadata directory + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest + ); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirectory); + Files.walk(expectedPidMetadataDirectory).sorted(Comparator.reverseOrder()).map(Path::toFile) + .forEach(File::delete); + + // TODO: This process must be synchronized + return; } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 182548f8..4677ba01 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1314,11 +1314,48 @@ public void deleteObject_invalidIdTypes() { ); } + + /** + * Confirm that deleteObject overload method with signature (String pid) deletes objects + * and all metadata documents. + */ + @Test + public void deleteObject_stringPidAll() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + // Get metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + String metadataDefaultPath = fileHashStore.storeMetadata(metadataStream, pid); + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + + // Confirm expected documents exist + assertTrue(Files.exists(Paths.get(metadataPath))); + assertTrue(Files.exists(Paths.get(metadataDefaultPath))); + assertTrue(Files.exists(objCidAbsPath)); + + fileHashStore.deleteObject(pid); + + // Check documents have been deleted + assertFalse(Files.exists(Paths.get(metadataPath))); + assertFalse(Files.exists(Paths.get(metadataDefaultPath))); + assertFalse(Files.exists(objCidAbsPath)); + } + } + + /** * Confirm that deleteObject deletes object */ @Test - public void deleteObject_Pid_objectDeleted() throws Exception { + public void deleteObject_pidType_objectDeleted() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1346,7 +1383,7 @@ public void deleteObject_Pid_objectDeleted() throws Exception { * Confirm that deleteObject deletes reference files */ @Test - public void deleteObject_Pid_referencesDeleted() throws Exception { + public void deleteObject_pidType_referencesDeleted() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1372,7 +1409,7 @@ public void deleteObject_Pid_referencesDeleted() throws Exception { * has references). */ @Test - public void deleteObject_Pid_CidRefsFileNotEmptyObjectExistsStill() throws Exception { + public void deleteObject_pidType_CidRefsFileNotEmptyObjectExistsStill() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1403,7 +1440,7 @@ public void deleteObject_Pid_CidRefsFileNotEmptyObjectExistsStill() throws Excep * @throws Exception */ @Test - public void deleteObject_Pid_pidOrphan() throws Exception { + public void deleteObject_pidType_pidOrphan() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); From 69dcf62de6dd27c903b15f3ccac07e64e41a3080 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jan 2024 09:40:19 -0800 Subject: [PATCH 208/553] Refactor 'deleteObject(String pid)' to check that metadata directory exists and is not empty before attempting to remove metadata docs, and add new junit test --- .../java/org/dataone/hashstore/HashStore.java | 5 ++-- .../filehashstore/FileHashStore.java | 27 ++++++++++--------- .../FileHashStoreInterfaceTest.java | 27 +++++++++++++++++++ 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 87624295..b4b11d4e 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -243,11 +243,10 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, * @param idType 'pid' or 'cid' * @param id Authority-based identifier or content identifier * @throws IllegalArgumentException When pid is null or empty - * @throws FileNotFoundException When requested pid has no associated object * @throws IOException I/O error when deleting empty directories, * modifying/deleting reference files - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported + * @throws NoSuchAlgorithmException When algorithm used to calculate an object or metadata's + * address is not supported * @throws InterruptedException When deletion synchronization is interrupted */ public void deleteObject(String idType, String id) throws IllegalArgumentException, diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b03b068f..08defd47 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -27,7 +27,6 @@ import java.util.Map; import java.util.Objects; import java.util.Properties; -import java.util.stream.Stream; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -1051,9 +1050,8 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, } @Override - public void deleteObject(String idType, String id) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException, - PidNotFoundInCidRefsFileException { + public void deleteObject(String idType, String id) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException, InterruptedException, PidNotFoundInCidRefsFileException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete object for id: " + id + "(" + idType + ")" @@ -1197,23 +1195,26 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } @Override - public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException, - PidNotFoundInCidRefsFileException { + public void deleteObject(String pid) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException, InterruptedException, PidNotFoundInCidRefsFileException { // First, delete object as expected normally - deleteObject("pid", pid); + // This is synchronized based on the 'cid' retrieved from the pid refs file + deleteObject(HashStoreIdTypes.pid.getName("pid"), pid); - // Second, delete all metadata documents in the pid metadata directory + // Second, delete all metadata documents in the associated pid metadata directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest ); Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirectory); - Files.walk(expectedPidMetadataDirectory).sorted(Comparator.reverseOrder()).map(Path::toFile) - .forEach(File::delete); - - // TODO: This process must be synchronized + // Check that directory exists and is not empty before attempting to delete metadata docs + if (Files.isDirectory(expectedPidMetadataDirectory) && !FileHashStoreUtility + .isDirectoryEmpty(expectedPidMetadataDirectory)) { + Files.walk(expectedPidMetadataDirectory).sorted(Comparator.reverseOrder()).map( + Path::toFile + ).forEach(File::delete); + } return; } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 4677ba01..ee48d3d8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1351,6 +1351,33 @@ public void deleteObject_stringPidAll() throws Exception { } + /** + * Confirm that deleteObject overload method with signature (String pid) deletes objects + * and does not throw exceptions if metadata documents do not exist. + */ + @Test + public void deleteObject_stringPidNoMetadataDocs() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + // Get metadata file + Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + + // Confirm expected documents exist + assertTrue(Files.exists(objCidAbsPath)); + + fileHashStore.deleteObject(pid); + + // Check documents have been deleted + assertFalse(Files.exists(objCidAbsPath)); + } + } + + /** * Confirm that deleteObject deletes object */ From aedd5b62ca4f2d1d2311ab58679100d6dd79d0da Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jan 2024 10:09:47 -0800 Subject: [PATCH 209/553] Rename variables created from calling '.getHierarchicalPathString' to improve clarity --- .../filehashstore/FileHashStore.java | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 08defd47..60aa9079 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -785,10 +785,10 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid ); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid ); - Path realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + Path realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); if (Files.exists(realPath)) { return cid; @@ -1203,10 +1203,10 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio // Second, delete all metadata documents in the associated pid metadata directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest ); - Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirectory); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); // Check that directory exists and is not empty before attempting to delete metadata docs if (Files.isDirectory(expectedPidMetadataDirectory) && !FileHashStoreUtility @@ -1396,10 +1396,10 @@ protected ObjectMetadata putObject( // Gather the elements to form the permanent address String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); - Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); // Confirm that the object does not yet exist, delete tmpFile if so if (Files.exists(objRealPath)) { @@ -1799,10 +1799,10 @@ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithm } else { // Get permanent address of the actual cid - String objShardString = FileHashStoreUtility.getHierarchicalPathString( + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid ); - Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); // If file exists, delete it. if (Files.exists(expectedRealPath)) { @@ -2105,24 +2105,24 @@ protected Path getExpectedPath(String abId, String entity, String formatId) if (entity.equalsIgnoreCase("object")) { // 'abId' is expected to be a pid String objectCid = findObject(abId); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); - realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); } else if (entity.equalsIgnoreCase("metadata")) { // Get the pid metadata directory String metadataCidPartOne = FileHashStoreUtility.getPidHexDigest( abId, OBJECT_STORE_ALGORITHM ); - String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, metadataCidPartOne ); // The file name for the metadata document is the hash of the supplied 'formatId' String metadataCidPartTwo = FileHashStoreUtility.getPidHexDigest( formatId, OBJECT_STORE_ALGORITHM ); - realPath = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirectory).resolve( + realPath = METADATA_STORE_DIRECTORY.resolve(pidRelativePath).resolve( metadataCidPartTwo ); @@ -2131,15 +2131,15 @@ protected Path getExpectedPath(String abId, String entity, String formatId) String pidRefId = FileHashStoreUtility.getPidHexDigest( abId, OBJECT_STORE_ALGORITHM ); - String pidShardString = FileHashStoreUtility.getHierarchicalPathString( + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidRefId ); - realPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); + realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); } else if (formatId.equalsIgnoreCase("cid")) { - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( + String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, abId ); - realPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); + realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } else { String errMsg = "FileHashStore.getExpectedPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; From 9dda62e058f6f25ac7238f6c29e060047231d70b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jan 2024 10:26:19 -0800 Subject: [PATCH 210/553] Remove redundant '.sort' call when iterating over a pid metadata directory --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 60aa9079..17ac7f38 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -21,7 +21,6 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -1211,9 +1210,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio // Check that directory exists and is not empty before attempting to delete metadata docs if (Files.isDirectory(expectedPidMetadataDirectory) && !FileHashStoreUtility .isDirectoryEmpty(expectedPidMetadataDirectory)) { - Files.walk(expectedPidMetadataDirectory).sorted(Comparator.reverseOrder()).map( - Path::toFile - ).forEach(File::delete); + Files.walk(expectedPidMetadataDirectory).map(Path::toFile).forEach(File::delete); } return; } From 23ec383f2443663015501ac89b5b9566c5a747ec Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jan 2024 10:44:27 -0800 Subject: [PATCH 211/553] Clean up codebase --- .../java/org/dataone/hashstore/HashStore.java | 6 +-- .../filehashstore/FileHashStore.java | 49 +++++++++++-------- .../FileHashStoreInterfaceTest.java | 17 ++++--- .../FileHashStoreReferencesTest.java | 2 +- 4 files changed, 39 insertions(+), 35 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index b4b11d4e..db05404c 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -137,15 +137,11 @@ public void tagObject(String pid, String cid) throws IOException, * @param checksum Value of checksum to validate against * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing - * @throws IOException An issue with deleting the object when there is a - * mismatch - * @throws NoSuchAlgorithmException If checksum algorithm (and its respective checksum) is - * not in objectInfo * @throws IllegalArgumentException An expected value does not match */ public boolean verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize - ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException; + ) throws IllegalArgumentException; /** * Checks whether an object referenced by a pid exists and returns the content identifier. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 17ac7f38..f3a7698b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.Objects; import java.util.Properties; +import java.util.stream.Stream; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -627,7 +628,7 @@ public ObjectMetadata storeObject(InputStream object, String pid, String additio @Override public boolean verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize - ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException { + ) throws IllegalArgumentException { logFileHashStore.debug( "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() ); @@ -1050,7 +1051,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, @Override public void deleteObject(String idType, String id) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException, PidNotFoundInCidRefsFileException { + NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete object for id: " + id + "(" + idType + ")" @@ -1077,15 +1078,15 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti String pid = id; try { // Begin by looking for the cid and confirming state - cid = findObject(id); + cid = findObject(pid); } catch (OrphanPidRefsFileException oprfe) { // Delete the pid refs file and return, nothing else to delete - Path absPidRefsPath = getExpectedPath(id, "refs", "pid"); + Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); Files.delete(absPidRefsPath); String warnMsg = - "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + id + "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); return; @@ -1195,7 +1196,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti @Override public void deleteObject(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException, PidNotFoundInCidRefsFileException { + NoSuchAlgorithmException, InterruptedException { + logFileHashStore.debug( + "FileHashStore.deleteObject - Called to delete all associated docs for pid: " + pid + ); // First, delete object as expected normally // This is synchronized based on the 'cid' retrieved from the pid refs file deleteObject(HashStoreIdTypes.pid.getName("pid"), pid); @@ -1210,9 +1214,18 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio // Check that directory exists and is not empty before attempting to delete metadata docs if (Files.isDirectory(expectedPidMetadataDirectory) && !FileHashStoreUtility .isDirectoryEmpty(expectedPidMetadataDirectory)) { - Files.walk(expectedPidMetadataDirectory).map(Path::toFile).forEach(File::delete); + try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { + stream.map(Path::toFile).forEach(File::delete); + + } catch (IOException ioe) { + logFileHashStore.warn( + "FileHashStore.deleteObject - Unexpected IOException: " + ioe.getMessage() + ); + } } - return; + logFileHashStore.info( + "FileHashStore.deleteObject - Object, references and metadata deleted for: " + pid + ); } @Override @@ -1234,7 +1247,6 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid + " with metadata address: " + metadataCidPath; logFileHashStore.warn(errMsg); - return; } else { // Proceed to delete @@ -1250,8 +1262,8 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * Overload method for deleteMetadata with default metadata namespace */ @Override - public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException { + public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { deleteMetadata(pid, DEFAULT_METADATA_NAMESPACE); } @@ -1425,15 +1437,13 @@ protected ObjectMetadata putObject( * @param requestValidation Boolean to decide whether to proceed with validation * @param checksum Expected checksum value of object * @param checksumAlgorithm Hash algorithm of checksum value - * @param tmpFile tmpFile that has been written - * @param hexDigests Map of the hex digests available to check with * @param tmpFile Path to the file that is being evaluated * @param hexDigests Map of the hex digests to parse data from * @param objSize Expected size of object * @param storedObjFileSize Actual size of object stored - * @return - * @throws NoSuchAlgorithmException - * @throws IOException + * @return Boolean, true if valid + * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent + * @throws IOException Issue with deleting tmpFile */ private boolean validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, @@ -1490,7 +1500,6 @@ private boolean validateTmpObject( + " is not equal to the calculated hex digest: " + digestFromHexDigests + ". Checksum" + " provided: " + checksum + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); - ; logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -1782,8 +1791,8 @@ protected void move(File source, File target, String entity) throws IOException, * has pids that references it and/or a cid refs file exists, the object will not be deleted. * * @param cid Content identifier - * @throws IOException - * @throws NoSuchAlgorithmException + * @throws IOException If an issue arises during deletion of object + * @throws NoSuchAlgorithmException Incompatible algorithm used to find relative path to cid */ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException { Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); @@ -1792,7 +1801,6 @@ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithm + " references, skipping deletion."; logFileHashStore.warn(warnMsg); // The cid is referenced by pids, do not delete. - return; } else { // Get permanent address of the actual cid @@ -1808,7 +1816,6 @@ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithm String debugMsg = "FileHashStore - deleteObjectByCid: object deleted at" + expectedRealPath; logFileHashStore.debug(debugMsg); - return; } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index ee48d3d8..9ee6e45d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1332,20 +1332,23 @@ public void deleteObject_stringPidAll() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); - String metadataDefaultPath = fileHashStore.storeMetadata(metadataStream, pid); + String metadataPathString = fileHashStore.storeMetadata(metadataStream, pid, + testFormatId); + String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStream, pid); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path metadataPath = Paths.get(metadataPathString); + Path metadataDefaultPath = Paths.get(metadataDefaultPathString); // Confirm expected documents exist - assertTrue(Files.exists(Paths.get(metadataPath))); - assertTrue(Files.exists(Paths.get(metadataDefaultPath))); + assertTrue(Files.exists(metadataPath)); + assertTrue(Files.exists(metadataDefaultPath)); assertTrue(Files.exists(objCidAbsPath)); fileHashStore.deleteObject(pid); // Check documents have been deleted - assertFalse(Files.exists(Paths.get(metadataPath))); - assertFalse(Files.exists(Paths.get(metadataDefaultPath))); + assertFalse(Files.exists(metadataPath)); + assertFalse(Files.exists(metadataDefaultPath)); assertFalse(Files.exists(objCidAbsPath)); } } @@ -1463,8 +1466,6 @@ public void deleteObject_pidType_CidRefsFileNotEmptyObjectExistsStill() throws E /** * Confirm that deleteObject removes an orphan pid reference file when the associated cid refs * file does not contain the expected pid. - * - * @throws Exception */ @Test public void deleteObject_pidType_pidOrphan() throws Exception { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 21e723d9..4c185ee5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -304,7 +304,7 @@ public void deleteCidRefsPid_allPidsRemoved() throws Exception { fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "remove"); assertTrue(Files.exists(cidRefsFilePath)); - assertTrue(Files.size(cidRefsFilePath) == 0); + assertEquals(0, Files.size(cidRefsFilePath)); } /** From 32db8be3af4b811bcb1bf197738e5cebee923dfe Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jan 2024 11:09:04 -0800 Subject: [PATCH 212/553] Refactor FileHashStoreUtility method 'isDirectoryEmpty' to 'dirContainsFiles' to improve method readability --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- .../hashstore/filehashstore/FileHashStoreUtility.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f3a7698b..21516f71 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -282,7 +282,7 @@ protected void verifyHashStoreProperties( ); if (Files.isDirectory(storePath)) { - if (!FileHashStoreUtility.isDirectoryEmpty(storePath)) { + if (FileHashStoreUtility.dirContainsFiles(storePath)) { String errMsg = "FileHashStore - Missing 'hashstore.yaml' but directories" + " and/or objects found."; logFileHashStore.fatal(errMsg); @@ -1212,8 +1212,8 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); // Check that directory exists and is not empty before attempting to delete metadata docs - if (Files.isDirectory(expectedPidMetadataDirectory) && !FileHashStoreUtility - .isDirectoryEmpty(expectedPidMetadataDirectory)) { + if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility.dirContainsFiles( + expectedPidMetadataDirectory)) { try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { stream.map(Path::toFile).forEach(File::delete); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 6b7ae759..43d43ae1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -102,7 +102,7 @@ public static String getPidHexDigest(String pid, String algorithm) * @return True if a file is found or the directory is empty, False otherwise * @throws IOException If I/O occurs when accessing directory */ - public static boolean isDirectoryEmpty(Path directory) throws IOException { + public static boolean dirContainsFiles(Path directory) throws IOException { try (Stream stream = Files.list(directory)) { // The findFirst() method is called on the stream created from the given // directory to retrieve the first element. If the stream is empty (i.e., the @@ -112,7 +112,7 @@ public static boolean isDirectoryEmpty(Path directory) throws IOException { // findFirst(). If the Optional contains a value (i.e., an element was found), // isPresent() returns true. If the Optional is empty (i.e., the stream is // empty), isPresent() returns false. - return !stream.findFirst().isPresent(); + return stream.findFirst().isPresent(); } } From c9df6a3a66db31609f8cbb6c7e0971637c8a12d0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jan 2024 12:43:24 -0800 Subject: [PATCH 213/553] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 22ce822e..6593864d 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,6 @@ HashStore is a content-addressable file management system that utilizes the cont - retrieveObject - retrieveMetadata - deleteObject -- deleteObjectAll - deleteMetadata - getHexDigest @@ -92,8 +91,9 @@ tagObject(pid, cid) - If desired, this cid can then be used to locate the object on disk by following HashStore's store configuration. **How do I delete an object if I have the pid?** -- To delete an object, call the Public API method `deleteObject` which will delete the object and its associated references and reference files where relevant. -- To delete an object and all its related data (reference files and system metadata), call the Public API method `deleteObjectAll` +- To delete an object and all its associated reference files, call the Public API method `deleteObject()` with `idType` 'pid'. +- To delete only an object, call `deleteObject()` with `idType` 'cid' which will remove the object if it it is not referenced by any pids. +- To delete an object and all its related data (reference files and system metadata), call the Public API method `deleteObject(String pid)`. - Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. @@ -140,6 +140,7 @@ These reference files are implemented in HashStore underneath the hood with no e ## - Objects are stored using their content identifier as the file address ## - The reference file for each pid contains a single cid ## - The reference file for each cid contains multiple pids each on its own line +## - There is one sysmeta document under the metadata directory for the pid hash .../metacat/hashstore/ └─ objects From 82ac70e3f55095ab2cf62be18f99bfbe93991856 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jan 2024 13:03:54 -0800 Subject: [PATCH 214/553] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6593864d..05c385cc 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, chec // Manual Process // Store object objectMetadata objInfo = storeObject(InputStream) -// Validate object, throws exceptions if there is a mismatch and deletes the associated file +// Validate object, returns False if there is a mismatch and deletes the associated file verifyObject(objInfo, checksum, checksumAlgorithn, objSize) // Tag object, makes the object discoverable (find, retrieve, delete) tagObject(pid, cid) @@ -116,7 +116,7 @@ HashStore assumes that every object to store has a respective identifier. This i - pid (persistent identifier) reference files - cid (content identifier) reference files -These reference files are implemented in HashStore underneath the hood with no expectation for modification from the calling app/client. The one and only exception to this process when the calling client/app does not have an identifier, and solely stores an objects raw bytes in HashStore (calling `storeObject(InputStream)`). +These reference files are implemented in HashStore underneath the hood with no expectation for modification from the calling app/client. The one and only exception to this process is when the calling client/app does not have an identifier, and solely stores an objects raw bytes in HashStore (calling `storeObject(InputStream)`). **'pid' Reference Files** - Pid (persistent identifier) reference files are created when storing an object with an identifier. From 7b410de419de2c1f1e839483745e789f533b3a69 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 09:17:27 -0800 Subject: [PATCH 215/553] Swallow 'FileAlreadyExistsException' in 'move()' to reduce complexity --- .../hashstore/filehashstore/FileHashStore.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 21516f71..8543f33b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1212,8 +1212,8 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); // Check that directory exists and is not empty before attempting to delete metadata docs - if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility.dirContainsFiles( - expectedPidMetadataDirectory)) { + if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility + .dirContainsFiles(expectedPidMetadataDirectory)) { try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { stream.map(Path::toFile).forEach(File::delete); @@ -1355,6 +1355,8 @@ protected ObjectMetadata putObject( ); validateAlgorithm(additionalAlgorithm); } + // checksumAlgorithm should be evaluated as a pair, catch it earlier + // The way this checks must be very clear or else it's difficult to understand if (checksumAlgorithm != null) { FileHashStoreUtility.checkForEmptyString( checksumAlgorithm, "checksumAlgorithm", "putObject" @@ -1443,7 +1445,7 @@ protected ObjectMetadata putObject( * @param storedObjFileSize Actual size of object stored * @return Boolean, true if valid * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent - * @throws IOException Issue with deleting tmpFile + * @throws IOException Issue with deleting tmpFile */ private boolean validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, @@ -1768,6 +1770,12 @@ protected void move(File source, File target, String entity) throws IOException, + targetFilePath ); + } catch (FileAlreadyExistsException amnse) { + logFileHashStore.warn( + "FileHashStore.move - File already exists, skipping request to move object." + + " Source: " + source + ". Target: " + target + ); + } catch (AtomicMoveNotSupportedException amnse) { logFileHashStore.error( "FileHashStore.move - StandardCopyOption.ATOMIC_MOVE failed. AtomicMove is" @@ -1791,7 +1799,7 @@ protected void move(File source, File target, String entity) throws IOException, * has pids that references it and/or a cid refs file exists, the object will not be deleted. * * @param cid Content identifier - * @throws IOException If an issue arises during deletion of object + * @throws IOException If an issue arises during deletion of object * @throws NoSuchAlgorithmException Incompatible algorithm used to find relative path to cid */ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException { @@ -2106,6 +2114,7 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea protected Path getExpectedPath(String abId, String entity, String formatId) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { Path realPath; + // TODO: Double check that an endless loop is not created and write junit test if (entity.equalsIgnoreCase("object")) { // 'abId' is expected to be a pid String objectCid = findObject(abId); From 5b8ab0a0c1eb0546e287c4e58ef72f6b037f1174 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 10:31:25 -0800 Subject: [PATCH 216/553] Refactor 'getExpectedPath()' for clarity and to prevent potential endless loop from calling 'findObject' --- .../filehashstore/FileHashStore.java | 45 ++++++++++++------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8543f33b..0d5cdcfc 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2114,43 +2114,54 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea protected Path getExpectedPath(String abId, String entity, String formatId) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { Path realPath; - // TODO: Double check that an endless loop is not created and write junit test + String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); if (entity.equalsIgnoreCase("object")) { - // 'abId' is expected to be a pid - String objectCid = findObject(abId); + // hashId = pidRefId + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId + ); + Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); + // Attempt to retrieve the cid + String objectCid; + if (!Files.exists(pathToPidRefsFile)) { + String errMsg = + "FileHashStore.getExpectedPath - Pid Refs file does not exist for pid:" + abId + + " with object address: " + pathToPidRefsFile + ". Cannot retrive cid."; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } else { + objectCid = new String(Files.readAllBytes(pathToPidRefsFile)); + } + // If cid is found, return the expected real path to object String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); } else if (entity.equalsIgnoreCase("metadata")) { - // Get the pid metadata directory - String metadataCidPartOne = FileHashStoreUtility.getPidHexDigest( - abId, OBJECT_STORE_ALGORITHM - ); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, metadataCidPartOne + // Get the pid metadata directory (the relative path of the hashId) + String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); // The file name for the metadata document is the hash of the supplied 'formatId' - String metadataCidPartTwo = FileHashStoreUtility.getPidHexDigest( + String metadataFormatIdHash = FileHashStoreUtility.getPidHexDigest( formatId, OBJECT_STORE_ALGORITHM ); - realPath = METADATA_STORE_DIRECTORY.resolve(pidRelativePath).resolve( - metadataCidPartTwo + realPath = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirRelPath).resolve( + metadataFormatIdHash ); } else if (entity.equalsIgnoreCase("refs")) { if (formatId.equalsIgnoreCase("pid")) { - String pidRefId = FileHashStoreUtility.getPidHexDigest( - abId, OBJECT_STORE_ALGORITHM - ); + // hashId = pidRefId String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidRefId + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); } else if (formatId.equalsIgnoreCase("cid")) { + // hashId = cid String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, abId + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } else { From f63c2ac34ee81ba59cc59388176cc7ea267876c3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 10:39:31 -0800 Subject: [PATCH 217/553] Utilize 'HashStoreIdTypes' where relevant instead of strings for 'cid' and 'pid' --- .../filehashstore/FileHashStore.java | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0d5cdcfc..6ac3cbc6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -163,8 +163,8 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep OBJECT_TMP_FILE_DIRECTORY = OBJECT_STORE_DIRECTORY.resolve("tmp"); METADATA_TMP_FILE_DIRECTORY = METADATA_STORE_DIRECTORY.resolve("tmp"); REFS_TMP_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("tmp"); - REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("pid"); - REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("cid"); + REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.pid.getName("pid")); + REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.cid.getName("cid")); try { // Physically create object & metadata store and tmp directories @@ -700,8 +700,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } try { - Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); - Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); // Check that pid refs file doesn't exist yet if (Files.exists(absPidRefsPath)) { @@ -717,7 +717,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi updateRefsFile(pid, absCidRefsPath, "add"); } // Get the pid refs file - File pidRefsTmpFile = writeRefsFile(cid, "pid"); + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName("pid")); File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); // Verify tagging process, this throws exceptions if there's an issue @@ -730,8 +730,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } else { // Get pid and cid refs files - File pidRefsTmpFile = writeRefsFile(cid, "pid"); - File cidRefsTmpFile = writeRefsFile(pid, "cid"); + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName("pid")); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName("cid")); // Move refs files to permanent location File absPathPidRefsFile = absPidRefsPath.toFile(); File absPathCidRefsFile = absCidRefsPath.toFile(); @@ -765,11 +765,11 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); // Get path of the pid references file - Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); if (Files.exists(absPidRefsPath)) { String cid = new String(Files.readAllBytes(absPidRefsPath)); - Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { @@ -1082,7 +1082,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanPidRefsFileException oprfe) { // Delete the pid refs file and return, nothing else to delete - Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); + Path absPidRefsPath = getExpectedPath( + pid, "refs", HashStoreIdTypes.pid.getName("pid") + ); Files.delete(absPidRefsPath); String warnMsg = @@ -1094,12 +1096,16 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanRefsFilesException orfe) { // Object does not exist, attempt to remove orphan files // Remove pid refs file - Path absPidRefsPath = getExpectedPath(id, "refs", "pid"); + Path absPidRefsPath = getExpectedPath( + id, "refs", HashStoreIdTypes.pid.getName("pid") + ); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); Files.delete(absPidRefsPath); // Remove the pid from the cid refs file - Path absCidRefsPath = getExpectedPath(cidRead, "refs", "cid"); + Path absCidRefsPath = getExpectedPath( + cidRead, "refs", HashStoreIdTypes.cid.getName("cid") + ); updateRefsFile(pid, absCidRefsPath, "remove"); // Delete the cid reference file if it's now empty if (Files.size(absCidRefsPath) == 0) { @@ -1113,7 +1119,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (PidNotFoundInCidRefsFileException pnficrfe) { // Delete pid refs file and return, nothing else to delete - Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); + Path absPidRefsPath = getExpectedPath( + pid, "refs", HashStoreIdTypes.pid.getName("pid") + ); Files.delete(absPidRefsPath); String warnMsg = @@ -1148,7 +1156,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Get permanent address of the pid by calculating its sha-256 hex digest Path objRealPath = getExpectedPath(pid, "object", null); // Get the path to the cid refs file to work with - Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); + Path absCidRefsPath = getExpectedPath( + cid, "refs", HashStoreIdTypes.cid.getName("cid") + ); if (!Files.exists(objRealPath)) { // Throw exception if object doesn't exist @@ -1160,7 +1170,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } else { // Proceed to delete the reference files and object // Delete pid reference file - Path absPidRefsPath = getExpectedPath(pid, "refs", "pid"); + Path absPidRefsPath = getExpectedPath( + pid, "refs", HashStoreIdTypes.pid.getName("pid") + ); deleteRefsFile(absPidRefsPath); // Remove pid from cid refs file updateRefsFile(pid, absCidRefsPath, "remove"); @@ -1803,7 +1815,7 @@ protected void move(File source, File target, String entity) throws IOException, * @throws NoSuchAlgorithmException Incompatible algorithm used to find relative path to cid */ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException { - Path absCidRefsPath = getExpectedPath(cid, "refs", "cid"); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); if (Files.exists(absCidRefsPath)) { String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" + " references, skipping deletion."; @@ -2152,13 +2164,13 @@ protected Path getExpectedPath(String abId, String entity, String formatId) ); } else if (entity.equalsIgnoreCase("refs")) { - if (formatId.equalsIgnoreCase("pid")) { + if (formatId.equalsIgnoreCase(HashStoreIdTypes.pid.getName("pid"))) { // hashId = pidRefId String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); - } else if (formatId.equalsIgnoreCase("cid")) { + } else if (formatId.equalsIgnoreCase(HashStoreIdTypes.cid.getName("cid"))) { // hashId = cid String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId From f970411d9fa72804fe591c5e0743e2d2a938823a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 11:08:11 -0800 Subject: [PATCH 218/553] Update HashStore interface docstrings --- .../java/org/dataone/hashstore/HashStore.java | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index db05404c..0ed37261 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -74,12 +74,16 @@ public ObjectMetadata storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) + * + * Store an object only without reference files. */ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; /** * @see #storeObject(InputStream, String, String, String, String, long) + * + * Store an object and validate the given checksum & checksum algorithm and size. */ public ObjectMetadata storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm, @@ -89,6 +93,8 @@ public ObjectMetadata storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) + * + * Store an object and validate the given checksum & checksum algorithm. */ public ObjectMetadata storeObject( InputStream object, String pid, String checksum, String checksumAlgorithm @@ -97,6 +103,8 @@ public ObjectMetadata storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) + * + * Store an object and generate an additional algorithm in hex digests. */ public ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm @@ -105,6 +113,8 @@ public ObjectMetadata storeObject( /** * @see #storeObject(InputStream, String, String, String, String, long) + * + * Store an object and validate its size. */ public ObjectMetadata storeObject(InputStream object, String pid, long objSize) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, @@ -162,9 +172,10 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio /** * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a - * persistent identifier (`pid`) and metadata format (`formatId`). The permanent address of - * the stored metadata document is determined by calculating the SHA-256 hex digest of the - * provided `pid` + `formatId`. + * persistent identifier (`pid`) and metadata format (`formatId`). All metadata documents + * for a given pid will be stored in the a directory (under ../metadata) that is determined + * by calculating the hash of the given pid, with the document name being the hash of the + * metadata format (`formatId`). * * Note, multiple calls to store the same metadata content will all be accepted, but is not * guaranteed to execute sequentially. @@ -187,6 +198,9 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) /** * @see #storeMetadata(InputStream, String, String) + * + * If the '(InputStream metadata, String pid)' signature is used, the metadata format + * stored will default to `sysmeta`. */ public String storeMetadata(InputStream metadata, String pid) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, @@ -225,16 +239,19 @@ public InputStream retrieveMetadata(String pid, String formatId) /** * @see #retrieveMetadata(String, String) + * + * If `retrieveMetadata` is called with signature (String pid), the metadata + * document retrieved will be the given pid's 'sysmeta' */ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** - * Deletes an object (and its empty subdirectories) permanently from HashStore using a given + * Deletes an object and its related data permanently from HashStore using a given * persistent identifier. If the `idType` is 'pid', the object associated with the pid will - * be deleted if it is not referenced by any other pids, along with its reference files. - * If the `idType` is 'cid', only the object will be deleted if it is not referenced by - * other pids. + * be deleted if it is not referenced by any other pids, along with its reference files and + * all metadata documents found in its respective metadata directory. If the `idType` is + * 'cid', only the object will be deleted if it is not referenced by other pids. * * @param idType 'pid' or 'cid' * @param id Authority-based identifier or content identifier @@ -250,8 +267,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti /** * Deletes an object and all relevant associated files (ex. system metadata, reference - * files, etc.) based on a given pid. If other pids still reference the object, the object - * will not be deleted. + * files, etc.) based on a given pid. If other pids still reference the pid's associated + * object, the object will not be deleted. * * @param pid Authority-based identifier * @see #deleteObject(String, String) for more details. @@ -275,6 +292,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx /** * @see #deleteMetadata(String, String) + * + * If `deleteMetadata` is called with signature (String pid), the metadata + * document deleted will be the given pid's 'sysmeta' */ public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; From 8314dfd9db37cc2ff32eebd2dacc828cb94dfb8a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 11:20:48 -0800 Subject: [PATCH 219/553] Update 'ObjectMetadata' class with new attribute 'pid' with default value null, add methods 'getPid' & 'setPid', and add new junit tests --- .../org/dataone/hashstore/ObjectMetadata.java | 32 ++++++++++++++++--- .../dataone/hashstore/ObjectMetadataTest.java | 25 ++++++++++++++- 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 9347a7c7..afd20ada 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -4,11 +4,13 @@ /** * ObjectMetadata is a class that models a unique identifier for an object in the HashStore. It - * encapsulates information about a file's content identifier (cid), size, and associated hash - * digest values. By using ObjectMetadata objects, client code can easily obtain metadata of a store - * object in HashStore without needing to know the underlying file system details. + * encapsulates information about a file's authority-based/persistent identifier (pid), content + * identifier (cid), size, and associated hash digest values. By using ObjectMetadata objects, + * client code can easily obtain metadata of a store object in HashStore without needing to know the + * underlying file system details. */ public class ObjectMetadata { + private String pid = null; private final String cid; private final long size; private final Map hexDigests; @@ -16,6 +18,7 @@ public class ObjectMetadata { /** * Creates a new instance of ObjectMetadata with the given properties. * + * @param pid Authority based or persistent identifer, null by default * @param cid Unique identifier for the file * @param size Size of stored file * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the @@ -28,7 +31,26 @@ public ObjectMetadata(String cid, long size, Map hexDigests) { } /** - * Return the cid (content identifier) of the file + * Get the persistent identifier + * + * @return pid + */ + public String getPid() { + return pid; + } + + /** + * Set the persistent identifier + * + * @return cid + */ + public String setPid(String pid) { + this.pid = pid; + return pid; + } + + /** + * Return the cid (content identifier) * * @return cid */ @@ -37,7 +59,7 @@ public String getCid() { } /** - * Return the size of the file + * Return the size * * @return size */ diff --git a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java index 8d97a0f3..11f25bab 100644 --- a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java @@ -50,7 +50,30 @@ public void testObjectMetadata() { } /** - * Check ObjectMetadata get id + * Check ObjectMetadata pid is null by default + */ + @Test + public void testObjectMetadataGetPid() { + ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + String pid = objInfo.getPid(); + assertEquals(pid, null); + } + + /** + * Check ObjectMetadata pid is null by default + */ + @Test + public void testObjectMetadataSetPid() { + ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + String pidToSet = "dou.test.1"; + objInfo.setPid(pidToSet); + + String pidFromObjectMetadata = objInfo.getPid(); + assertEquals(pidFromObjectMetadata, pidToSet); + } + + /** + * Check ObjectMetadata get cid */ @Test public void testObjectMetadataGetId() { From 72f2824038eb13edd42ec9d277b533ef5c7489ee Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 12:49:26 -0800 Subject: [PATCH 220/553] Update 'syncPutObject' to call '.setPid()' and update junit test --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 1 + .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6ac3cbc6..ed8426e4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -505,6 +505,7 @@ private ObjectMetadata syncPutObject( ); // Tag object String cid = objInfo.getCid(); + objInfo.setPid(pid); tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 9ee6e45d..9d4d077a 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -107,6 +107,7 @@ public void storeObject() throws Exception { // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); assertEquals(objectCid, objInfo.getCid()); + assertEquals(pid, objInfo.getPid()); } } @@ -1332,8 +1333,9 @@ public void deleteObject_stringPidAll() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPathString = fileHashStore.storeMetadata(metadataStream, pid, - testFormatId); + String metadataPathString = fileHashStore.storeMetadata( + metadataStream, pid, testFormatId + ); String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStream, pid); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); Path metadataPath = Paths.get(metadataPathString); From 188d482e67b087c3835fe72e41b955cb59c65041 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 13:25:53 -0800 Subject: [PATCH 221/553] Move 'deleteObject(String pid)' deletion of metadata process to 'deleteObject(String idType, String id)' --- .../filehashstore/FileHashStore.java | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index ed8426e4..522f534e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1154,6 +1154,31 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } try { + // First, remove all metadata + String pidHexDigest = FileHashStoreUtility.getPidHexDigest( + pid, OBJECT_STORE_ALGORITHM + ); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest + ); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve( + pidRelativePath + ); + + // Check that directory exists and is not empty before attempting to delete metadata docs + if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility + .dirContainsFiles(expectedPidMetadataDirectory)) { + try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { + stream.map(Path::toFile).forEach(File::delete); + + } catch (IOException ioe) { + logFileHashStore.warn( + "FileHashStore.deleteObject - Unexpected IOException: " + ioe + .getMessage() + ); + } + } + // Get permanent address of the pid by calculating its sha-256 hex digest Path objRealPath = getExpectedPath(pid, "object", null); // Get the path to the cid refs file to work with @@ -1185,8 +1210,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Files.delete(objRealPath); } else { String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " - + pid - + " is not empty (references exist for the cid). Skipping object deletion."; + + pid + " is not empty (refs exist for cid). Skipping object deletion."; logFileHashStore.warn(warnMsg); } logFileHashStore.info( @@ -1213,29 +1237,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete all associated docs for pid: " + pid ); - // First, delete object as expected normally - // This is synchronized based on the 'cid' retrieved from the pid refs file deleteObject(HashStoreIdTypes.pid.getName("pid"), pid); - - // Second, delete all metadata documents in the associated pid metadata directory - String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest - ); - Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); - - // Check that directory exists and is not empty before attempting to delete metadata docs - if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility - .dirContainsFiles(expectedPidMetadataDirectory)) { - try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { - stream.map(Path::toFile).forEach(File::delete); - - } catch (IOException ioe) { - logFileHashStore.warn( - "FileHashStore.deleteObject - Unexpected IOException: " + ioe.getMessage() - ); - } - } logFileHashStore.info( "FileHashStore.deleteObject - Object, references and metadata deleted for: " + pid ); From 45dbb319607985588bdc723e11a1fc9104e1d9ad Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Feb 2024 15:07:30 -0800 Subject: [PATCH 222/553] Move validation logic of checksum/algorithm parameters from 'putObject' to 'verifyChecksumParameters' to improve code clarity --- .../filehashstore/FileHashStore.java | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 522f534e..9ea3025b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1362,32 +1362,19 @@ protected ObjectMetadata putObject( PidRefsFileExistsException, IllegalArgumentException, NullPointerException, AtomicMoveNotSupportedException { logFileHashStore.debug("FileHashStore.putObject - Called to put object for pid: " + pid); - - // Validate algorithms if not null or empty, throws exception if not supported + // If validation is desired, checksumAlgorithm and checksum must both be present + boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); + // Validate additional algorithm if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { FileHashStoreUtility.checkForEmptyString( additionalAlgorithm, "additionalAlgorithm", "putObject" ); validateAlgorithm(additionalAlgorithm); } - // checksumAlgorithm should be evaluated as a pair, catch it earlier - // The way this checks must be very clear or else it's difficult to understand - if (checksumAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( - checksumAlgorithm, "checksumAlgorithm", "putObject" - ); - validateAlgorithm(checksumAlgorithm); - } - if (checksum != null) { - FileHashStoreUtility.checkForEmptyString(checksum, "checksum", "putObject"); - } if (objSize != -1) { FileHashStoreUtility.checkNotNegativeOrZero(objSize, "putObject"); } - // If validation is desired, checksumAlgorithm and checksum must both be present - boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); - // Generate tmp file and write to it logFileHashStore.debug("FileHashStore.putObject - Generating tmpFile"); File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); @@ -1588,6 +1575,16 @@ private boolean isDefaultAlgorithm(String algorithm) { */ protected boolean verifyChecksumParameters(String checksum, String checksumAlgorithm) throws NoSuchAlgorithmException { + // First ensure algorithm is compatible and values are valid if they aren't null + if (checksumAlgorithm != null) { + FileHashStoreUtility.checkForEmptyString( + checksumAlgorithm, "checksumAlgorithm", "putObject" + ); + validateAlgorithm(checksumAlgorithm); + } + if (checksum != null) { + FileHashStoreUtility.checkForEmptyString(checksum, "checksum", "putObject"); + } // If checksum is supplied, checksumAlgorithm cannot be empty if (checksum != null && !checksum.trim().isEmpty()) { FileHashStoreUtility.ensureNotNull( From 4901ef8d4cc646f365b29d60e950525cc89bcd23 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 2 Feb 2024 10:33:54 -0800 Subject: [PATCH 223/553] Remove redundant code from 'deleteObject' and add additional comments --- .../filehashstore/FileHashStore.java | 66 ++++++++----------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 9ea3025b..bd494b3f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1069,16 +1069,19 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti throw new IllegalArgumentException(errMsg); } - // If 'idType' is cid, attempt to delete the object directly + // If 'idType' is cid, attempt to delete the object directly without checking refs files if (idType.equals(HashStoreIdTypes.cid.getName("cid"))) { deleteObjectByCid(id); } else { // Else 'idType' is pid + // Before we begin deleting files, we need to ensure that the object and + // refs file are where they are expected to be String cid; String pid = id; try { // Begin by looking for the cid and confirming state + // Custom exceptions will be thrown and handled cid = findObject(pid); } catch (OrphanPidRefsFileException oprfe) { @@ -1132,7 +1135,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti return; } - // Proceed with next steps - cid has been retrieved without any errors + // Proceed with comprehensive deletion - cid exists, nothing out of place synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { try { @@ -1154,6 +1157,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } try { + // Get permanent address of the object + Path objRealPath = getExpectedPath(pid, "object", null); // First, remove all metadata String pidHexDigest = FileHashStoreUtility.getPidHexDigest( pid, OBJECT_STORE_ALGORITHM @@ -1164,7 +1169,6 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve( pidRelativePath ); - // Check that directory exists and is not empty before attempting to delete metadata docs if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility .dirContainsFiles(expectedPidMetadataDirectory)) { @@ -1178,46 +1182,34 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti ); } } - - // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getExpectedPath(pid, "object", null); + // Proceed to delete the reference files and object // Get the path to the cid refs file to work with Path absCidRefsPath = getExpectedPath( cid, "refs", HashStoreIdTypes.cid.getName("cid") ); - - if (!Files.exists(objRealPath)) { - // Throw exception if object doesn't exist - String errMsg = "FileHashStore.deleteObject - File does not exist for pid: " - + pid + " with object address: " + objRealPath; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - + Path absPidRefsPath = getExpectedPath( + pid, "refs", HashStoreIdTypes.pid.getName("pid") + ); + // Delete pid reference file + deleteRefsFile(absPidRefsPath); + // Remove pid from cid refs file + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete obj and cid refs file only if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Delete empty cid refs file + Files.delete(absCidRefsPath); + // Delete actual object + Files.delete(objRealPath); } else { - // Proceed to delete the reference files and object - // Delete pid reference file - Path absPidRefsPath = getExpectedPath( - pid, "refs", HashStoreIdTypes.pid.getName("pid") - ); - deleteRefsFile(absPidRefsPath); - // Remove pid from cid refs file - updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete obj and cid refs file only if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Delete empty cid refs file - Files.delete(absCidRefsPath); - // Delete actual object - Files.delete(objRealPath); - } else { - String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " - + pid + " is not empty (refs exist for cid). Skipping object deletion."; - logFileHashStore.warn(warnMsg); - } - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath - ); + String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object deletion."; + logFileHashStore.warn(warnMsg); } + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath + ); + } finally { // Release lock synchronized (referenceLockedCids) { From 5964cd9ac1972617ef5f0a3ada2c553c968f0d4f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 2 Feb 2024 12:05:28 -0800 Subject: [PATCH 224/553] Refactor 'deleteObject' to reduce time 'cid' is locked --- .../filehashstore/FileHashStore.java | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index bd494b3f..e4902cf9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1136,6 +1136,21 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } // Proceed with comprehensive deletion - cid exists, nothing out of place + // Stage 1: Get all the required paths to streamline deletion process + // Permanent address of the object + Path objRealPath = getExpectedPath(pid, "object", null); + // Cid refs file + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); + // Pid refs file + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); + // Metadata directory + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest + ); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); + + // Stage 2: Remove documents synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { try { @@ -1157,18 +1172,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } try { - // Get permanent address of the object - Path objRealPath = getExpectedPath(pid, "object", null); - // First, remove all metadata - String pidHexDigest = FileHashStoreUtility.getPidHexDigest( - pid, OBJECT_STORE_ALGORITHM - ); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest - ); - Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve( - pidRelativePath - ); + // Begin with metadata documents // Check that directory exists and is not empty before attempting to delete metadata docs if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility .dirContainsFiles(expectedPidMetadataDirectory)) { @@ -1182,15 +1186,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti ); } } - // Proceed to delete the reference files and object - // Get the path to the cid refs file to work with - Path absCidRefsPath = getExpectedPath( - cid, "refs", HashStoreIdTypes.cid.getName("cid") - ); - Path absPidRefsPath = getExpectedPath( - pid, "refs", HashStoreIdTypes.pid.getName("pid") - ); - // Delete pid reference file + // Then pid reference file deleteRefsFile(absPidRefsPath); // Remove pid from cid refs file updateRefsFile(pid, absCidRefsPath, "remove"); From 7d5d88f391e7bb1b25255c2b37dd69c994c668d1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 2 Feb 2024 12:58:14 -0800 Subject: [PATCH 225/553] Further streamline 'deleteObject' process by collecting metadata document paths beforehand into a List before entering synchronization code --- .../filehashstore/FileHashStore.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e4902cf9..8f6359cb 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1143,12 +1143,21 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); // Pid refs file Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); + // Get list of metadata documents // Metadata directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest ); Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); + // Add all metadata doc paths to a List to iterate over below + List metadataDocPaths = new ArrayList<>(); + if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility + .dirContainsFiles(expectedPidMetadataDirectory)) { + try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { + stream.filter(Files::isRegularFile).forEach(metadataDocPaths::add); + } + } // Stage 2: Remove documents synchronized (referenceLockedCids) { @@ -1173,21 +1182,11 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti try { // Begin with metadata documents - // Check that directory exists and is not empty before attempting to delete metadata docs - if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility - .dirContainsFiles(expectedPidMetadataDirectory)) { - try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { - stream.map(Path::toFile).forEach(File::delete); - - } catch (IOException ioe) { - logFileHashStore.warn( - "FileHashStore.deleteObject - Unexpected IOException: " + ioe - .getMessage() - ); - } + for (Path path : metadataDocPaths) { + Files.delete(path); } // Then pid reference file - deleteRefsFile(absPidRefsPath); + Files.delete(absPidRefsPath); // Remove pid from cid refs file updateRefsFile(pid, absCidRefsPath, "remove"); // Delete obj and cid refs file only if the cid refs file is empty From 14bfd59f1fc91b8437000be7ec871453008f7e3e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 2 Feb 2024 13:04:46 -0800 Subject: [PATCH 226/553] Update HashStore interface javadocs and review interface method exceptions --- .../java/org/dataone/hashstore/HashStore.java | 3 + .../filehashstore/FileHashStore.java | 111 +++++++++--------- 2 files changed, 60 insertions(+), 54 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 0ed37261..7ad0e9e9 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -6,6 +6,7 @@ import java.security.NoSuchAlgorithmException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; +import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -162,6 +163,8 @@ public boolean verifyObject( * file's absolute address is not valid * @throws IOException Unable to read from a pid refs file or pid refs * file does not exist + * @throws OrphanRefsFilesException pid and cid refs file found, but object does + * not exist * @throws OrphanPidRefsFileException When pid refs file exists and the cid found * inside does not exist. * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8f6359cb..c48f3d3c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -626,47 +626,6 @@ public ObjectMetadata storeObject(InputStream object, String pid, String additio return storeObject(object, pid, additionalAlgorithm, null, null, -1); } - @Override - public boolean verifyObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize - ) throws IllegalArgumentException { - logFileHashStore.debug( - "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() - ); - FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "verifyObject"); - - Map hexDigests = objectInfo.getHexDigests(); - String digestFromHexDigests = hexDigests.get(checksumAlgorithm); - long objInfoRetrievedSize = objectInfo.getSize(); - String objCid = objectInfo.getCid(); - - if (objInfoRetrievedSize != objSize) { - logFileHashStore.info( - "FileHashStore.verifyObject - Object size invalid for cid: " + objCid - + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize - ); - return false; - - } else if (!digestFromHexDigests.equals(checksum)) { - logFileHashStore.info( - "FileHashStore.verifyObject - Object content invalid for cid: " + objCid - + ". Expected checksum: " + checksum + ". Actual checksum calculated: " - + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" - ); - return false; - - } else { - logFileHashStore.info( - "FileHashStore.verifyObject - Object has been validated for cid: " + objCid - + ". Expected checksum: " + checksum + ". Actual checksum calculated: " - + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" - ); - return true; - } - } @Override public void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, @@ -760,7 +719,50 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } @Override - public String findObject(String pid) throws NoSuchAlgorithmException, IOException { + public boolean verifyObject( + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize + ) throws IllegalArgumentException { + logFileHashStore.debug( + "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() + ); + FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); + FileHashStoreUtility.checkNotNegativeOrZero(objSize, "verifyObject"); + + Map hexDigests = objectInfo.getHexDigests(); + String digestFromHexDigests = hexDigests.get(checksumAlgorithm); + long objInfoRetrievedSize = objectInfo.getSize(); + String objCid = objectInfo.getCid(); + + if (objInfoRetrievedSize != objSize) { + logFileHashStore.info( + "FileHashStore.verifyObject - Object size invalid for cid: " + objCid + + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize + ); + return false; + + } else if (!digestFromHexDigests.equals(checksum)) { + logFileHashStore.info( + "FileHashStore.verifyObject - Object content invalid for cid: " + objCid + + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" + ); + return false; + + } else { + logFileHashStore.info( + "FileHashStore.verifyObject - Object has been validated for cid: " + objCid + + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" + ); + return true; + } + } + + @Override + public String findObject(String pid) throws NoSuchAlgorithmException, IOException, + OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); @@ -818,7 +820,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio @Override public String storeMetadata(InputStream metadata, String pid, String formatId) - throws IOException, FileNotFoundException, IllegalArgumentException, InterruptedException, + throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.storeMetadata - Called to store metadata for pid: " + pid @@ -917,13 +919,14 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF */ @Override public String storeMetadata(InputStream metadata, String pid) throws IOException, - IllegalArgumentException, InterruptedException, NoSuchAlgorithmException { + IllegalArgumentException, FileNotFoundException, InterruptedException, + NoSuchAlgorithmException { return storeMetadata(metadata, pid, DEFAULT_METADATA_NAMESPACE); } @Override public InputStream retrieveObject(String pid) throws IllegalArgumentException, - NoSuchAlgorithmException, FileNotFoundException, IOException { + FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.retrieveObject - Called to retrieve object for pid: " + pid ); @@ -1051,8 +1054,8 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, } @Override - public void deleteObject(String idType, String id) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException { + public void deleteObject(String idType, String id) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete object for id: " + id + "(" + idType + ")" @@ -1219,8 +1222,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } @Override - public void deleteObject(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException { + public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, + IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete all associated docs for pid: " + pid ); @@ -1232,7 +1235,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio @Override public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException { + FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid ); @@ -1264,14 +1267,14 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * Overload method for deleteMetadata with default metadata namespace */ @Override - public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException { + public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, + IOException, NoSuchAlgorithmException { deleteMetadata(pid, DEFAULT_METADATA_NAMESPACE); } @Override - public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, - FileNotFoundException, IOException { + public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.getHexDigest - Called to calculate hex digest for pid: " + pid ); From c9e134f4568bc9a609a63a70eccb9a24f4a0ee36 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 2 Feb 2024 13:25:36 -0800 Subject: [PATCH 227/553] Revise junit javadoc and test, and add .printStackTrace() statements to help with debugging --- .../FileHashStoreInterfaceTest.java | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 9d4d077a..8b6a1fd8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -33,6 +33,7 @@ import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -579,12 +580,14 @@ public void storeObject_interruptProcess() throws Exception { /** * Tests that the `storeObject` method can store an object successfully with multiple threads * (5). This test uses five futures (threads) that run concurrently, all except one of which - * will encounter an `RunTimeException`. The thread that does not encounter an exception will + * will encounter a `RunTimeException`. The thread that does not encounter an exception will * store the given object, and verifies that the object is stored successfully. * * The threads are expected to encounter a `RunTimeException` since the expected * object to store is already in progress (thrown by `syncPutObject` which coordinates - * `store_object` requests with a pid). + * `store_object` requests with a pid). If both threads execute simultaneously and bypasses + * the store object synchronization flow, we may also run into a `PidRefsFileExistsException` + * - which prevents the cid from being tagged twice by the same pid. */ @Test public void storeObject_objectLockedIds_FiveThreads() throws Exception { @@ -592,7 +595,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - // Create a thread pool with 3 threads + // Create a thread pool with 5 threads ExecutorService executorService = Executors.newFixedThreadPool(5); // Submit 5 futures to the thread pool, each calling storeObject @@ -612,11 +615,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println("Start Thread 1 Exception:"); - System.out.println(e.getClass()); e.printStackTrace(); - System.out.println("End Thread 1 Exception\n"); - assertTrue(e instanceof RuntimeException); + assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); } }); Future future2 = executorService.submit(() -> { @@ -635,7 +635,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + e.printStackTrace(); + assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); } }); Future future3 = executorService.submit(() -> { @@ -654,7 +655,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + e.printStackTrace(); + assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); } }); Future future4 = executorService.submit(() -> { @@ -673,7 +675,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + e.printStackTrace(); + assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); } }); Future future5 = executorService.submit(() -> { @@ -692,7 +695,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + e.printStackTrace(); + assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); } }); From 1ab270c7cb2010b292ab236c69e83d80f430b4e4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 2 Feb 2024 14:19:41 -0800 Subject: [PATCH 228/553] Add new method 'getFilesFromDir()' to 'FileHashStoreUtility' class and refactor 'deleteObject' --- .../hashstore/filehashstore/FileHashStore.java | 11 ++++------- .../filehashstore/FileHashStoreUtility.java | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c48f3d3c..b46e0163 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1077,6 +1077,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti deleteObjectByCid(id); } else { + // TODO: How to coordinate deleting metadata documents here? // Else 'idType' is pid // Before we begin deleting files, we need to ensure that the object and // refs file are where they are expected to be @@ -1154,13 +1155,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti ); Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); // Add all metadata doc paths to a List to iterate over below - List metadataDocPaths = new ArrayList<>(); - if (Files.isDirectory(expectedPidMetadataDirectory) && FileHashStoreUtility - .dirContainsFiles(expectedPidMetadataDirectory)) { - try (Stream stream = Files.walk(expectedPidMetadataDirectory)) { - stream.filter(Files::isRegularFile).forEach(metadataDocPaths::add); - } - } + List metadataDocPaths = FileHashStoreUtility.getFilesFromDir( + expectedPidMetadataDirectory + ); // Stage 2: Remove documents synchronized (referenceLockedCids) { diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 43d43ae1..620ac438 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -116,6 +116,23 @@ public static boolean dirContainsFiles(Path directory) throws IOException { } } + /** + * Checks a directory for files and returns a list of paths + * + * @param directory Directory to check + * @return List of files + * @throws IOException If I/O occurs when accessing directory + */ + public static List getFilesFromDir(Path directory) throws IOException { + List filePaths = new ArrayList<>(); + if (Files.isDirectory(directory) && dirContainsFiles(directory)) { + try (Stream stream = Files.walk(directory)) { + stream.filter(Files::isRegularFile).forEach(filePaths::add); + } + } + return filePaths; + } + /** * Checks whether a given string is empty and throws an exception if so * From d1aaf321d87d438321a653e04041fdfb0ce6d68f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 5 Feb 2024 10:27:25 -0800 Subject: [PATCH 229/553] Clean-up code --- .../filehashstore/FileHashStore.java | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b46e0163..c58ed69d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -26,7 +26,6 @@ import java.util.Map; import java.util.Objects; import java.util.Properties; -import java.util.stream.Stream; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -785,7 +784,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio // If the pid is found in the expected cid refs file, and the object exists, return it if (isStringInRefsFile(pid, absCidRefsPath)) { logFileHashStore.info( - "FileHashStore.findObject - Cid (" + cid + ") found for pid:" + pid + "FileHashStore.findObject - Cid (" + cid + ") found for pid: " + pid ); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( @@ -1054,8 +1053,8 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, } @Override - public void deleteObject(String idType, String id) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException { + public void deleteObject(String idType, String id) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete object for id: " + id + "(" + idType + ")" @@ -1219,8 +1218,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } @Override - public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException { + public void deleteObject(String pid) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete all associated docs for pid: " + pid ); @@ -1232,7 +1231,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou @Override public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { + IOException, NoSuchAlgorithmException { logFileHashStore.debug( "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid ); @@ -1264,8 +1263,8 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * Overload method for deleteMetadata with default metadata namespace */ @Override - public void deleteMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException { + public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { deleteMetadata(pid, DEFAULT_METADATA_NAMESPACE); } @@ -1432,11 +1431,10 @@ protected ObjectMetadata putObject( * @param hexDigests Map of the hex digests to parse data from * @param objSize Expected size of object * @param storedObjFileSize Actual size of object stored - * @return Boolean, true if valid * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent * @throws IOException Issue with deleting tmpFile */ - private boolean validateTmpObject( + private void validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, Map hexDigests, long objSize, long storedObjFileSize ) throws NoSuchAlgorithmException, IOException { @@ -1503,8 +1501,6 @@ private boolean validateTmpObject( throw new IllegalArgumentException(errMsg); } } - - return true; } /** @@ -2020,7 +2016,7 @@ protected void deleteRefsFile(Path absRefsPath) throws IOException { protected String putMetadata(InputStream metadata, String pid, String formatId) throws NoSuchAlgorithmException, IOException { logFileHashStore.debug( - "FileHashStore.putMetadata - Called to put metadata for pid:" + pid + "FileHashStore.putMetadata - Called to put metadata for pid: " + pid + " , with metadata namespace: " + formatId ); @@ -2124,7 +2120,7 @@ protected Path getExpectedPath(String abId, String entity, String formatId) String objectCid; if (!Files.exists(pathToPidRefsFile)) { String errMsg = - "FileHashStore.getExpectedPath - Pid Refs file does not exist for pid:" + abId + "FileHashStore.getExpectedPath - Pid Refs file does not exist for pid: " + abId + " with object address: " + pathToPidRefsFile + ". Cannot retrive cid."; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); From d56b9b152e50bd130b1af5f1d865b4201e30cf46 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 5 Feb 2024 12:50:53 -0800 Subject: [PATCH 230/553] Add new methods 'renamePathForDeletion' and 'deleteListItems' in FileHashStoreUtility class --- .../filehashstore/FileHashStoreUtility.java | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 620ac438..14437be4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -1,11 +1,13 @@ package org.dataone.hashstore.filehashstore; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -133,6 +135,46 @@ public static List getFilesFromDir(Path directory) throws IOException { return filePaths; } + /** + * Rename the given path to the 'file name' + '_delete' + * + * @param path + * @return Path to the file with '_delete' appended + * @throws IOException Issue with renaming the given file path + */ + public static Path renamePathForDeletion(Path pathToRename) throws IOException { + ensureNotNull(pathToRename, "pathToRename", "renamePathForDeletion"); + if (!Files.exists(pathToRename)) { + String errMsg = "FileHashStoreUtility.renamePathForDeletion - Given path to file: " + + pathToRename + " does not exist."; + throw new FileNotFoundException(errMsg); + } + Path parentPath = pathToRename.getParent(); + Path fileName = pathToRename.getFileName(); + String newFileName = fileName.toString() + "_delete"; + + Path deletePath = parentPath.resolve(newFileName); + Files.move(pathToRename, deletePath, StandardCopyOption.ATOMIC_MOVE); + return deletePath; + } + + /** + * Delete all paths found in the given List object. + * + * @param deleteList Directory to check + * @throws IOException Unexpected I/O error when deleting files + */ + public static void deleteListItems(List deleteList) throws IOException { + ensureNotNull(deleteList, "deleteList", "deleteListItems"); + if (deleteList.size() > 0) { + for (Path deleteItem : deleteList) { + if (Files.exists(deleteItem)) { + Files.delete(deleteItem); + } + } + } + } + /** * Checks whether a given string is empty and throws an exception if so * From 20a209225a2c5c02112cc0bcb80540d5cdfb056c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 5 Feb 2024 12:51:21 -0800 Subject: [PATCH 231/553] Refactor 'deleteObject' to improve atomicity of the process (rename all files, then delete at the very end) --- .../filehashstore/FileHashStore.java | 69 +++++++++++-------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c58ed69d..3e6d48f0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1076,15 +1076,30 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti deleteObjectByCid(id); } else { - // TODO: How to coordinate deleting metadata documents here? // Else 'idType' is pid - // Before we begin deleting files, we need to ensure that the object and - // refs file are where they are expected to be String cid; String pid = id; + List deleteList = new ArrayList<>(); + + // Get list of metadata documents, these will always be deleted if they exist. + // Metadata directory + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest + ); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); + // Add all metadata doc paths to a List to iterate over below + List metadataDocPaths = FileHashStoreUtility.getFilesFromDir( + expectedPidMetadataDirectory + ); + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + + // Before we begin deleting files, we handle orphaned files scenarios try { // Begin by looking for the cid and confirming state - // Custom exceptions will be thrown and handled + // If a custom exception is thrown, this try block will return; cid = findObject(pid); } catch (OrphanPidRefsFileException oprfe) { @@ -1092,8 +1107,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Path absPidRefsPath = getExpectedPath( pid, "refs", HashStoreIdTypes.pid.getName("pid") ); - Files.delete(absPidRefsPath); + // Add the pid refs file to deleteList + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; @@ -1102,22 +1119,24 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanRefsFilesException orfe) { // Object does not exist, attempt to remove orphan files - // Remove pid refs file Path absPidRefsPath = getExpectedPath( id, "refs", HashStoreIdTypes.pid.getName("pid") ); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - Files.delete(absPidRefsPath); + // Add the pid refs file to deleteList + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Remove the pid from the cid refs file Path absCidRefsPath = getExpectedPath( cidRead, "refs", HashStoreIdTypes.cid.getName("cid") ); updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete the cid reference file if it's now empty + // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { - Files.delete(absCidRefsPath); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } + + FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted orphan files."; @@ -1129,8 +1148,11 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Path absPidRefsPath = getExpectedPath( pid, "refs", HashStoreIdTypes.pid.getName("pid") ); - Files.delete(absPidRefsPath); + // Add the pid refs file to deleteList + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; @@ -1139,26 +1161,14 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } // Proceed with comprehensive deletion - cid exists, nothing out of place - // Stage 1: Get all the required paths to streamline deletion process + // Get all the required paths to streamline deletion process // Permanent address of the object Path objRealPath = getExpectedPath(pid, "object", null); // Cid refs file Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); // Pid refs file Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); - // Get list of metadata documents - // Metadata directory - String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest - ); - Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); - // Add all metadata doc paths to a List to iterate over below - List metadataDocPaths = FileHashStoreUtility.getFilesFromDir( - expectedPidMetadataDirectory - ); - // Stage 2: Remove documents synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { try { @@ -1180,25 +1190,23 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } try { - // Begin with metadata documents - for (Path path : metadataDocPaths) { - Files.delete(path); - } // Then pid reference file - Files.delete(absPidRefsPath); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Remove pid from cid refs file updateRefsFile(pid, absCidRefsPath, "remove"); // Delete obj and cid refs file only if the cid refs file is empty if (Files.size(absCidRefsPath) == 0) { // Delete empty cid refs file - Files.delete(absCidRefsPath); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); // Delete actual object - Files.delete(objRealPath); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); } else { String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid + " is not empty (refs exist for cid). Skipping object deletion."; logFileHashStore.warn(warnMsg); } + // Delete all related items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info( "FileHashStore.deleteObject - File and references deleted for: " + pid + " with object address: " + objRealPath @@ -1217,6 +1225,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } } + @Override public void deleteObject(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException { From c86bb10af9178b9439c05b4a8c52537585189fc1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 5 Feb 2024 13:02:52 -0800 Subject: [PATCH 232/553] Update 'HashStore' interface javadocs and clean up code --- .../java/org/dataone/hashstore/HashStore.java | 15 +++--- .../dataone/hashstore/HashStoreClient.java | 4 +- .../filehashstore/FileHashStore.java | 49 +++++++++---------- .../FileHashStoreInterfaceTest.java | 4 +- 4 files changed, 37 insertions(+), 35 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 7ad0e9e9..731f2329 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -256,6 +256,9 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, * all metadata documents found in its respective metadata directory. If the `idType` is * 'cid', only the object will be deleted if it is not referenced by other pids. * + * Notes: All objects are renamed at their existing path with a '_deleted' appended + * to their file name before they are deleted. + * * @param idType 'pid' or 'cid' * @param id Authority-based identifier or content identifier * @throws IllegalArgumentException When pid is null or empty @@ -266,7 +269,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, * @throws InterruptedException When deletion synchronization is interrupted */ public void deleteObject(String idType, String id) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException, InterruptedException; + IOException, NoSuchAlgorithmException, InterruptedException; /** * Deletes an object and all relevant associated files (ex. system metadata, reference @@ -276,8 +279,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti * @param pid Authority-based identifier * @see #deleteObject(String, String) for more details. */ - public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException; + public void deleteObject(String pid) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException, InterruptedException; /** * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given @@ -291,7 +294,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, FileNotFou * supported */ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + IOException, NoSuchAlgorithmException; /** * @see #deleteMetadata(String, String) @@ -299,8 +302,8 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * If `deleteMetadata` is called with signature (String pid), the metadata * document deleted will be the given pid's 'sysmeta' */ - public void deleteMetadata(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, + NoSuchAlgorithmException; /** * Calculates the hex digest of an object that exists in HashStore using a given persistent diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 25a85ed2..7778eaf5 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -231,7 +231,7 @@ public static void main(String[] args) throws Exception { String pid = cmd.getOptionValue("pid"); FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - String deleteIdType = HashStoreIdTypes.pid.getName("pid"); + String deleteIdType = HashStoreIdTypes.pid.getName(); hashStore.deleteObject(deleteIdType, pid); System.out.println("Object for pid (" + pid + ") has been deleted."); @@ -703,7 +703,7 @@ private static void deleteObjectsFromStore(List> resultObjLi // Delete object System.out.println("Deleting object for guid: " + guid); - String deleteIdType = HashStoreIdTypes.pid.getName("pid"); + String deleteIdType = HashStoreIdTypes.pid.getName(); hashStore.deleteObject(deleteIdType, guid); } catch (FileNotFoundException fnfe) { diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3e6d48f0..8e69f149 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -81,7 +81,7 @@ public enum HashStoreIdTypes { identifierType = idType; } - public String getName(String string) { + public String getName() { return identifierType; } } @@ -162,8 +162,8 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep OBJECT_TMP_FILE_DIRECTORY = OBJECT_STORE_DIRECTORY.resolve("tmp"); METADATA_TMP_FILE_DIRECTORY = METADATA_STORE_DIRECTORY.resolve("tmp"); REFS_TMP_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("tmp"); - REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.pid.getName("pid")); - REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.cid.getName("cid")); + REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.pid.getName()); + REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.cid.getName()); try { // Physically create object & metadata store and tmp directories @@ -659,8 +659,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } try { - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); // Check that pid refs file doesn't exist yet if (Files.exists(absPidRefsPath)) { @@ -676,7 +676,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi updateRefsFile(pid, absCidRefsPath, "add"); } // Get the pid refs file - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName("pid")); + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); // Verify tagging process, this throws exceptions if there's an issue @@ -689,8 +689,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } else { // Get pid and cid refs files - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName("pid")); - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName("cid")); + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); // Move refs files to permanent location File absPathPidRefsFile = absPidRefsPath.toFile(); File absPathCidRefsFile = absCidRefsPath.toFile(); @@ -767,11 +767,11 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); // Get path of the pid references file - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); if (Files.exists(absPidRefsPath)) { String cid = new String(Files.readAllBytes(absPidRefsPath)); - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { @@ -1062,8 +1062,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Validate input parameters FileHashStoreUtility.ensureNotNull(id, "id", "deleteObject"); FileHashStoreUtility.checkForEmptyString(id, "id", "deleteObject"); - if (!idType.equals(HashStoreIdTypes.pid.getName("pid")) && !idType.equals( - HashStoreIdTypes.cid.getName("cid") + if (!idType.equals(HashStoreIdTypes.pid.getName()) && !idType.equals( + HashStoreIdTypes.cid.getName() )) { String errMsg = "FileHashStore.deleteObject - 'idType' must be 'pid' or 'cid'"; @@ -1072,7 +1072,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } // If 'idType' is cid, attempt to delete the object directly without checking refs files - if (idType.equals(HashStoreIdTypes.cid.getName("cid"))) { + if (idType.equals(HashStoreIdTypes.cid.getName())) { deleteObjectByCid(id); } else { @@ -1105,7 +1105,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanPidRefsFileException oprfe) { // Delete the pid refs file and return, nothing else to delete Path absPidRefsPath = getExpectedPath( - pid, "refs", HashStoreIdTypes.pid.getName("pid") + pid, "refs", HashStoreIdTypes.pid.getName() ); // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1120,7 +1120,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanRefsFilesException orfe) { // Object does not exist, attempt to remove orphan files Path absPidRefsPath = getExpectedPath( - id, "refs", HashStoreIdTypes.pid.getName("pid") + id, "refs", HashStoreIdTypes.pid.getName() ); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); // Add the pid refs file to deleteList @@ -1128,7 +1128,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Remove the pid from the cid refs file Path absCidRefsPath = getExpectedPath( - cidRead, "refs", HashStoreIdTypes.cid.getName("cid") + cidRead, "refs", HashStoreIdTypes.cid.getName() ); updateRefsFile(pid, absCidRefsPath, "remove"); // Add the cid reference file to deleteList if it's now empty @@ -1146,7 +1146,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (PidNotFoundInCidRefsFileException pnficrfe) { // Delete pid refs file and return, nothing else to delete Path absPidRefsPath = getExpectedPath( - pid, "refs", HashStoreIdTypes.pid.getName("pid") + pid, "refs", HashStoreIdTypes.pid.getName() ); // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1165,9 +1165,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Permanent address of the object Path objRealPath = getExpectedPath(pid, "object", null); // Cid refs file - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); // Pid refs file - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName("pid")); + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { @@ -1232,7 +1232,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete all associated docs for pid: " + pid ); - deleteObject(HashStoreIdTypes.pid.getName("pid"), pid); + deleteObject(HashStoreIdTypes.pid.getName(), pid); logFileHashStore.info( "FileHashStore.deleteObject - Object, references and metadata deleted for: " + pid ); @@ -1290,8 +1290,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE // Find the content identifier if (algorithm.equals(OBJECT_STORE_ALGORITHM)) { - String cid = findObject(pid); - return cid; + return findObject(pid); } else { // Get permanent address of the pid @@ -1807,7 +1806,7 @@ protected void move(File source, File target, String entity) throws IOException, * @throws NoSuchAlgorithmException Incompatible algorithm used to find relative path to cid */ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException { - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName("cid")); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); if (Files.exists(absCidRefsPath)) { String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" + " references, skipping deletion."; @@ -2156,13 +2155,13 @@ protected Path getExpectedPath(String abId, String entity, String formatId) ); } else if (entity.equalsIgnoreCase("refs")) { - if (formatId.equalsIgnoreCase(HashStoreIdTypes.pid.getName("pid"))) { + if (formatId.equalsIgnoreCase(HashStoreIdTypes.pid.getName())) { // hashId = pidRefId String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); - } else if (formatId.equalsIgnoreCase(HashStoreIdTypes.cid.getName("cid"))) { + } else if (formatId.equalsIgnoreCase(HashStoreIdTypes.cid.getName())) { // hashId = cid String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 8b6a1fd8..714cfa14 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -60,8 +60,8 @@ public class FileHashStoreInterfaceTest { @BeforeEach public void initializeFileHashStore() { rootDirectory = tempFolder.resolve("metacat"); - fhsDeleteTypePid = HashStoreIdTypes.pid.getName("pid"); - fhsDeleteTypeCid = HashStoreIdTypes.cid.getName("cid"); + fhsDeleteTypePid = HashStoreIdTypes.pid.getName(); + fhsDeleteTypeCid = HashStoreIdTypes.cid.getName(); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); From c66b5a14d72c5c261426d5d53aa31740caacd424 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 5 Feb 2024 13:17:20 -0800 Subject: [PATCH 233/553] Clean up code, javadocs and fix minor bug in ObjectMetadata class constructor --- .../java/org/dataone/hashstore/HashStore.java | 2 +- .../org/dataone/hashstore/ObjectMetadata.java | 3 ++- .../hashstore/filehashstore/FileHashStore.java | 15 ++++----------- .../filehashstore/FileHashStoreUtility.java | 2 +- .../org/dataone/hashstore/ObjectMetadataTest.java | 15 ++++++++------- .../filehashstore/FileHashStoreInterfaceTest.java | 2 +- 6 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 731f2329..b725824f 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -176,7 +176,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio /** * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a * persistent identifier (`pid`) and metadata format (`formatId`). All metadata documents - * for a given pid will be stored in the a directory (under ../metadata) that is determined + * for a given pid will be stored in the directory (under ../metadata) that is determined * by calculating the hash of the given pid, with the document name being the hash of the * metadata format (`formatId`). * diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index afd20ada..22f6689d 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -24,7 +24,8 @@ public class ObjectMetadata { * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the * file */ - public ObjectMetadata(String cid, long size, Map hexDigests) { + public ObjectMetadata(String pid, String cid, long size, Map hexDigests) { + this.pid = pid; this.cid = cid; this.size = size; this.hexDigests = hexDigests; diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8e69f149..63d0a51c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -504,7 +504,6 @@ private ObjectMetadata syncPutObject( ); // Tag object String cid = objInfo.getCid(); - objInfo.setPid(pid); tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid @@ -1104,9 +1103,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanPidRefsFileException oprfe) { // Delete the pid refs file and return, nothing else to delete - Path absPidRefsPath = getExpectedPath( - pid, "refs", HashStoreIdTypes.pid.getName() - ); + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1119,9 +1116,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (OrphanRefsFilesException orfe) { // Object does not exist, attempt to remove orphan files - Path absPidRefsPath = getExpectedPath( - id, "refs", HashStoreIdTypes.pid.getName() - ); + Path absPidRefsPath = getExpectedPath(id, "refs", HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1145,9 +1140,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (PidNotFoundInCidRefsFileException pnficrfe) { // Delete pid refs file and return, nothing else to delete - Path absPidRefsPath = getExpectedPath( - pid, "refs", HashStoreIdTypes.pid.getName() - ); + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1424,7 +1417,7 @@ protected ObjectMetadata putObject( } // Create ObjectMetadata to return with pertinent data - return new ObjectMetadata(objectCid, storedObjFileSize, hexDigests); + return new ObjectMetadata(pid, objectCid, storedObjFileSize, hexDigests); } /** diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 14437be4..e3ddc53a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -138,7 +138,7 @@ public static List getFilesFromDir(Path directory) throws IOException { /** * Rename the given path to the 'file name' + '_delete' * - * @param path + * @param pathToRename The path to the file to be renamed with '_deleted' * @return Path to the file with '_delete' appended * @throws IOException Issue with renaming the given file path */ diff --git a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java index 11f25bab..e946107b 100644 --- a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import java.util.HashMap; import java.util.Map; @@ -45,7 +46,7 @@ public void initializeInstanceVariables() { */ @Test public void testObjectMetadata() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); assertNotNull(objInfo); } @@ -54,9 +55,9 @@ public void testObjectMetadata() { */ @Test public void testObjectMetadataGetPid() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); String pid = objInfo.getPid(); - assertEquals(pid, null); + assertNull(pid); } /** @@ -64,7 +65,7 @@ public void testObjectMetadataGetPid() { */ @Test public void testObjectMetadataSetPid() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); String pidToSet = "dou.test.1"; objInfo.setPid(pidToSet); @@ -77,7 +78,7 @@ public void testObjectMetadataSetPid() { */ @Test public void testObjectMetadataGetId() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); String objId = objInfo.getCid(); assertEquals(objId, id); } @@ -87,7 +88,7 @@ public void testObjectMetadataGetId() { */ @Test public void testHashAddressGetSize() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); long objSize = objInfo.getSize(); assertEquals(objSize, size); } @@ -97,7 +98,7 @@ public void testHashAddressGetSize() { */ @Test public void testObjectMetadataGetHexDigests() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); Map objInfoMap = objInfo.getHexDigests(); assertEquals(objInfoMap, hexDigests); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 714cfa14..7db3ebab 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1708,7 +1708,7 @@ public void deleteMetadata_pidNull() { * Confirm that deleteMetadata throws exception when pid is empty */ @Test - public void deleteMetadata_pidEmpty() throws Exception { + public void deleteMetadata_pidEmpty() { assertThrows(IllegalArgumentException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata("", formatId); From 97cc2553fe7b87b760fb697f476c6b814e860716 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 5 Feb 2024 13:41:33 -0800 Subject: [PATCH 234/553] Refactor 'deleteMetadata(String pid)' to remove all metadata related for the given pid and update junit test --- .../java/org/dataone/hashstore/HashStore.java | 11 +++++--- .../filehashstore/FileHashStore.java | 26 ++++++++++++++++++- .../FileHashStoreInterfaceTest.java | 13 +++++++++- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index b725824f..7ca04a14 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -289,7 +289,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio * @param pid Authority-based identifier * @param formatId Metadata namespace/format * @throws IllegalArgumentException When pid or formatId is null or empty - * @throws IOException I/O error when deleting empty directories + * @throws IOException I/O error when deleting metadata or empty directories * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ @@ -297,10 +297,13 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx IOException, NoSuchAlgorithmException; /** - * @see #deleteMetadata(String, String) + * Deletes all metadata related for the given 'pid' from HashStore * - * If `deleteMetadata` is called with signature (String pid), the metadata - * document deleted will be the given pid's 'sysmeta' + * @param pid Authority-based identifier + * @throws IllegalArgumentException If pid is invalid + * @throws IOException I/O error when deleting metadata or empty directories + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported */ public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException; diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 63d0a51c..930e728e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1267,7 +1267,31 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx @Override public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { - deleteMetadata(pid, DEFAULT_METADATA_NAMESPACE); + logFileHashStore.debug( + "FileHashStore.deleteMetadata - Called to delete all metadata for pid: " + pid + ); + FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); + + List deleteList = new ArrayList<>(); + // Metadata directory + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest + ); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); + // Add all metadata doc paths to a List to iterate over below + List metadataDocPaths = FileHashStoreUtility.getFilesFromDir( + expectedPidMetadataDirectory + ); + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + // Delete all items in the list + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info( + "FileHashStore.deleteMetadata - All related metadata deleted for: " + pid + ); } @Override diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 7db3ebab..aad63f56 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1653,7 +1653,7 @@ public void deleteMetadata() throws Exception { } /** - * Confirm that deleteMetadata deletes object and empty subdirectories with overload method + * Confirm that deleteMetadata deletes all metadata stored for a given pid. */ @Test public void deleteMetadata_overload() throws Exception { @@ -1662,16 +1662,27 @@ public void deleteMetadata_overload() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + String formatIdTwo = "ns.type.2"; + String formatIdThree = "ns.type.3"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, pid, null); + fileHashStore.storeMetadata(metadataStream, pid, formatIdTwo); + fileHashStore.storeMetadata(metadataStream, pid, formatIdThree); fileHashStore.deleteMetadata(pid); // Check that file doesn't exist String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); Path metadataCidPath = fileHashStore.getExpectedPath(pid, "metadata", storeFormatId); + Path metadataCidPathTwo = fileHashStore.getExpectedPath(pid, "metadata", formatIdTwo); + Path metadataCidPathThree = fileHashStore.getExpectedPath( + pid, "metadata", formatIdThree + ); + assertFalse(Files.exists(metadataCidPath)); + assertFalse(Files.exists(metadataCidPathTwo)); + assertFalse(Files.exists(metadataCidPathThree)); // Check that parent directories are not deleted assertTrue(Files.exists(metadataCidPath.getParent())); From 5297773ac63bfeeeaa127286b74e568093482da8 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 5 Feb 2024 15:25:45 -0800 Subject: [PATCH 235/553] Update README.md --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 05c385cc..0acd4f6b 100644 --- a/README.md +++ b/README.md @@ -91,9 +91,8 @@ tagObject(pid, cid) - If desired, this cid can then be used to locate the object on disk by following HashStore's store configuration. **How do I delete an object if I have the pid?** -- To delete an object and all its associated reference files, call the Public API method `deleteObject()` with `idType` 'pid'. +- To delete an object, all its associated reference files and its metadata, call the Public API method `deleteObject()` with `idType` 'pid'. If an `idType` is not given (ex. calling `deleteObject(String pid)`), the `idType` will be assumed to be a 'pid' - To delete only an object, call `deleteObject()` with `idType` 'cid' which will remove the object if it it is not referenced by any pids. -- To delete an object and all its related data (reference files and system metadata), call the Public API method `deleteObject(String pid)`. - Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. @@ -106,8 +105,8 @@ HashStore's '/metadata' directory holds all metadata for objects stored in HashS - If there are multiple metadata objects, a 'formatId' must be specified when calling `retrieveMetadata` (ex. `retrieveMetadata(pid, formatId)`) **How do I delete a metadata file?** -- Like `retrieveMetadata`, call the Public API method `deleteMetadata` which will delete the metadata object associated with the given pid. -- If there are multiple metadata objects, a 'formatId' must be specified when calling `deleteMetadata` to ensure the expected metadata object is deleted. +- Like `retrieveMetadata`, call the Public API method `deleteMetadata(String pid, String formatId)` which will delete the metadata object associated with the given pid. +- To delete all metadata objects related to a given 'pid', call `deleteMetadata(String pid)` ###### What are HashStore reference files? @@ -158,7 +157,7 @@ hashstore.yaml HashStore is a Java package, and built using the [Maven](https://maven.apache.org/) build tool. -To install `hashstore` locally, install Java and Maven on your local machine, +To install `HashStore-java` locally, install Java and Maven on your local machine, and then install or build the package with `mvn install` or `mvn package`, respectively. We also maintain a parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). From 90b4d5ac42dabafe6d0ecc07a4613977cf098611 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 6 Feb 2024 08:53:19 -0800 Subject: [PATCH 236/553] Fix typo in 'renamePathForDeletion' javadoc --- .../dataone/hashstore/filehashstore/FileHashStoreUtility.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index e3ddc53a..bab3ff3f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -138,7 +138,7 @@ public static List getFilesFromDir(Path directory) throws IOException { /** * Rename the given path to the 'file name' + '_delete' * - * @param pathToRename The path to the file to be renamed with '_deleted' + * @param pathToRename The path to the file to be renamed with '_delete' * @return Path to the file with '_delete' appended * @throws IOException Issue with renaming the given file path */ From f3e4fea937ae0b974a481bd24e7c16949943c936 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 8 Feb 2024 14:00:29 -0800 Subject: [PATCH 237/553] Refactor 'tagObject' to handle scenarios where exceptions are unnecessary and add new junit tests --- .../filehashstore/FileHashStore.java | 90 ++++++++--- .../FileHashStoreReferencesTest.java | 149 ++++++++++++++---- 2 files changed, 182 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 930e728e..c7ea5046 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -660,15 +660,55 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi try { Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + boolean pidRefsFound = Files.exists(absPidRefsPath); + boolean cidRefsFound = Files.exists(absCidRefsPath); - // Check that pid refs file doesn't exist yet - if (Files.exists(absPidRefsPath)) { - String errMsg = "FileHashStore.tagObject - pid refs file already exists for pid: " - + pid + ". A pid can only reference one cid."; - logFileHashStore.error(errMsg); - throw new PidRefsFileExistsException(errMsg); - - } else if (Files.exists(absCidRefsPath)) { + // Both files found, confirm that reference files are where they are expected to be + if (pidRefsFound && cidRefsFound) { + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "FileHashStore.tagObject - Object with cid: " + cid + + " already exists and is tagged with pid: " + pid + ); + } else if (pidRefsFound && !cidRefsFound) { + // If pid refs exists, it can only contain and reference one cid + // First, compare the cid retrieved from the pid refs file from the supplid cid + String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); + if (retrievedCid.equalsIgnoreCase(cid)) { + // The pid correctly references the cid, but the cid refs file is missing + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + // Verify tagging process, this throws exceptions if there's an issue + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "FileHashStore.tagObject - Pid refs file exists for pid: " + pid + + ", but cid refs file for: " + cid + + " is missing. Missing cid refs file created and tagging completed." + ); + return; + } else { + // Check if the retrieved cid refs file exists and pid is referenced + Path retrievedAbsCidRefsPath = getExpectedPath( + retrievedCid, "refs", HashStoreIdTypes.cid.getName() + ); + boolean retrievedAbsCidRefsPathExists = Files.exists(retrievedAbsCidRefsPath); + if (retrievedAbsCidRefsPathExists && isStringInRefsFile( + pid, retrievedAbsCidRefsPath + )) { + // This pid is accounted for and tagged as expected. + String errMsg = + "FileHashStore.tagObject - pid refs file already exists for pid: " + pid + + ", and the associated cid refs file contains the pid." + + " A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); + } + // Orphaned pid refs file found, the retrieved cid refs file exists + // but doesn't contain the cid. Proceed to overwrite the pid refs file. + // There is no return statement, so we move out of this if block. + } + } else if (!pidRefsFound && cidRefsFound) { // Only update cid refs file if pid is not in the file boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { @@ -680,30 +720,28 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi move(pidRefsTmpFile, absPathPidRefsFile, "refs"); // Verify tagging process, this throws exceptions if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( "FileHashStore.tagObject - Object with cid: " + cid + " has been updated and tagged successfully with pid: " + pid ); - - } else { - // Get pid and cid refs files - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); - // Move refs files to permanent location - File absPathPidRefsFile = absPidRefsPath.toFile(); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // Verify tagging process, this throws exceptions if there's an issue - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - - logFileHashStore.info( - "FileHashStore.tagObject - Object with cid: " + cid - + " has been tagged successfully with pid: " + pid - ); + return; } + // Get pid and cid refs files + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + // Move refs files to permanent location + File absPathPidRefsFile = absPidRefsPath.toFile(); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + // Verify tagging process, this throws exceptions if there's an issue + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "FileHashStore.tagObject - Object with cid: " + cid + + " has been tagged successfully with pid: " + pid + ); + } finally { // Release lock synchronized (referenceLockedCids) { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 4c185ee5..6147e3bb 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -12,12 +12,14 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.security.NoSuchAlgorithmException; import java.util.List; import java.util.Properties; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -67,95 +69,180 @@ public void initializeFileHashStore() { @TempDir public Path tempFolder; + + /** + * Check that tagObject creates reference files + */ + @Test + public void tagObject() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + assertEquals(1, pidRefsFiles.length); + File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + assertEquals(1, cidRefsFiles.length); + } + /** * Check that tagObject writes expected pid refs files */ @Test - public void tagObject_pidRefsFile() throws Exception { + public void tagObject_pidRefsFileContent() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); assertTrue(Files.exists(pidRefsFilePath)); + + String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); + assertEquals(cid, retrievedCid); } /** * Check that tagObject writes expected cid refs files */ @Test - public void tagObject_cidRefsFile() throws Exception { + public void tagObject_cidRefsFileContent() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); assertTrue(Files.exists(cidRefsFilePath)); + + String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); + assertEquals(pid, retrievedPid); } /** - * Check that tagObject throws exception when pid refs file already exists + * Check that tagObject does not throw exception when pid and cid refs + * file already exists */ @Test - public void tagObject_pidRefsFileExists() throws Exception { + public void tagObject_refsFileAlreadyExists() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - assertThrows(PidRefsFileExistsException.class, () -> { - fileHashStore.tagObject(pid, cid); - }); - + // Should not throw any exceptions, everything is where it's supposed to be. + fileHashStore.tagObject(pid, cid); + // Confirm that there is only 1 of each refs file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + assertEquals(1, pidRefsFiles.length); + File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + assertEquals(1, cidRefsFiles.length); } /** - * Check that tagObject creates a pid refs file and updates an existing cid refs file + * Check tagObject throws exception when the supplied cid is different from what is + * found in the pid refs file, and the associated cid refs file from the pid refs file + * is correctly tagged (everything is where it's expected to be) */ @Test - public void tagObject_cidRefsFileExists() throws Exception { + public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileFound() + throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + String existingCid = "987654321fedcba"; + fileHashStore.tagObject(pid, existingCid); - String pidAdditional = "another.pid.2"; - fileHashStore.tagObject(pidAdditional, cid); + // This will throw an exception because the pid and cid refs file are in sync + assertThrows(PidRefsFileExistsException.class, () -> { + fileHashStore.tagObject(pid, cid); + }); + } - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - assertTrue(Files.exists(pidRefsFilePath)); + /** + * Check tagObject overwrites a oprhaned pid refs file. + */ + @Test + public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound() + throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + String cidForOrphanPidRef = "987654321fedcba"; - // Check cid refs file - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( - pidAdditional, cidRefsFilePath + // Create orphaned pid refs file + Path absPidRefsPath = fileHashStore.getExpectedPath( + pid, "refs", HashStoreIdTypes.pid.getName() ); - assertTrue(pidFoundInCidRefFiles); + File pidRefsTmpFile = fileHashStore.writeRefsFile( + cidForOrphanPidRef, HashStoreIdTypes.pid.getName() + ); + File absPathPidRefsFile = absPidRefsPath.toFile(); + fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + + fileHashStore.tagObject(pid, cid); + // There should only be 1 of each refs file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + assertEquals(1, pidRefsFiles.length); + File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + assertEquals(1, cidRefsFiles.length); } /** - * Check that tagObject creates pid refs file when pid already exists in cid refs file + * Check that tagObject creates a missing cid refs file */ @Test - public void tagObject_pidExistsInCidRefsFile() throws Exception { + public void tagObject_pidRefsFileFound_cidRefsFileNotFound() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - - File cidRefsTmpFile = fileHashStore.writeRefsFile(pid, "cid"); + fileHashStore.tagObject(pid, cid); + // Manually delete the cid refs file Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - fileHashStore.move(cidRefsTmpFile, cidRefsFilePath.toFile(), "refs"); + Files.delete(cidRefsFilePath); fileHashStore.tagObject(pid, cid); + // Confirm that there is only 1 of each refs file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + assertEquals(1, pidRefsFiles.length); + File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + assertEquals(1, cidRefsFiles.length); + } - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - assertTrue(Files.exists(pidRefsFilePath)); - // Confirm that cid refs file only has 1 line - List lines = Files.readAllLines(cidRefsFilePath); - int numberOfLines = lines.size(); - assertEquals(numberOfLines, 1); + /** + * Check that tagObject creates a pid refs file and updates an existing cid refs file + */ + @Test + public void tagObject_pidRefsFileNotFound_cidRefsFileFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + String pidAdditional = "another.pid.2"; + fileHashStore.tagObject(pidAdditional, cid); + + // Confirm missing pid refs file has been created + Path pidAdditionalRefsFilePath = fileHashStore.getExpectedPath( + pidAdditional, "refs", "pid" + ); + assertTrue(Files.exists(pidAdditionalRefsFilePath)); + // Check cid refs file + Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( + pidAdditional, cidRefsFilePath + ); + assertTrue(pidFoundInCidRefFiles); + + // There should be 2 pid refs file, and 1 cid refs file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + assertEquals(2, pidRefsFiles.length); + File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + assertEquals(1, cidRefsFiles.length); } + /** * Check that the cid supplied is written into the file given */ From 3d37174b0f4ff990af3f1692e02ef0efe4c4163b Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 15 Feb 2024 09:21:13 -0800 Subject: [PATCH 238/553] replaced pytest with JUnit. Removed Python/Black formatting sentence --- CONTRIBUTING.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4a4ce31e..d310a887 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -115,8 +115,8 @@ start work on the next release. ## Testing -**Unit and integration tests**. HashStore has a full suite of `pytest` tests in the `tests` subdirectory. -Any new code developed should include a robust set of tests for each public +**Unit and integration tests**. HashStore has a full suite of `JUnit` tests in the `tests` +subdirectory. Any new code developed should include a robust set of tests for each public method, as well as integration tests from new feature sets. Tests should fully exercise the feature to ensure that it responds correctly to both good data inputs and various classes of corrupt or bad data. All tests should pass before submitting a PR @@ -130,9 +130,9 @@ for this GitHub Actions status badge and make sure it says "Passing": Code should be written to professional standards to enable clean, well-documented, readable, and maintainable software. While there has been significant variability in the coding styles applied historically, new contributions should strive for -clean code formatting. We generally follow PEP8 guidelines for Python code formatting, -typically enforced through the `black` code formatting package. For Java, we typically -follow the [Google Style](https://raw.githubusercontent.com/google/styleguide/gh-pages/eclipse-java-google-style.xml), enforced through the VS Code formatting extensions. +clean code formatting. For Java, we generally follow the [Google +Style](https://raw.githubusercontent.com/google/styleguide/gh-pages/eclipse-java-google-style.xml), +enforced through the VS Code formatting extensions. ## Contributor license agreement From 786f81e3ebf6bf768b182cc534cbca3bb0ab2a78 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Feb 2024 09:34:17 -0800 Subject: [PATCH 239/553] Add TODO item --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c7ea5046..53d90775 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -41,6 +41,8 @@ import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +// TODO REMINDER: Both deleteObject and deleteMetadata need to be thread safe, sync cid and pid + /** * FileHashStore is a HashStore adapter class that manages the storage of objects and metadata to a * given store path on disk. To instantiate FileHashStore, the calling app must provide predefined From fd605a81a30135f2162c07837ca27db4f3e374e0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 9 May 2024 09:55:54 -0700 Subject: [PATCH 240/553] Fix inaccurate javadoc return description in 'ObjectMetadata' class --- src/main/java/org/dataone/hashstore/ObjectMetadata.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 22f6689d..159b039c 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -43,7 +43,7 @@ public String getPid() { /** * Set the persistent identifier * - * @return cid + * @return pid */ public String setPid(String pid) { this.pid = pid; From 7a343cb1e0e58de96570c5860b073b4e19708efb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 9 May 2024 10:07:36 -0700 Subject: [PATCH 241/553] Swallow file already exists exception when creating directories in 'move' method --- .../hashstore/filehashstore/FileHashStore.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 53d90775..7e05da99 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1817,7 +1817,16 @@ protected void move(File source, File target, String entity) throws IOException, // Create parent directory if it doesn't exist if (!destinationDirectory.exists()) { Path destinationDirectoryPath = destinationDirectory.toPath(); - Files.createDirectories(destinationDirectoryPath); + + try { + Files.createDirectories(destinationDirectoryPath); + + } catch (FileAlreadyExistsException faee) { + logFileHashStore.warn( + "FileHashStore.move - Directory already exists at: " + + destinationDirectoryPath + " - Skipping directory creation" + ); + } } // Move file @@ -1830,7 +1839,7 @@ protected void move(File source, File target, String entity) throws IOException, + targetFilePath ); - } catch (FileAlreadyExistsException amnse) { + } catch (FileAlreadyExistsException faee) { logFileHashStore.warn( "FileHashStore.move - File already exists, skipping request to move object." + " Source: " + source + ". Target: " + target From 519f9b654d3997c8a4b7aa9afccdc722ff427c4d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 9 May 2024 10:36:29 -0700 Subject: [PATCH 242/553] Synchronize 'deleteObjectByCid' method with other delete methods for thread safety --- .../filehashstore/FileHashStore.java | 59 +++++++++++++++---- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7e05da99..5043db2f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -815,7 +815,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { String errMsg = - "FileHashStore.deleteObject - Cid refs file does not exist for cid: " + cid + "FileHashStore.findObject - Cid refs file does not exist for cid: " + cid + " with address: " + absCidRefsPath + ", but pid refs file exists."; logFileHashStore.error(errMsg); throw new OrphanPidRefsFileException(errMsg); @@ -841,7 +841,7 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio } } else { - String errMsg = "FileHashStore.deleteObject - Pid refs file exists, but pid (" + pid + String errMsg = "FileHashStore.findObject - Pid refs file exists, but pid (" + pid + ") not found in cid refs file for cid: " + cid + " with address: " + absCidRefsPath; logFileHashStore.error(errMsg); @@ -1864,14 +1864,16 @@ protected void move(File source, File target, String entity) throws IOException, } /** - * Attempt to delete an object based on the given content identifier (cid). If the object - * has pids that references it and/or a cid refs file exists, the object will not be deleted. - * + * Attempt to delete an object based on the given content identifier (cid). If the object has + * pids that references it and/or a cid refs file exists, the object will not be deleted. + * * @param cid Content identifier * @throws IOException If an issue arises during deletion of object * @throws NoSuchAlgorithmException Incompatible algorithm used to find relative path to cid + * @throws InterruptedException Issue with synchronization of cid deletion */ - protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException { + protected void deleteObjectByCid(String cid) + throws IOException, NoSuchAlgorithmException, InterruptedException { Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); if (Files.exists(absCidRefsPath)) { String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" @@ -1886,13 +1888,46 @@ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithm ); Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - // If file exists, delete it. - if (Files.exists(expectedRealPath)) { - Files.delete(expectedRealPath); + // Minimize the amount of time the cid is locked + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" + + " waiting to delete object with cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.add(cid); + } + + try { + // If file exists, delete it. + if (Files.exists(expectedRealPath)) { + Files.delete(expectedRealPath); + } + String debugMsg = "FileHashStore - deleteObjectByCid: object deleted at" + + expectedRealPath; + logFileHashStore.debug(debugMsg); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.remove(cid); + referenceLockedCids.notifyAll(); + } } - String debugMsg = "FileHashStore - deleteObjectByCid: object deleted at" - + expectedRealPath; - logFileHashStore.debug(debugMsg); } } From 6fb8abe39e42aeb01bbd45ad644627dd05502cf5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 9 May 2024 10:50:01 -0700 Subject: [PATCH 243/553] Fix typos in 'FileHashStore' class --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 5043db2f..cd179ed2 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1675,7 +1675,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor * @throws NoSuchAlgorithmException Unable to generate new instance of supplied algorithm * @throws IOException Issue with writing file from InputStream * @throws SecurityException Unable to write to tmpFile - * @throws FileNotFoundException tmnpFile cannot be found + * @throws FileNotFoundException tmpFile cannot be found */ protected Map writeToTmpFileAndGenerateChecksums( File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm @@ -2230,7 +2230,7 @@ protected Path getExpectedPath(String abId, String entity, String formatId) if (!Files.exists(pathToPidRefsFile)) { String errMsg = "FileHashStore.getExpectedPath - Pid Refs file does not exist for pid: " + abId - + " with object address: " + pathToPidRefsFile + ". Cannot retrive cid."; + + " with object address: " + pathToPidRefsFile + ". Cannot retrieve cid."; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } else { From 4c9348a44d571c088949e1d7ceaf32ddf99ef4f5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 9 May 2024 14:40:50 -0700 Subject: [PATCH 244/553] Remove TODO item --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index cd179ed2..8a7c8219 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -41,8 +41,6 @@ import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; -// TODO REMINDER: Both deleteObject and deleteMetadata need to be thread safe, sync cid and pid - /** * FileHashStore is a HashStore adapter class that manages the storage of objects and metadata to a * given store path on disk. To instantiate FileHashStore, the calling app must provide predefined From 0c3fc660993df7abdc6b3dea7ef2931cb52b03e7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 9 May 2024 15:24:16 -0700 Subject: [PATCH 245/553] Improve clarity in comments for 'getExpectedPath' method --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8a7c8219..d30669e6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2218,7 +2218,7 @@ protected Path getExpectedPath(String abId, String entity, String formatId) Path realPath; String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); if (entity.equalsIgnoreCase("object")) { - // hashId = pidRefId + // `hashId` is the pid refs file string to split, and contains the cid String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); @@ -2255,13 +2255,13 @@ protected Path getExpectedPath(String abId, String entity, String formatId) } else if (entity.equalsIgnoreCase("refs")) { if (formatId.equalsIgnoreCase(HashStoreIdTypes.pid.getName())) { - // hashId = pidRefId + // `hashId` is the pid refs file string to split String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); } else if (formatId.equalsIgnoreCase(HashStoreIdTypes.cid.getName())) { - // hashId = cid + // `hashId` is the cid refs file string to split String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); From 5fa19f88189f7416895c625d3548ecaf9a43b9a3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 9 May 2024 15:56:39 -0700 Subject: [PATCH 246/553] Update comment formatting, revise javadoc and fix typo --- .../hashstore/filehashstore/FileHashStore.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d30669e6..4c5a987c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -672,7 +672,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi ); } else if (pidRefsFound && !cidRefsFound) { // If pid refs exists, it can only contain and reference one cid - // First, compare the cid retrieved from the pid refs file from the supplid cid + // First, compare the cid retrieved from the pid refs file from the supplied cid String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); if (retrievedCid.equalsIgnoreCase(cid)) { // The pid correctly references the cid, but the cid refs file is missing @@ -1894,16 +1894,16 @@ protected void deleteObjectByCid(String cid) } catch (InterruptedException ie) { String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" - + " waiting to delete object with cid: " + cid + "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " + + "interrupted while waiting to delete object with cid: " + cid + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid - ); + "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " + + cid); referenceLockedCids.add(cid); } @@ -1920,8 +1920,8 @@ protected void deleteObjectByCid(String cid) // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid - ); + "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + + cid); referenceLockedCids.remove(cid); referenceLockedCids.notifyAll(); } @@ -2203,7 +2203,7 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea } /** - * Get the absolute path of a HashStore object or metadata file + * Get the absolute path of a HashStore object, metadata or refs file * * @param abId Authority-based, persistent or content identifier * @param entity "object" or "metadata" From bc0be54405ac191a49c31eba038352abfea6a1bb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 10 May 2024 10:48:02 -0700 Subject: [PATCH 247/553] Move initial synchronization block to within try statement to prevent potential deadlock --- .../filehashstore/FileHashStore.java | 199 +++++++++--------- 1 file changed, 100 insertions(+), 99 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4c5a987c..a186c5f9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -475,29 +475,29 @@ private ObjectMetadata syncPutObject( String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, PidRefsFileExistsException, IOException, RuntimeException, InterruptedException { - // Lock pid for thread safety, transaction control and atomic writing - // A pid can only be stored once and only once, subsequent calls will - // be accepted but will be rejected if pid hash object exists - synchronized (objectLockedIds) { - if (objectLockedIds.contains(pid)) { - String errMsg = - "FileHashStore.syncPutObject - Duplicate object request encountered for pid: " - + pid + ". Already in progress."; - logFileHashStore.warn(errMsg); - throw new RuntimeException(errMsg); - } - logFileHashStore.debug( - "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid - ); - objectLockedIds.add(pid); - } + logFileHashStore.debug( + "FileHashStore.syncPutObject - called .putObject() to store pid: " + pid + + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum + + ". checksumAlgorithm: " + checksumAlgorithm + ); try { - logFileHashStore.debug( - "FileHashStore.syncPutObject - called .putObject() to store pid: " + pid - + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum - + ". checksumAlgorithm: " + checksumAlgorithm - ); + // Lock pid for thread safety, transaction control and atomic writing + // A pid can only be stored once and only once, subsequent calls will + // be accepted but will be rejected if pid hash object exists + synchronized (objectLockedIds) { + if (objectLockedIds.contains(pid)) { + String errMsg = + "FileHashStore.syncPutObject - Duplicate object request encountered for pid: " + + pid + ". Already in progress."; + logFileHashStore.warn(errMsg); + throw new RuntimeException(errMsg); + } + logFileHashStore.debug( + "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid + ); + objectLockedIds.add(pid); + } // Store object ObjectMetadata objInfo = putObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize @@ -637,32 +637,32 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForEmptyString(pid, "pid", "tagObject"); FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.tagObject - referenceLockedCids lock was interrupted while" - + " waiting to tag pid: " + pid + " and cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.tagObject - Synchronizing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.add(cid); - } - try { Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); boolean pidRefsFound = Files.exists(absPidRefsPath); boolean cidRefsFound = Files.exists(absCidRefsPath); + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.tagObject - referenceLockedCids lock was interrupted while" + + " waiting to tag pid: " + pid + " and cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.tagObject - Synchronizing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.add(cid); + } + // Both files found, confirm that reference files are where they are expected to be if (pidRefsFound && cidRefsFound) { verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); @@ -890,31 +890,32 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF // However, the same pid could be used with different formatIds, so // synchronize ids with pid + formatId; String pidFormatId = pid + checkedFormatId; - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(pidFormatId)) { - try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); + logFileHashStore.debug( + "FileHashStore.storeMetadata - .putMetadata() request for pid: " + pid + + ". formatId: " + checkedFormatId + ); - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.storeMetadata - Metadata lock was interrupted while" - + " storing metadata for: " + pid + " and formatId: " + checkedFormatId - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); + try { + synchronized (metadataLockedIds) { + while (metadataLockedIds.contains(pidFormatId)) { + try { + metadataLockedIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.storeMetadata - Metadata lock was interrupted while" + + " storing metadata for: " + pid + " and formatId: " + checkedFormatId + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } } + logFileHashStore.debug( + "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid + ); + metadataLockedIds.add(pidFormatId); } - logFileHashStore.debug( - "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid - ); - metadataLockedIds.add(pidFormatId); - } - try { - logFileHashStore.debug( - "FileHashStore.storeMetadata - .putMetadata() request for pid: " + pid - + ". formatId: " + checkedFormatId - ); // Store metadata String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( @@ -1200,27 +1201,27 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Pid refs file Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" - + " waiting to delete object with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); + try { + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" + + " waiting to delete object with cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid + ); + referenceLockedCids.add(cid); } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.add(cid); - } - try { // Then pid reference file deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Remove pid from cid refs file @@ -1886,28 +1887,28 @@ protected void deleteObjectByCid(String cid) ); Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - // Minimize the amount of time the cid is locked - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " - + "interrupted while waiting to delete object with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); + try { + // Minimize the amount of time the cid is locked + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " + + "interrupted while waiting to delete object with cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } } + logFileHashStore.debug( + "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " + + cid); + referenceLockedCids.add(cid); } - logFileHashStore.debug( - "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " - + cid); - referenceLockedCids.add(cid); - } - try { // If file exists, delete it. if (Files.exists(expectedRealPath)) { Files.delete(expectedRealPath); From 1a3835d923c974f5b538e95d48fc3e310396af25 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 10 May 2024 12:14:12 -0700 Subject: [PATCH 248/553] Revise comments and extract 'checkObjectEquality' method from FileHashStore to utility class --- .../filehashstore/FileHashStore.java | 63 ++++++------------- .../filehashstore/FileHashStoreUtility.java | 20 ++++++ 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a186c5f9..be2e9b5b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -229,11 +229,13 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep * @param storeMetadataNamespace Default metadata namespace (`formatId`) * @throws NoSuchAlgorithmException If algorithm supplied is not supported * @throws IOException If `hashstore.yaml` config file cannot be retrieved/opened + * @throws IllegalArgumentException If depth or width is less than 0 + * @throws IllegalStateException If dirs/objects exist, but HashStore config is missing */ protected void verifyHashStoreProperties( Path storePath, int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace - ) throws NoSuchAlgorithmException, IOException { + ) throws NoSuchAlgorithmException, IOException, IllegalArgumentException, IllegalStateException { if (storeDepth <= 0 || storeWidth <= 0) { String errMsg = "FileHashStore - Depth and width must be greater than 0." + " Depth: " + storeDepth + ". Width: " + storeWidth; @@ -266,12 +268,12 @@ protected void verifyHashStoreProperties( ); // Verify properties when 'hashstore.yaml' found - checkConfigurationEquality("store depth", storeDepth, existingStoreDepth); - checkConfigurationEquality("store width", storeWidth, existingStoreWidth); - checkConfigurationEquality("store algorithm", storeAlgorithm, existingStoreAlgorithm); - checkConfigurationEquality( - "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs - ); + FileHashStoreUtility.checkObjectEquality("store depth", storeDepth, existingStoreDepth); + FileHashStoreUtility.checkObjectEquality("store width", storeWidth, existingStoreWidth); + FileHashStoreUtility.checkObjectEquality("store algorithm", storeAlgorithm, + existingStoreAlgorithm); + FileHashStoreUtility.checkObjectEquality( + "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs); } else { // Check if HashStore exists at the given store path (and is missing config) @@ -362,27 +364,6 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { } } - /** - * Checks the equality of a supplied value with an existing value for a specific configuration - * property. - * - * @param propertyName The name of the config property being checked - * @param suppliedValue The value supplied for the config property - * @param existingValue The existing value of the config property - * @throws IllegalArgumentException If the supplied value is not equal to the existing value - */ - protected void checkConfigurationEquality( - String propertyName, Object suppliedValue, Object existingValue - ) { - if (!Objects.equals(suppliedValue, existingValue)) { - String errMsg = "FileHashStore.checkConfigurationEquality() - Supplied " + propertyName - + ": " + suppliedValue + " does not match the existing configuration value: " - + existingValue; - logFileHashStore.fatal(errMsg); - throw new IllegalArgumentException(errMsg); - } - } - /** * Build the string content of the configuration file for HashStore - 'hashstore.yaml' * @@ -483,8 +464,7 @@ private ObjectMetadata syncPutObject( try { // Lock pid for thread safety, transaction control and atomic writing - // A pid can only be stored once and only once, subsequent calls will - // be accepted but will be rejected if pid hash object exists + // An object is stored once and only once synchronized (objectLockedIds) { if (objectLockedIds.contains(pid)) { String errMsg = @@ -556,11 +536,9 @@ private ObjectMetadata syncPutObject( public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException { // 'putObject' is called directly to bypass the pid synchronization implemented to - // efficiently handle duplicate object store requests. Since there is no pid, calling - // 'storeObject' would unintentionally create a bottleneck for all requests without a - // pid (they would be executed sequentially). This scenario occurs when metadata about - // the object (ex. form data including the pid, checksum, checksum algorithm, etc.) is - // unavailable. + // efficiently handle object store requests without a pid. This scenario occurs when + // metadata about the object (ex. form data including the pid, checksum, checksum + // algorithm, etc.) is unavailable. // // Note: This method does not tag the object to make it discoverable, so the client can // call 'verifyObject' (optional) to check that the object is valid, and 'tagObject' @@ -636,13 +614,13 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "tagObject"); FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); + // Prepare booleans to determine path of tagObject to proceed with + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + boolean pidRefsFound = Files.exists(absPidRefsPath); + boolean cidRefsFound = Files.exists(absCidRefsPath); try { - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - boolean pidRefsFound = Files.exists(absPidRefsPath); - boolean cidRefsFound = Files.exists(absCidRefsPath); - synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { try { @@ -676,10 +654,10 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); if (retrievedCid.equalsIgnoreCase(cid)) { // The pid correctly references the cid, but the cid refs file is missing + // Create the file and verify tagging process File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); File absPathCidRefsFile = absCidRefsPath.toFile(); move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // Verify tagging process, this throws exceptions if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( "FileHashStore.tagObject - Pid refs file exists for pid: " + pid @@ -714,11 +692,10 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi if (!pidFoundInCidRefFiles) { updateRefsFile(pid, absCidRefsPath, "add"); } - // Get the pid refs file + // Get the pid refs file and verify tagging process File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - // Verify tagging process, this throws exceptions if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( "FileHashStore.tagObject - Object with cid: " + cid diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index bab3ff3f..e871ac3f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -12,6 +12,7 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Random; import java.util.stream.Stream; @@ -262,4 +263,23 @@ public static File generateTmpFile(String prefix, Path directory) throws IOExcep newFile.deleteOnExit(); return newFile; } + + /** + * Ensures that two objects are equal. If not, throws an IllegalArgumentException. + * + * @param nameValue The name of the object being checked + * @param suppliedValue The value supplied to compare + * @param existingValue The existing value to compare with + * @throws IllegalArgumentException If the supplied value is not equal to the existing value + */ + public static void checkObjectEquality( + String nameValue, Object suppliedValue, Object existingValue) { + if (!Objects.equals(suppliedValue, existingValue)) { + String errMsg = + "FileHashStore.checkConfigurationEquality() - Mismatch in " + nameValue + ": " + + suppliedValue + " does not match the existing configuration value: " + + existingValue; + throw new IllegalArgumentException(errMsg); + } + } } From 9aef583088d5bbcc9016d988a3f95878c8ce0121 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 10 May 2024 13:32:05 -0700 Subject: [PATCH 249/553] Refactor 'deleteObject' to delete metadata in respective blocks for scenarios --- .../filehashstore/FileHashStore.java | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index be2e9b5b..1897ab57 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1107,22 +1107,23 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti List metadataDocPaths = FileHashStoreUtility.getFilesFromDir( expectedPidMetadataDirectory ); - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } // Before we begin deleting files, we handle orphaned files scenarios try { // Begin by looking for the cid and confirming state // If a custom exception is thrown, this try block will return; - cid = findObject(pid); + cid = findObject(id); } catch (OrphanPidRefsFileException oprfe) { - // Delete the pid refs file and return, nothing else to delete + // Delete the pid refs file Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete metadata documents + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid @@ -1147,6 +1148,11 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } + // Delete metadata documents + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid @@ -1155,11 +1161,15 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti return; } catch (PidNotFoundInCidRefsFileException pnficrfe) { - // Delete pid refs file and return, nothing else to delete + // Delete pid refs file and return Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete metadata documents + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + // Delete items FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = @@ -1199,6 +1209,11 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti referenceLockedCids.add(cid); } + // Delete metadata documents + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + // Then pid reference file deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Remove pid from cid refs file From 645d3e729c0c1fb6ca63f7a06bbe3dd8e3f314ea Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 10 May 2024 13:43:55 -0700 Subject: [PATCH 250/553] Refactor 'tagObject' to synchronize based on new array list 'referenceLockedPids' --- .../filehashstore/FileHashStore.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1897ab57..db16924f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -52,6 +52,7 @@ public class FileHashStore implements HashStore { private static final ArrayList objectLockedIds = new ArrayList<>(100); private static final ArrayList metadataLockedIds = new ArrayList<>(100); private static final ArrayList referenceLockedCids = new ArrayList<>(100); + private static final ArrayList referenceLockedPids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; @@ -621,10 +622,10 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi boolean cidRefsFound = Files.exists(absCidRefsPath); try { - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { + synchronized (referenceLockedPids) { + while (referenceLockedPids.contains(pid)) { try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); + referenceLockedPids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = @@ -636,9 +637,9 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } } logFileHashStore.debug( - "FileHashStore.tagObject - Synchronizing referenceLockedCids for cid: " + cid + "FileHashStore.tagObject - Synchronizing referenceLockedPids for pid: " + pid ); - referenceLockedCids.add(cid); + referenceLockedPids.add(pid); } // Both files found, confirm that reference files are where they are expected to be @@ -721,12 +722,12 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } finally { // Release lock - synchronized (referenceLockedCids) { + synchronized (referenceLockedPids) { logFileHashStore.debug( - "FileHashStore.tagObject - Releasing referenceLockedCids for cid: " + cid + "FileHashStore.tagObject - Releasing referenceLockedPids for pid: " + pid ); - referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll(); + referenceLockedPids.remove(pid); + referenceLockedPids.notifyAll(); } } } @@ -1088,6 +1089,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // If 'idType' is cid, attempt to delete the object directly without checking refs files if (idType.equals(HashStoreIdTypes.cid.getName())) { + // The following method is synchronized based on cids deleteObjectByCid(id); } else { @@ -1867,10 +1869,10 @@ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException, InterruptedException { Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); if (Files.exists(absCidRefsPath)) { + // The cid refs file exists, so the cid object cannot be deleted. String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" + " references, skipping deletion."; logFileHashStore.warn(warnMsg); - // The cid is referenced by pids, do not delete. } else { // Get permanent address of the actual cid From 644eae5a9e8b61f58a72b038f17b0cca3e73c197 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 10 May 2024 14:23:17 -0700 Subject: [PATCH 251/553] Refactor 'deleteObject' with pids to synchronize based on pids shared with 'tagObject' --- .../filehashstore/FileHashStore.java | 151 +++++++++--------- 1 file changed, 74 insertions(+), 77 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index db16924f..e24630a4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1098,7 +1098,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti String pid = id; List deleteList = new ArrayList<>(); - // Get list of metadata documents, these will always be deleted if they exist. + // Get list of metadata documents first, these will always be deleted if they exist + // and reduces the time spent in the synchronization block // Metadata directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( @@ -1112,16 +1113,71 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Before we begin deleting files, we handle orphaned files scenarios try { + synchronized (referenceLockedPids) { + while (referenceLockedPids.contains(pid)) { + try { + referenceLockedPids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedPids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedPids for pid: " + + pid); + referenceLockedPids.add(pid); + } // Begin by looking for the cid and confirming state // If a custom exception is thrown, this try block will return; cid = findObject(id); + // Proceed with comprehensive deletion - cid exists, nothing out of place + // Get all the required paths to streamline deletion process + // Permanent address of the object + Path objRealPath = getExpectedPath(pid, "object", null); + // Cid refs file + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + // Pid refs file + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + + // Rename metadata documents for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + + // Rename pid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Remove pid from cid refs file + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete obj and cid refs file only if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Rename empty cid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + // Rename actual object for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); + } else { + String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object deletion."; + logFileHashStore.warn(warnMsg); + } + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath + ); + } catch (OrphanPidRefsFileException oprfe) { - // Delete the pid refs file + // Rename pid refs file for deletion Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete metadata documents + // Rename metadata documents for deletion for (Path metadataDoc : metadataDocPaths) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); } @@ -1129,15 +1185,14 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid - + ". Deleted orphan pid refs file."; + + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); - return; } catch (OrphanRefsFilesException orfe) { - // Object does not exist, attempt to remove orphan files + // Object does not exist, but cid&pid refs do, attempt to remove orphan files + // Rename pid refs file for deletion Path absPidRefsPath = getExpectedPath(id, "refs", HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - // Add the pid refs file to deleteList deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Remove the pid from the cid refs file @@ -1145,29 +1200,30 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti cidRead, "refs", HashStoreIdTypes.cid.getName() ); updateRefsFile(pid, absCidRefsPath, "remove"); + // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } - // Delete metadata documents + // Rename metadata documents for deletion for (Path metadataDoc : metadataDocPaths) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); } + // Delete items FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid - + ". Deleted orphan files."; + + ". Deleted pid and cid ref files and metadata."; logFileHashStore.warn(warnMsg); - return; } catch (PidNotFoundInCidRefsFileException pnficrfe) { - // Delete pid refs file and return + // Rename pid refs file for deletion Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete metadata documents + // Rename metadata documents for deletion for (Path metadataDoc : metadataDocPaths) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); } @@ -1176,76 +1232,17 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " - + pid + ". Deleted orphan pid refs file."; + + pid + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); - return; - } - - // Proceed with comprehensive deletion - cid exists, nothing out of place - // Get all the required paths to streamline deletion process - // Permanent address of the object - Path objRealPath = getExpectedPath(pid, "object", null); - // Cid refs file - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - // Pid refs file - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - - try { - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" - + " waiting to delete object with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.add(cid); - } - - // Delete metadata documents - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - - // Then pid reference file - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove pid from cid refs file - updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete obj and cid refs file only if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Delete empty cid refs file - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - // Delete actual object - deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); - } else { - String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object deletion."; - logFileHashStore.warn(warnMsg); - } - // Delete all related items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath - ); } finally { // Release lock - synchronized (referenceLockedCids) { + synchronized (referenceLockedPids) { logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll(); + "FileHashStore.deleteObject - Releasing referenceLockedPids for pid: " + + pid); + referenceLockedPids.remove(pid); + referenceLockedPids.notifyAll(); } } } From 14062e5f83a388f55f60bf088aa6d4a2e1dc0811 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 10 May 2024 14:51:34 -0700 Subject: [PATCH 252/553] Fix bug in 'getExpectedPath' where metadata document id was not correctly formed (was only using formatId, instead of pid + formatId), fix affected test and rename variables for improved clarity --- .../filehashstore/FileHashStore.java | 34 +++++++++---------- .../hashstore/HashStoreClientTest.java | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e24630a4..db87e580 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1273,20 +1273,21 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); - // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getExpectedPath(pid, "metadata", formatId); + // Get permanent address of the metadata document by calculating the sha-256 hex digest + // of the 'pid' + 'formatId' + Path metadataDocPath = getExpectedPath(pid, "metadata", formatId); - if (!Files.exists(metadataCidPath)) { + if (!Files.exists(metadataDocPath)) { String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid - + " with metadata address: " + metadataCidPath; + + " with metadata address: " + metadataDocPath; logFileHashStore.warn(errMsg); } else { // Proceed to delete - Files.delete(metadataCidPath); + Files.delete(metadataDocPath); logFileHashStore.info( "FileHashStore.deleteMetadata - File deleted for: " + pid - + " with metadata address: " + metadataCidPath + + " with metadata address: " + metadataDocPath ); } } @@ -2210,11 +2211,11 @@ protected Path getExpectedPath(String abId, String entity, String formatId) Path realPath; String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); if (entity.equalsIgnoreCase("object")) { - // `hashId` is the pid refs file string to split, and contains the cid - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + // `hashId` here is the address of the pid refs file, and contains the cid + String pidRefsFileRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); - Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); + Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRefsFileRelativePath); // Attempt to retrieve the cid String objectCid; if (!Files.exists(pathToPidRefsFile)) { @@ -2233,27 +2234,26 @@ protected Path getExpectedPath(String abId, String entity, String formatId) realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); } else if (entity.equalsIgnoreCase("metadata")) { - // Get the pid metadata directory (the relative path of the hashId) + // Get the pid metadata directory (the sharded path of the hashId) String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); - // The file name for the metadata document is the hash of the supplied 'formatId' - String metadataFormatIdHash = FileHashStoreUtility.getPidHexDigest( - formatId, OBJECT_STORE_ALGORITHM - ); + // The file name for the metadata document is the hash of the supplied 'pid + 'formatId' + String metadataDocHash = + FileHashStoreUtility.getPidHexDigest(abId + formatId, OBJECT_STORE_ALGORITHM); realPath = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirRelPath).resolve( - metadataFormatIdHash + metadataDocHash ); } else if (entity.equalsIgnoreCase("refs")) { if (formatId.equalsIgnoreCase(HashStoreIdTypes.pid.getName())) { - // `hashId` is the pid refs file string to split + // `hashId` here is the pid refs file string to split String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); } else if (formatId.equalsIgnoreCase(HashStoreIdTypes.cid.getName())) { - // `hashId` is the cid refs file string to split + // `hashId` here is the cid refs file string to split String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId ); diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index dcd151e0..29307d64 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -225,7 +225,7 @@ public void client_storeMetadata() throws Exception { ); // The file name for the metadata document is the hash of the supplied 'formatId' String metadataCidPartTwo = FileHashStoreUtility.getPidHexDigest( - optFormatIdValue, storeAlgorithm + pid + optFormatIdValue, storeAlgorithm ); Path expectedMetadataPath = metadataDirectory.resolve(pidMetadataDirectory).resolve( metadataCidPartTwo From 318911839dd93e6ca4763d9da6f94d8eb316ae92 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 10 May 2024 15:51:28 -0700 Subject: [PATCH 253/553] Add comments to catch blocks in 'deleteObject' to help with debugging --- .../hashstore/filehashstore/FileHashStore.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index db87e580..f2e3ae59 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1133,7 +1133,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti referenceLockedPids.add(pid); } // Begin by looking for the cid and confirming state - // If a custom exception is thrown, this try block will return; + // If there is an issue with finding an object (ex. orphaned reference files), + // custom exceptions will be thrown and handled in the catch blocks cid = findObject(id); // Proceed with comprehensive deletion - cid exists, nothing out of place @@ -1173,7 +1174,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti ); } catch (OrphanPidRefsFileException oprfe) { - // Rename pid refs file for deletion + // `findObject` throws this exception when the cid refs file doesn't exist, + // so we only need to delete the pid refs file and related metadata documents + + // Begin by renaming pid refs file for deletion Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1189,7 +1193,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti logFileHashStore.warn(warnMsg); } catch (OrphanRefsFilesException orfe) { - // Object does not exist, but cid&pid refs do, attempt to remove orphan files + // `findObject` throws this exception when the pid and cid refs file exists, + // but the actual object being referenced by the pid does not exist + // Rename pid refs file for deletion Path absPidRefsPath = getExpectedPath(id, "refs", HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); @@ -1219,6 +1225,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti logFileHashStore.warn(warnMsg); } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // `findObject` throws this exception when both the pid and cid refs file exists + // but the pid is not found in the cid refs file. + // Rename pid refs file for deletion Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); From a76fe7a0551cc287cd3880b32ae6f17b4ffba165 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 13 May 2024 10:50:10 -0700 Subject: [PATCH 254/553] Revert 'deleteObject' order of operations to debug changes --- .../filehashstore/FileHashStore.java | 116 +++++++++--------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f2e3ae59..d5147c15 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1113,66 +1113,11 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Before we begin deleting files, we handle orphaned files scenarios try { - synchronized (referenceLockedPids) { - while (referenceLockedPids.contains(pid)) { - try { - referenceLockedPids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedPids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedPids for pid: " - + pid); - referenceLockedPids.add(pid); - } // Begin by looking for the cid and confirming state // If there is an issue with finding an object (ex. orphaned reference files), // custom exceptions will be thrown and handled in the catch blocks cid = findObject(id); - // Proceed with comprehensive deletion - cid exists, nothing out of place - // Get all the required paths to streamline deletion process - // Permanent address of the object - Path objRealPath = getExpectedPath(pid, "object", null); - // Cid refs file - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - // Pid refs file - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - - // Rename pid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove pid from cid refs file - updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete obj and cid refs file only if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Rename empty cid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - // Rename actual object for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); - } else { - String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object deletion."; - logFileHashStore.warn(warnMsg); - } - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath - ); - } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file and related metadata documents @@ -1191,6 +1136,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); + return; } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when the pid and cid refs file exists, @@ -1223,6 +1169,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files and metadata."; logFileHashStore.warn(warnMsg); + return; } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists @@ -1243,6 +1190,65 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); + return; + } + + try { + synchronized (referenceLockedPids) { + while (referenceLockedPids.contains(pid)) { + try { + referenceLockedPids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedPids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedPids for pid: " + + pid); + referenceLockedPids.add(pid); + } + + // Proceed with comprehensive deletion - cid exists, nothing out of place + // Get all the required paths to streamline deletion process + // Permanent address of the object + Path objRealPath = getExpectedPath(pid, "object", null); + // Cid refs file + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + // Pid refs file + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + + // Rename metadata documents for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + + // Rename pid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Remove pid from cid refs file + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete obj and cid refs file only if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Rename empty cid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + // Rename actual object for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); + } else { + String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object deletion."; + logFileHashStore.warn(warnMsg); + } + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath + ); } finally { // Release lock From 6261abcac1a2160a9fedc101960fd4922b5f5bc1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 13 May 2024 11:05:23 -0700 Subject: [PATCH 255/553] Revert changes to synchronization for tagObject and deleteObject for debugging --- .../filehashstore/FileHashStore.java | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d5147c15..8e0908da 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -52,7 +52,6 @@ public class FileHashStore implements HashStore { private static final ArrayList objectLockedIds = new ArrayList<>(100); private static final ArrayList metadataLockedIds = new ArrayList<>(100); private static final ArrayList referenceLockedCids = new ArrayList<>(100); - private static final ArrayList referenceLockedPids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; @@ -622,10 +621,10 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi boolean cidRefsFound = Files.exists(absCidRefsPath); try { - synchronized (referenceLockedPids) { - while (referenceLockedPids.contains(pid)) { + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(pid)) { try { - referenceLockedPids.wait(TIME_OUT_MILLISEC); + referenceLockedCids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = @@ -637,9 +636,9 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } } logFileHashStore.debug( - "FileHashStore.tagObject - Synchronizing referenceLockedPids for pid: " + pid + "FileHashStore.tagObject - Synchronizing referenceLockedCids for pid: " + pid ); - referenceLockedPids.add(pid); + referenceLockedCids.add(pid); } // Both files found, confirm that reference files are where they are expected to be @@ -722,12 +721,12 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } finally { // Release lock - synchronized (referenceLockedPids) { + synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.tagObject - Releasing referenceLockedPids for pid: " + pid + "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid ); - referenceLockedPids.remove(pid); - referenceLockedPids.notifyAll(); + referenceLockedCids.remove(pid); + referenceLockedCids.notifyAll(); } } } @@ -1194,14 +1193,14 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } try { - synchronized (referenceLockedPids) { - while (referenceLockedPids.contains(pid)) { + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(pid)) { try { - referenceLockedPids.wait(TIME_OUT_MILLISEC); + referenceLockedCids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = - "FileHashStore.deleteObject - referenceLockedPids lock was " + "FileHashStore.deleteObject - referenceLockedCids lock was " + "interrupted while waiting to delete objects for pid: " + pid + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); @@ -1209,9 +1208,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } } logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedPids for pid: " + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " + pid); - referenceLockedPids.add(pid); + referenceLockedCids.add(pid); } // Proceed with comprehensive deletion - cid exists, nothing out of place @@ -1252,12 +1251,12 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } finally { // Release lock - synchronized (referenceLockedPids) { + synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedPids for pid: " + "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " + pid); - referenceLockedPids.remove(pid); - referenceLockedPids.notifyAll(); + referenceLockedCids.remove(pid); + referenceLockedCids.notifyAll(); } } } From 71cbae8fe1f342c1a9afd97666bb77e6c76d837c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 13 May 2024 12:19:10 -0700 Subject: [PATCH 256/553] Revert changes to synchronization placement for debugging --- .../filehashstore/FileHashStore.java | 173 +++++++++--------- 1 file changed, 87 insertions(+), 86 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8e0908da..4902de7a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -462,22 +462,23 @@ private ObjectMetadata syncPutObject( + ". checksumAlgorithm: " + checksumAlgorithm ); - try { - // Lock pid for thread safety, transaction control and atomic writing - // An object is stored once and only once - synchronized (objectLockedIds) { - if (objectLockedIds.contains(pid)) { - String errMsg = - "FileHashStore.syncPutObject - Duplicate object request encountered for pid: " - + pid + ". Already in progress."; - logFileHashStore.warn(errMsg); - throw new RuntimeException(errMsg); - } - logFileHashStore.debug( - "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid - ); - objectLockedIds.add(pid); + // Lock pid for thread safety, transaction control and atomic writing + // An object is stored once and only once + synchronized (objectLockedIds) { + if (objectLockedIds.contains(pid)) { + String errMsg = + "FileHashStore.syncPutObject - Duplicate object request encountered for pid: " + + pid + ". Already in progress."; + logFileHashStore.warn(errMsg); + throw new RuntimeException(errMsg); } + logFileHashStore.debug( + "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid + ); + objectLockedIds.add(pid); + } + + try { // Store object ObjectMetadata objInfo = putObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize @@ -620,27 +621,27 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi boolean pidRefsFound = Files.exists(absPidRefsPath); boolean cidRefsFound = Files.exists(absCidRefsPath); - try { - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(pid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(pid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.tagObject - referenceLockedCids lock was interrupted while" - + " waiting to tag pid: " + pid + " and cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.tagObject - referenceLockedCids lock was interrupted while" + + " waiting to tag pid: " + pid + " and cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } - logFileHashStore.debug( - "FileHashStore.tagObject - Synchronizing referenceLockedCids for pid: " + pid - ); - referenceLockedCids.add(pid); } + logFileHashStore.debug( + "FileHashStore.tagObject - Synchronizing referenceLockedCids for pid: " + pid + ); + referenceLockedCids.add(pid); + } + try { // Both files found, confirm that reference files are where they are expected to be if (pidRefsFound && cidRefsFound) { verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); @@ -872,27 +873,27 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF + ". formatId: " + checkedFormatId ); - try { - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(pidFormatId)) { - try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); + synchronized (metadataLockedIds) { + while (metadataLockedIds.contains(pidFormatId)) { + try { + metadataLockedIds.wait(TIME_OUT_MILLISEC); - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.storeMetadata - Metadata lock was interrupted while" - + " storing metadata for: " + pid + " and formatId: " + checkedFormatId - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.storeMetadata - Metadata lock was interrupted while" + + " storing metadata for: " + pid + " and formatId: " + checkedFormatId + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } - logFileHashStore.debug( - "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid - ); - metadataLockedIds.add(pidFormatId); } + logFileHashStore.debug( + "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid + ); + metadataLockedIds.add(pidFormatId); + } + try { // Store metadata String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( @@ -1192,27 +1193,27 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti return; } - try { - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(pid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(pid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " - + pid); - referenceLockedCids.add(pid); } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " + + pid); + referenceLockedCids.add(pid); + } + try { // Proceed with comprehensive deletion - cid exists, nothing out of place // Get all the required paths to streamline deletion process // Permanent address of the object @@ -1893,28 +1894,28 @@ protected void deleteObjectByCid(String cid) ); Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - try { - // Minimize the amount of time the cid is locked - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " - + "interrupted while waiting to delete object with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } + // Minimize the amount of time the cid is locked + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " + + "interrupted while waiting to delete object with cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } - logFileHashStore.debug( - "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " - + cid); - referenceLockedCids.add(cid); } + logFileHashStore.debug( + "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " + + cid); + referenceLockedCids.add(cid); + } + try { // If file exists, delete it. if (Files.exists(expectedRealPath)) { Files.delete(expectedRealPath); From 059fee117d464ebb972aa7302aa9d602a8ccb543 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 13 May 2024 12:43:34 -0700 Subject: [PATCH 257/553] Replace usage of 'notifyAll' to 'notify' with synchronization blocks --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4902de7a..6ed5cfac 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -525,7 +525,7 @@ private ObjectMetadata syncPutObject( "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid ); objectLockedIds.remove(pid); - objectLockedIds.notifyAll(); + objectLockedIds.notify(); } } } @@ -727,7 +727,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid ); referenceLockedCids.remove(pid); - referenceLockedCids.notifyAll(); + referenceLockedCids.notify(); } } } @@ -924,7 +924,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF + " and formatId " + checkedFormatId ); metadataLockedIds.remove(pidFormatId); - metadataLockedIds.notifyAll(); + metadataLockedIds.notify(); } } } @@ -1257,7 +1257,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " + pid); referenceLockedCids.remove(pid); - referenceLockedCids.notifyAll(); + referenceLockedCids.notify(); } } } @@ -1931,7 +1931,7 @@ protected void deleteObjectByCid(String cid) "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid); referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll(); + referenceLockedCids.notify(); } } } From c01d2f3b37f2aa3d07fb2b16a61272b0806e3248 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 13 May 2024 13:25:03 -0700 Subject: [PATCH 258/553] Revert usage of 'notify' back to 'notifyAll' --- .../filehashstore/FileHashStore.java | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6ed5cfac..fc2d429e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -456,12 +456,6 @@ private ObjectMetadata syncPutObject( String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, PidRefsFileExistsException, IOException, RuntimeException, InterruptedException { - logFileHashStore.debug( - "FileHashStore.syncPutObject - called .putObject() to store pid: " + pid - + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum - + ". checksumAlgorithm: " + checksumAlgorithm - ); - // Lock pid for thread safety, transaction control and atomic writing // An object is stored once and only once synchronized (objectLockedIds) { @@ -479,6 +473,11 @@ private ObjectMetadata syncPutObject( } try { + logFileHashStore.debug( + "FileHashStore.syncPutObject - called .putObject() to store pid: " + pid + + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum + + ". checksumAlgorithm: " + checksumAlgorithm + ); // Store object ObjectMetadata objInfo = putObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize @@ -525,7 +524,7 @@ private ObjectMetadata syncPutObject( "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid ); objectLockedIds.remove(pid); - objectLockedIds.notify(); + objectLockedIds.notifyAll();; } } } @@ -615,11 +614,6 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "tagObject"); FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); - // Prepare booleans to determine path of tagObject to proceed with - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - boolean pidRefsFound = Files.exists(absPidRefsPath); - boolean cidRefsFound = Files.exists(absCidRefsPath); synchronized (referenceLockedCids) { while (referenceLockedCids.contains(pid)) { @@ -642,6 +636,12 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } try { + // Prepare booleans to determine path of tagObject to proceed with + Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + boolean pidRefsFound = Files.exists(absPidRefsPath); + boolean cidRefsFound = Files.exists(absCidRefsPath); + // Both files found, confirm that reference files are where they are expected to be if (pidRefsFound && cidRefsFound) { verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); @@ -727,7 +727,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid ); referenceLockedCids.remove(pid); - referenceLockedCids.notify(); + referenceLockedCids.notifyAll();; } } } @@ -868,11 +868,6 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF // However, the same pid could be used with different formatIds, so // synchronize ids with pid + formatId; String pidFormatId = pid + checkedFormatId; - logFileHashStore.debug( - "FileHashStore.storeMetadata - .putMetadata() request for pid: " + pid - + ". formatId: " + checkedFormatId - ); - synchronized (metadataLockedIds) { while (metadataLockedIds.contains(pidFormatId)) { try { @@ -894,6 +889,10 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF } try { + logFileHashStore.debug( + "FileHashStore.storeMetadata - .putMetadata() request for pid: " + pid + + ". formatId: " + checkedFormatId + ); // Store metadata String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( @@ -924,7 +923,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF + " and formatId " + checkedFormatId ); metadataLockedIds.remove(pidFormatId); - metadataLockedIds.notify(); + metadataLockedIds.notifyAll();; } } } @@ -1257,7 +1256,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " + pid); referenceLockedCids.remove(pid); - referenceLockedCids.notify(); + referenceLockedCids.notifyAll();; } } } @@ -1931,7 +1930,7 @@ protected void deleteObjectByCid(String cid) "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid); referenceLockedCids.remove(cid); - referenceLockedCids.notify(); + referenceLockedCids.notifyAll();; } } } From 0bc5c062f13daaaf6ce91046bfe78301c7ea2861 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 13 May 2024 14:09:31 -0700 Subject: [PATCH 259/553] Fix bug in sychronization with incorrect value (used pid instead of cid) --- .../filehashstore/FileHashStore.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index fc2d429e..bdb79231 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -484,6 +484,7 @@ private ObjectMetadata syncPutObject( ); // Tag object String cid = objInfo.getCid(); + objInfo.setPid(pid); tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid @@ -616,14 +617,14 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(pid)) { + while (referenceLockedCids.contains(cid)) { try { referenceLockedCids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = "FileHashStore.tagObject - referenceLockedCids lock was interrupted while" - + " waiting to tag pid: " + pid + " and cid: " + cid + + " waiting to tag pid: " + pid + " with cid: " + cid + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); @@ -632,7 +633,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi logFileHashStore.debug( "FileHashStore.tagObject - Synchronizing referenceLockedCids for pid: " + pid ); - referenceLockedCids.add(pid); + referenceLockedCids.add(cid); } try { @@ -725,8 +726,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi synchronized (referenceLockedCids) { logFileHashStore.debug( "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid - ); - referenceLockedCids.remove(pid); + + " with cid: " + cid); + referenceLockedCids.remove(cid); referenceLockedCids.notifyAll();; } } @@ -1193,7 +1194,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(pid)) { + while (referenceLockedCids.contains(cid)) { try { referenceLockedCids.wait(TIME_OUT_MILLISEC); @@ -1208,8 +1209,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } logFileHashStore.debug( "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " - + pid); - referenceLockedCids.add(pid); + + pid + " with cid: " + cid); + referenceLockedCids.add(cid); } try { @@ -1254,8 +1255,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti synchronized (referenceLockedCids) { logFileHashStore.debug( "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " - + pid); - referenceLockedCids.remove(pid); + + pid + " with cid: " + cid); + referenceLockedCids.remove(cid); referenceLockedCids.notifyAll();; } } From 702f90fb159f9664c607e33482d4f869668765c0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 13 May 2024 14:51:16 -0700 Subject: [PATCH 260/553] Replace usage of 'notifyAll' to 'notify' as we have implemented mutually exclusive locking, 'notifyAll' is unnecessary --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index bdb79231..1be8d077 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -525,7 +525,7 @@ private ObjectMetadata syncPutObject( "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid ); objectLockedIds.remove(pid); - objectLockedIds.notifyAll();; + objectLockedIds.notify(); } } } @@ -728,7 +728,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid + " with cid: " + cid); referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll();; + referenceLockedCids.notify(); } } } @@ -924,7 +924,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF + " and formatId " + checkedFormatId ); metadataLockedIds.remove(pidFormatId); - metadataLockedIds.notifyAll();; + metadataLockedIds.notify(); } } } @@ -1257,7 +1257,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " + pid + " with cid: " + cid); referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll();; + referenceLockedCids.notify(); } } } @@ -1931,7 +1931,7 @@ protected void deleteObjectByCid(String cid) "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid); referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll();; + referenceLockedCids.notify(); } } } From fa174898d033256ebb6516e094b48349c5f7ef5a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 14 May 2024 13:39:02 -0700 Subject: [PATCH 261/553] Add missing synchronization in 'deleteObject' on pid, and when accessing cid reference file --- .../filehashstore/FileHashStore.java | 346 +++++++++++------- 1 file changed, 207 insertions(+), 139 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1be8d077..1a5fed31 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -484,12 +484,12 @@ private ObjectMetadata syncPutObject( ); // Tag object String cid = objInfo.getCid(); - objInfo.setPid(pid); tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid + ". Permanent address: " + getExpectedPath(pid, "object", null) ); + objInfo.setPid(pid); return objInfo; } catch (NoSuchAlgorithmException nsae) { @@ -1098,166 +1098,234 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti String pid = id; List deleteList = new ArrayList<>(); - // Get list of metadata documents first, these will always be deleted if they exist - // and reduces the time spent in the synchronization block - // Metadata directory - String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest - ); - Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); - // Add all metadata doc paths to a List to iterate over below - List metadataDocPaths = FileHashStoreUtility.getFilesFromDir( - expectedPidMetadataDirectory - ); - - // Before we begin deleting files, we handle orphaned files scenarios - try { - // Begin by looking for the cid and confirming state - // If there is an issue with finding an object (ex. orphaned reference files), - // custom exceptions will be thrown and handled in the catch blocks - cid = findObject(id); - - } catch (OrphanPidRefsFileException oprfe) { - // `findObject` throws this exception when the cid refs file doesn't exist, - // so we only need to delete the pid refs file and related metadata documents - - // Begin by renaming pid refs file for deletion - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + // Store and delete is synchronized together + synchronized (objectLockedIds) { + if (objectLockedIds.contains(pid)) { + String errMsg = + "FileHashStore.syncPutObject - Duplicate object request encountered for " + + "pid: " + + pid + ". Already in progress."; + logFileHashStore.warn(errMsg); + throw new RuntimeException(errMsg); } + logFileHashStore.debug( + "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid); + objectLockedIds.add(pid); + } - FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = - "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid - + ". Deleted orphan pid refs file and metadata."; - logFileHashStore.warn(warnMsg); - return; + try { + // Get list of metadata documents first, these will always be deleted if they exist + // and reduces the time spent in the synchronization block + // Metadata directory + String pidHexDigest = + FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + pidHexDigest); + Path expectedPidMetadataDirectory = + METADATA_STORE_DIRECTORY.resolve(pidRelativePath); + // Add all metadata doc paths to a List to iterate over below + List metadataDocPaths = + FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); + + // Before we begin deleting files, we handle orphaned files scenarios + try { + // Begin by looking for the cid and confirming state + // If there is an issue with finding an object (ex. orphaned reference files), + // custom exceptions will be thrown and handled in the catch blocks + cid = findObject(id); + + } catch (OrphanPidRefsFileException oprfe) { + // `findObject` throws this exception when the cid refs file doesn't exist, + // so we only need to delete the pid refs file and related metadata documents + + // Begin by renaming pid refs file for deletion + Path absPidRefsPath = + getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + // Rename metadata documents for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } - } catch (OrphanRefsFilesException orfe) { - // `findObject` throws this exception when the pid and cid refs file exists, - // but the actual object being referenced by the pid does not exist + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = + "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file and metadata."; + logFileHashStore.warn(warnMsg); + return; - // Rename pid refs file for deletion - Path absPidRefsPath = getExpectedPath(id, "refs", HashStoreIdTypes.pid.getName()); - String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (OrphanRefsFilesException orfe) { + // `findObject` throws this exception when the pid and cid refs file exists, + // but the actual object being referenced by the pid does not exist + + // Rename pid refs file for deletion + Path absPidRefsPath = + getExpectedPath(id, "refs", HashStoreIdTypes.pid.getName()); + // Get the cid from the pid refs file before renaming it for deletion + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + // Remove the pid from the cid refs file + Path absCidRefsPath = + getExpectedPath(cidRead, "refs", HashStoreIdTypes.cid.getName()); + // Since we must access the cid reference file, this must be synchronized + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cidRead)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + + pid + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " + + "pid: " + + pid + " with cid: " + cidRead); + referenceLockedCids.add(cidRead); + } - // Remove the pid from the cid refs file - Path absCidRefsPath = getExpectedPath( - cidRead, "refs", HashStoreIdTypes.cid.getName() - ); - updateRefsFile(pid, absCidRefsPath, "remove"); + try { + updateRefsFile(pid, absCidRefsPath, "remove"); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing referenceLockedCids for " + + "pid: " + + pid + " with cid: " + cidRead); + referenceLockedCids.remove(cidRead); + referenceLockedCids.notify(); + } + } - // Add the cid reference file to deleteList if it's now empty - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } + // Add the cid reference file to deleteList if it's now empty + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead - + " does not exist, but pid and cid reference file found for pid: " + pid - + ". Deleted pid and cid ref files and metadata."; - logFileHashStore.warn(warnMsg); - return; + // Rename metadata documents for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } - } catch (PidNotFoundInCidRefsFileException pnficrfe) { - // `findObject` throws this exception when both the pid and cid refs file exists - // but the pid is not found in the cid refs file. + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted pid and cid ref files and metadata."; + logFileHashStore.warn(warnMsg); + return; - // Rename pid refs file for deletion - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // `findObject` throws this exception when both the pid and cid refs file exists + // but the pid is not found in the cid refs file. - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } + // Rename pid refs file for deletion + Path absPidRefsPath = + getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = - "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " - + pid + ". Deleted orphan pid refs file and metadata."; - logFileHashStore.warn(warnMsg); - return; - } + // Rename metadata documents for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = + "FileHashStore.deleteObject - Pid not found in expected cid refs file for" + + " pid: " + + pid + ". Deleted orphan pid refs file and metadata."; + logFileHashStore.warn(warnMsg); + return; + } - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " + + pid + " with cid: " + cid); + referenceLockedCids.add(cid); } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " - + pid + " with cid: " + cid); - referenceLockedCids.add(cid); - } - try { - // Proceed with comprehensive deletion - cid exists, nothing out of place - // Get all the required paths to streamline deletion process - // Permanent address of the object - Path objRealPath = getExpectedPath(pid, "object", null); - // Cid refs file - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - // Pid refs file - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } + try { + // Proceed with comprehensive deletion - cid exists, nothing out of place + // Get all the required paths to streamline deletion process + // Permanent address of the object + Path objRealPath = getExpectedPath(pid, "object", null); + // Cid refs file + Path absCidRefsPath = + getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + // Pid refs file + Path absPidRefsPath = + getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + + // Rename metadata documents for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } - // Rename pid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove pid from cid refs file - updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete obj and cid refs file only if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Rename empty cid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - // Rename actual object for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); - } else { - String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object deletion."; - logFileHashStore.warn(warnMsg); + // Rename pid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Remove pid from cid refs file + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete obj and cid refs file only if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Rename empty cid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + // Rename actual object for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); + } else { + String warnMsg = + "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object deletion."; + logFileHashStore.warn(warnMsg); + } + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " + + pid + " with cid: " + cid); + referenceLockedCids.remove(cid); + referenceLockedCids.notify(); + } } - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath - ); - } finally { // Release lock - synchronized (referenceLockedCids) { + synchronized (objectLockedIds) { logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " - + pid + " with cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); + "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid); + objectLockedIds.remove(pid); + objectLockedIds.notify(); } } } From 321a44e0b01b9158ae3ed8aeefc27d8f5e8759cc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 14 May 2024 13:47:49 -0700 Subject: [PATCH 262/553] Refactor 'deleteObjectByCid' --- .../filehashstore/FileHashStore.java | 84 +++++++++---------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1a5fed31..22411e2f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1207,7 +1207,6 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } } - // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); @@ -1948,59 +1947,58 @@ protected void move(File source, File target, String entity) throws IOException, */ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException, InterruptedException { + logFileHashStore.debug("FileHashStore - deleteObjectByCid: called to delete cid: " + cid); + // Get expected path of the cid refs file Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - if (Files.exists(absCidRefsPath)) { - // The cid refs file exists, so the cid object cannot be deleted. - String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" - + " references, skipping deletion."; - logFileHashStore.warn(warnMsg); - - } else { - // Get permanent address of the actual cid - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid - ); - Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + // Get permanent address of the actual cid + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid); + Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - // Minimize the amount of time the cid is locked - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); + // Minimize the amount of time the cid is locked + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " - + "interrupted while waiting to delete object with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " + + "interrupted while waiting to delete object with cid: " + cid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } - logFileHashStore.debug( - "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " - + cid); - referenceLockedCids.add(cid); } + logFileHashStore.debug( + "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " + + cid); + referenceLockedCids.add(cid); + } - try { + try { + if (Files.exists(absCidRefsPath)) { + // The cid refs file exists, so the cid object cannot be deleted. + String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" + + " references, skipping deletion."; + logFileHashStore.warn(warnMsg); + } else { // If file exists, delete it. if (Files.exists(expectedRealPath)) { Files.delete(expectedRealPath); } - String debugMsg = "FileHashStore - deleteObjectByCid: object deleted at" - + expectedRealPath; + String debugMsg = + "FileHashStore - deleteObjectByCid: object deleted at" + expectedRealPath; logFileHashStore.debug(debugMsg); - - } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " - + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); - } + } + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObjectByCid - Releasing referenceLockedCids for cid: " + + cid); + referenceLockedCids.remove(cid); + referenceLockedCids.notify(); } } } From e397cbb006e6e9f32da2688443e86ccd839a44c7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 14 May 2024 13:49:03 -0700 Subject: [PATCH 263/553] Remove unused import statement --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 22411e2f..2473686e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -24,7 +24,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Properties; import com.fasterxml.jackson.databind.ObjectMapper; From 3f3f828f820bdecd9a2bd1a118c63c72ce7c57f2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 14 May 2024 16:21:00 -0700 Subject: [PATCH 264/553] Fix inaccurate debug statements and revise sync process in 'OrphanRefsFileException' scenario in 'deleteObject' --- .../filehashstore/FileHashStore.java | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2473686e..df77b692 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1101,14 +1101,14 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti synchronized (objectLockedIds) { if (objectLockedIds.contains(pid)) { String errMsg = - "FileHashStore.syncPutObject - Duplicate object request encountered for " + "FileHashStore.deleteObject - Duplicate object request encountered for " + "pid: " + pid + ". Already in progress."; logFileHashStore.warn(errMsg); throw new RuntimeException(errMsg); } logFileHashStore.debug( - "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid); + "FileHashStore.deleteObject - Synchronizing objectLockedIds for pid: " + pid); objectLockedIds.add(pid); } @@ -1193,6 +1193,23 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti try { updateRefsFile(pid, absCidRefsPath, "remove"); + // Add the cid reference file to deleteList if it's now empty + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } + + // Rename metadata documents for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted pid and cid ref files and metadata."; + logFileHashStore.warn(warnMsg); + return; } finally { // Release lock @@ -1205,25 +1222,6 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti referenceLockedCids.notify(); } } - - // Add the cid reference file to deleteList if it's now empty - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead - + " does not exist, but pid and cid reference file found for pid: " + pid - + ". Deleted pid and cid ref files and metadata."; - logFileHashStore.warn(warnMsg); - return; - } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists // but the pid is not found in the cid refs file. @@ -1321,7 +1319,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Release lock synchronized (objectLockedIds) { logFileHashStore.debug( - "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid); + "FileHashStore.deleteObject - Releasing objectLockedIds for pid: " + pid); objectLockedIds.remove(pid); objectLockedIds.notify(); } From 09b6549b81ccbcd56fcb17a2f4d03c7ea71e7382 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 15 May 2024 11:05:42 -0700 Subject: [PATCH 265/553] Add additional comments in javadoc for 'TestDataHarness' --- .../java/org/dataone/hashstore/testdata/TestDataHarness.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java index e523d40a..a106401a 100644 --- a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java +++ b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java @@ -13,7 +13,8 @@ * - "object_cid" is the SHA-256 hash of the pid * - algorithms without any prefixes are the algorithm hash of the pid's respective data object * content - * - "metadata_sha256" is the hash of the pid's respective metadata object content + * - "metadata_sha256" is the hash of the pid's respective metadata object content identifier + * - "metacat_cid" is the sha256 hash of the pid + formatId * */ public class TestDataHarness { From e53f7820899c5723b2be1d851bd8ac36296fef66 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 15 May 2024 12:59:34 -0700 Subject: [PATCH 266/553] Refactor 'deleteObject(String, String)' to improve clarity --- .../filehashstore/FileHashStore.java | 200 +++++++++--------- 1 file changed, 104 insertions(+), 96 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index df77b692..e9327afe 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -615,6 +615,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForEmptyString(pid, "pid", "tagObject"); FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); + // tagObject is synchronized with deleteObject based on a `cid` synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { try { @@ -1097,15 +1098,21 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti String pid = id; List deleteList = new ArrayList<>(); - // Store and delete is synchronized together + // Storing and deleting objects are synchronized together + // Duplicate store object requests for a pid are rejected, but deleting an object + // will wait for a pid to be released if it's found to be in use before proceeding. synchronized (objectLockedIds) { - if (objectLockedIds.contains(pid)) { - String errMsg = - "FileHashStore.deleteObject - Duplicate object request encountered for " - + "pid: " - + pid + ". Already in progress."; - logFileHashStore.warn(errMsg); - throw new RuntimeException(errMsg); + while (objectLockedIds.contains(pid)) { + try { + objectLockedIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - Delete request for pid: " + pid + + " has been interrupted."; + logFileHashStore.warn(errMsg); + throw new InterruptedException(errMsg); + } } logFileHashStore.debug( "FileHashStore.deleteObject - Synchronizing objectLockedIds for pid: " + pid); @@ -1114,7 +1121,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti try { // Get list of metadata documents first, these will always be deleted if they exist - // and reduces the time spent in the synchronization block + // and reduces the time spent in the synchronization block. We have locked the `pid` + // so we are safe to proceed with working on `pid` related metadata documents. // Metadata directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); @@ -1127,13 +1135,86 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti List metadataDocPaths = FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); - // Before we begin deleting files, we handle orphaned files scenarios try { - // Begin by looking for the cid and confirming state - // If there is an issue with finding an object (ex. orphaned reference files), - // custom exceptions will be thrown and handled in the catch blocks + // Before we begin deletion process, we look for the `cid` by calling + // `findObject` which will throw custom exceptions if there is an issue with + // the reference files, which help us determine the path to proceed with. cid = findObject(id); + // If no exceptions are thrown, we proceed to synchronization based on the `cid` + // Multiple threads may access the cid reference file (which contains a list of + // `pid`s that reference a `cid`) and this needs to be coordinated. Otherwise, + // we will run into a `OverlappingFileLockException` + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " + + pid + " with cid: " + cid); + referenceLockedCids.add(cid); + } + + try { + // Proceed with comprehensive deletion - cid exists, nothing out of place + // Get all the required paths to streamline deletion process + // Permanent address of the object + Path objRealPath = getExpectedPath(pid, "object", null); + // Cid refs file + Path absCidRefsPath = + getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + // Pid refs file + Path absPidRefsPath = + getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + + // Rename metadata documents to prepare for deletion + for (Path metadataDoc : metadataDocPaths) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } + + // Rename pid refs file to prepare for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Remove pid from cid refs file + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete obj and cid refs file **only** if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Rename empty cid refs file to prepare for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + // Rename actual object to prepare for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); + } else { + String warnMsg = + "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object deletion."; + logFileHashStore.warn(warnMsg); + } + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " + + pid + " with cid: " + cid); + referenceLockedCids.remove(cid); + referenceLockedCids.notify(); + } + } + } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file and related metadata documents @@ -1153,23 +1234,17 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); - return; } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when the pid and cid refs file exists, // but the actual object being referenced by the pid does not exist - // Rename pid refs file for deletion + // Get the cid from the pid refs file before renaming it for deletion Path absPidRefsPath = getExpectedPath(id, "refs", HashStoreIdTypes.pid.getName()); - // Get the cid from the pid refs file before renaming it for deletion String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove the pid from the cid refs file - Path absCidRefsPath = - getExpectedPath(cidRead, "refs", HashStoreIdTypes.cid.getName()); - // Since we must access the cid reference file, this must be synchronized + // Since we must access the cid reference file, the `cid` must be synchronized synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cidRead)) { try { @@ -1186,12 +1261,17 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } logFileHashStore.debug( "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " - + "pid: " - + pid + " with cid: " + cidRead); + + "pid: " + pid + " with cid: " + cidRead); referenceLockedCids.add(cidRead); } try { + // Rename pid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + // Remove the pid from the cid refs file + Path absCidRefsPath = + getExpectedPath(cidRead, "refs", HashStoreIdTypes.cid.getName()); updateRefsFile(pid, absCidRefsPath, "remove"); // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { @@ -1209,7 +1289,6 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files and metadata."; logFileHashStore.warn(warnMsg); - return; } finally { // Release lock @@ -1243,80 +1322,9 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti + " pid: " + pid + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); - return; - } - - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " - + pid + " with cid: " + cid); - referenceLockedCids.add(cid); - } - - try { - // Proceed with comprehensive deletion - cid exists, nothing out of place - // Get all the required paths to streamline deletion process - // Permanent address of the object - Path objRealPath = getExpectedPath(pid, "object", null); - // Cid refs file - Path absCidRefsPath = - getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - // Pid refs file - Path absPidRefsPath = - getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - - // Rename pid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove pid from cid refs file - updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete obj and cid refs file only if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Rename empty cid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - // Rename actual object for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); - } else { - String warnMsg = - "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object deletion."; - logFileHashStore.warn(warnMsg); - } - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath); - - } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " - + pid + " with cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); - } } } finally { - // Release lock + // Release lock on the pid synchronized (objectLockedIds) { logFileHashStore.debug( "FileHashStore.deleteObject - Releasing objectLockedIds for pid: " + pid); From 5e8b98ef7fd6cd032ddc655f3c949f5d97f8a01d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 15 May 2024 14:14:05 -0700 Subject: [PATCH 267/553] Apply formatting to 'deleteObject' method --- .../filehashstore/FileHashStore.java | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e9327afe..089b7935 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1073,8 +1073,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteObject - Called to delete object for id: " + id + "(" + idType - + ")" - ); + + ")"); // Validate input parameters FileHashStoreUtility.ensureNotNull(id, "id", "deleteObject"); FileHashStoreUtility.checkForEmptyString(id, "id", "deleteObject"); @@ -1153,14 +1152,15 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } catch (InterruptedException ie) { String errMsg = "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); + + "interrupted while waiting to delete objects for pid: " + + pid + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for pid: " + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cid); referenceLockedCids.add(cid); } @@ -1189,13 +1189,15 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Delete obj and cid refs file **only** if the cid refs file is empty if (Files.size(absCidRefsPath) == 0) { // Rename empty cid refs file to prepare for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + deleteList.add( + FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); // Rename actual object to prepare for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); } else { String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object deletion."; + + " is not empty (refs exist for cid). Skipping object " + + "deletion."; logFileHashStore.warn(warnMsg); } // Delete all related/relevant items with the least amount of delay @@ -1208,7 +1210,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for pid: " + "FileHashStore.deleteObject - Releasing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cid); referenceLockedCids.remove(cid); referenceLockedCids.notify(); @@ -1275,7 +1278,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti updateRefsFile(pid, absCidRefsPath, "remove"); // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + deleteList.add( + FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } // Rename metadata documents for deletion @@ -1286,8 +1290,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Delete items FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead - + " does not exist, but pid and cid reference file found for pid: " + pid - + ". Deleted pid and cid ref files and metadata."; + + " does not exist, but pid and cid reference file found for pid: " + + pid + ". Deleted pid and cid ref files and metadata."; logFileHashStore.warn(warnMsg); } finally { @@ -1295,8 +1299,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti synchronized (referenceLockedCids) { logFileHashStore.debug( "FileHashStore.deleteObject - Releasing referenceLockedCids for " - + "pid: " - + pid + " with cid: " + cidRead); + + "pid: " + pid + " with cid: " + cidRead); referenceLockedCids.remove(cidRead); referenceLockedCids.notify(); } @@ -1319,8 +1322,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.deleteObject - Pid not found in expected cid refs file for" - + " pid: " - + pid + ". Deleted orphan pid refs file and metadata."; + + " pid: " + pid + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); } } finally { From 1a1f75f6bb653b42ca90f9d6b12b4aa6727e9a18 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 20 May 2024 16:43:28 -0700 Subject: [PATCH 268/553] Fix bug when creating cid refs file where hashed value was used instead of the cid (content identifier) --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 089b7935..4bdea623 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2341,9 +2341,9 @@ protected Path getExpectedPath(String abId, String entity, String formatId) ); realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); } else if (formatId.equalsIgnoreCase(HashStoreIdTypes.cid.getName())) { - // `hashId` here is the cid refs file string to split + // If refs type is 'cid', use the abId directly provided String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId + DIRECTORY_DEPTH, DIRECTORY_WIDTH, abId ); realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } else { From 67f7862f0f3ad47c9ed0df2a8b417e01f298caa6 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 21 May 2024 11:11:29 -0700 Subject: [PATCH 269/553] Add new junit tests for 'getExpectedPath' method in FileHashStore --- .../FileHashStoreProtectedTest.java | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index a1ecd963..674d8159 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.fail; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -968,6 +969,9 @@ public void tryDeleteObjectByCid_cidRefsFileContainsPids() throws Exception { } } + /** + * Confirm getExpectedPath returns a file path that exists + */ @Test public void getExpectedPath() throws Exception { // Get single test file to "upload" @@ -985,4 +989,166 @@ public void getExpectedPath() throws Exception { assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); } + + /** + * Confirm getExpectedPath returns correct object path + */ + @Test + public void getExpectedPath_objectPath() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); + + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String objShardString = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, cid + ); + Path calculatedObjRealPath = storePath.resolve("objects").resolve(objShardString); + + Path expectedObjCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + + assertEquals(expectedObjCidAbsPath, calculatedObjRealPath); + } + } + + /** + * Confirm getExpectedPath returns correct metadata path + */ + @Test + public void getExpectedPath_metadataPath() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); + + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + String storeFormatId = fhsProperties.getProperty("storeMetadataNamespace"); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + + // Document ID + String hashId = FileHashStoreUtility.getPidHexDigest(pid + storeFormatId, storeAlgo); + + // Metadata directory of the given pid + String metadataPidDirId = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String metadataPidDirIdSharded = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, metadataPidDirId + ); + + // Complete path + Path calculatedMetadataRealPath = + storePath.resolve("metadata").resolve(metadataPidDirIdSharded).resolve(hashId); + + Path expectedMetadataPidPath = fileHashStore.getExpectedPath( + pid, "metadata", storeFormatId + ); + + assertEquals(expectedMetadataPidPath, calculatedMetadataRealPath); + } + } + + /** + * Confirm getExpectedPath returns correct pid refs path + */ + @Test + public void getExpectedPath_pidRefsPaths() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); + + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + + // Pid refs file + String metadataPidHash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String metadataPidHashSharded = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, metadataPidHash + ); + Path calculatedPidRefsRealPath = storePath.resolve("refs/pid").resolve(metadataPidHashSharded); + + Path expectedPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + + assertEquals(expectedPidRefsPath, calculatedPidRefsRealPath); + } + } + + /** + * Confirm getExpectedPath returns correct cid refs path + */ + @Test + public void getExpectedPath_cidRefsPaths() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); + + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + + // Cid refs file + String objShardString = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, cid + ); + Path calculatedCidRefsRealPath = storePath.resolve("refs/cid").resolve(objShardString); + + Path expectedCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + + assertEquals(expectedCidRefsPath, calculatedCidRefsRealPath); + } + } + + /** + * Confirm getExpectedPath throws exception when requesting the path to a refs file + * with a formatId arg that is not "cid" or "pid" + */ + @Test + public void getExpectedPath_incorrectRefsFormatId() { + assertThrows(IllegalArgumentException.class, () -> { + String cid = "testcid"; + fileHashStore.getExpectedPath(cid, "refs", "not_cid_or_pid"); + }); + } + + /** + * Confirm getExpectedPath throws exception when requesting path for an object + * that does not exist + */ + @Test + public void getExpectedPath_fileNotFound() { + assertThrows(FileNotFoundException.class, () -> { + String pid = "dou.test.1"; + fileHashStore.getExpectedPath(pid, "object", null); + }); + } } From 898920cc76102da738912c20157c38c634824706 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 09:17:59 -0700 Subject: [PATCH 270/553] Add new class 'HashStoreServiceRequest' --- .../hashstore/HashStoreServiceRequest.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java diff --git a/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java b/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java new file mode 100644 index 00000000..3c99d605 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java @@ -0,0 +1,30 @@ +package org.dataone.hashstore; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.hashstore.filehashstore.FileHashStoreUtility; + +/** + * A HashStoreServiceRequest represents the data needed for a single request to HashStore + * packaged as a Runnable task that can be executed within a thread pool, typically + * provided by the Executor service. + */ +public class HashStoreServiceRequest { + public static final int storeObject = 1; + public static final int deleteObject = 2; + private HashStore hashstore = null; + private int publicAPIMethod; + + private static final Log logHssr = LogFactory.getLog(HashStoreServiceRequest.class); + + protected HashStoreServiceRequest(HashStore hashstore, int publicAPIMethod) { + FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", + "HashStoreServiceRequestConstructor"); + this.hashstore = hashstore; + this.publicAPIMethod = publicAPIMethod; + } + + public void run() { + logHssr.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); + } +} From 0ffc8a6e51e8ff07b444cdf78fe9f9ea1359884f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 09:33:00 -0700 Subject: [PATCH 271/553] Add 'storeObject' and 'deleteObject' switch case options in HashStoreServiceRequest.run() method --- .../hashstore/HashStoreServiceRequest.java | 28 ++++++++++++++++++- .../exceptions/HashStoreServiceException.java | 10 +++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java diff --git a/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java b/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java index 3c99d605..c6a32723 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java +++ b/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java @@ -2,8 +2,11 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dataone.hashstore.exceptions.HashStoreServiceException; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; +import java.io.InputStream; + /** * A HashStoreServiceRequest represents the data needed for a single request to HashStore * packaged as a Runnable task that can be executed within a thread pool, typically @@ -14,17 +17,40 @@ public class HashStoreServiceRequest { public static final int deleteObject = 2; private HashStore hashstore = null; private int publicAPIMethod; + private String pid; + private InputStream objStream; private static final Log logHssr = LogFactory.getLog(HashStoreServiceRequest.class); - protected HashStoreServiceRequest(HashStore hashstore, int publicAPIMethod) { + protected HashStoreServiceRequest(HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", "HashStoreServiceRequestConstructor"); + FileHashStoreUtility.checkNotNegativeOrZero(publicAPIMethod, "HashStoreServiceRequestConstructor"); this.hashstore = hashstore; this.publicAPIMethod = publicAPIMethod; + this.objStream = objStream; + this.pid = pid; } public void run() { logHssr.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); + try { + switch (publicAPIMethod) { + case storeObject: + try { + hashstore.storeObject(objStream, pid, null, null, null, -1); + } catch (Exception e) { + throw new HashStoreServiceException(e.getMessage()); + } + case deleteObject: + try { + hashstore.deleteObject("pid", pid); + } catch (Exception e) { + throw new HashStoreServiceException(e.getMessage()); + } + } + } catch (HashStoreServiceException hse) { + logHssr.error("HashStoreServiceRequest - Error: " + hse.getMessage()); + } } } diff --git a/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java b/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java new file mode 100644 index 00000000..8225869e --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java @@ -0,0 +1,10 @@ +package org.dataone.hashstore.exceptions; + +/** + * An exception that encapsulates errors from the HashStore service + */ +public class HashStoreServiceException extends Exception { + public HashStoreServiceException(String message) { + super(message); + } +} \ No newline at end of file From e376f5d581dbd3849769533e81bd8a1bf808223d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 09:40:12 -0700 Subject: [PATCH 272/553] Add new 'hsr' option to test hashstore service in 'HashStoreClient' --- src/main/java/org/dataone/hashstore/HashStoreClient.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 7778eaf5..5a7be6b9 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -246,6 +246,11 @@ public static void main(String[] args) throws Exception { "Metadata for pid (" + pid + ") and namespace (" + formatId + ") has been deleted." ); + } else if (cmd.hasOption("hsr")) { + String pid = cmd.getOptionValue("pid"); + Path path = Paths.get(cmd.getOptionValue("path")); + InputStream dataStream = Files.newInputStream(path); + HashStoreServiceRequest request = new HashStoreServiceRequest(hashStore, 1, dataStream, pid); } else { System.out.println("HashStoreClient - No options found, use -h for help."); } @@ -353,6 +358,7 @@ private static Options addHashStoreClientOptions() { options.addOption( "dfs", "delfromhs", false, "(knbvm) Test flag to delete objs from a HashStore" ); + options.addOption("hsr", "hsservicerequest", false, "Dev option to test threading."); return options; } From 752fbe379c769a555c620c937c08aa637419faea Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 13:57:30 -0700 Subject: [PATCH 273/553] Remove 'hsr' option from 'HashStoreClient' class, rename 'HashStoreServiceRequest' class to 'HashStoreRunnable' and add new junit test in 'FileHashStoreInterface' class --- .../dataone/hashstore/HashStoreClient.java | 5 --- .../dataone/hashstore/HashStoreRunnable.java} | 11 +++-- .../FileHashStoreInterfaceTest.java | 41 +++++++++++++++++++ 3 files changed, 48 insertions(+), 9 deletions(-) rename src/{main/java/org/dataone/hashstore/HashStoreServiceRequest.java => test/java/org/dataone/hashstore/HashStoreRunnable.java} (85%) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 5a7be6b9..65d97359 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -246,11 +246,6 @@ public static void main(String[] args) throws Exception { "Metadata for pid (" + pid + ") and namespace (" + formatId + ") has been deleted." ); - } else if (cmd.hasOption("hsr")) { - String pid = cmd.getOptionValue("pid"); - Path path = Paths.get(cmd.getOptionValue("path")); - InputStream dataStream = Files.newInputStream(path); - HashStoreServiceRequest request = new HashStoreServiceRequest(hashStore, 1, dataStream, pid); } else { System.out.println("HashStoreClient - No options found, use -h for help."); } diff --git a/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java similarity index 85% rename from src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java rename to src/test/java/org/dataone/hashstore/HashStoreRunnable.java index c6a32723..f161b636 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreServiceRequest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -8,11 +8,11 @@ import java.io.InputStream; /** - * A HashStoreServiceRequest represents the data needed for a single request to HashStore + * A HashStoreRunnable represents the data needed for a single request to HashStore * packaged as a Runnable task that can be executed within a thread pool, typically * provided by the Executor service. */ -public class HashStoreServiceRequest { +public class HashStoreRunnable implements Runnable { public static final int storeObject = 1; public static final int deleteObject = 2; private HashStore hashstore = null; @@ -20,9 +20,10 @@ public class HashStoreServiceRequest { private String pid; private InputStream objStream; - private static final Log logHssr = LogFactory.getLog(HashStoreServiceRequest.class); + private static final Log logHssr = LogFactory.getLog(HashStoreRunnable.class); - protected HashStoreServiceRequest(HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { + public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream objStream, + String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", "HashStoreServiceRequestConstructor"); FileHashStoreUtility.checkNotNegativeOrZero(publicAPIMethod, "HashStoreServiceRequestConstructor"); @@ -42,12 +43,14 @@ public void run() { } catch (Exception e) { throw new HashStoreServiceException(e.getMessage()); } + break; case deleteObject: try { hashstore.deleteObject("pid", pid); } catch (Exception e) { throw new HashStoreServiceException(e.getMessage()); } + break; } } catch (HashStoreServiceException hse) { logHssr.error("HashStoreServiceRequest - Error: " + hse.getMessage()); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index aad63f56..935361a5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -19,6 +19,7 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.concurrent.ExecutorService; @@ -29,6 +30,7 @@ import javax.xml.bind.DatatypeConverter; +import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; @@ -711,6 +713,45 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { executorService.awaitTermination(1, TimeUnit.MINUTES); } + /** + * Test storeObject synchronization using a Runnable class + */ + @Test + public void storeObject_50duplicateObjects_viaRunnable() throws Exception { + // Get single test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + + ExecutorService executorService = Executors.newFixedThreadPool(5); + + for (int i = 1; i <= 50; i++) { + String pidAdjusted = pid + ".dou.test." + i; + InputStream dataStream = Files.newInputStream(testDataFile); + HashStoreRunnable + request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); + executorService.execute(request); + } + + executorService.shutdown(); + executorService.awaitTermination(1, TimeUnit.MINUTES); + + // Check cid refs file that every pid is found + String cidSha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); + Path cidRefsFilePath = fileHashStore.getExpectedPath(cidSha256DigestFromTestData, "refs", "cid"); + for (int i = 1; i <= 50; i++) { + String pidAdjusted = pid + ".dou.test." + i; + boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( + pidAdjusted, cidRefsFilePath + ); + assertTrue(pidFoundInCidRefFiles); + } + + // Confirm that 50 pid refs file exists + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/pid")); + assertEquals(50, pidRefFiles.size()); + } + /** * Test storeMetadata stores metadata as expected */ From 2e71f8f1dbc3ee24f05a5fd4bcddf5e858f4cef5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 13:57:57 -0700 Subject: [PATCH 274/553] Revise exception log message in 'writeToTmpFileAndGenerateChecksums' method in 'FileHashStore' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4bdea623..f2e459e5 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1822,8 +1822,8 @@ protected Map writeToTmpFileAndGenerateChecksums( } catch (IOException ioe) { String errMsg = - "FileHashStore.writeToTmpFileAndGenerateChecksums - Unexpected Exception: " + ioe - .fillInStackTrace(); + "FileHashStore.writeToTmpFileAndGenerateChecksums - Unexpected Exception ~ " + ioe + .getClass().getName() + ": " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; From d7d79ca0815573a19597cb7256a10411c0b16a5b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 14:10:36 -0700 Subject: [PATCH 275/553] Refactor newly added junit test 'storeObject_50Pids_1Obj_viaRunnable' --- .../FileHashStoreInterfaceTest.java | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 935361a5..f46b180d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -19,9 +19,12 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -717,15 +720,19 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { * Test storeObject synchronization using a Runnable class */ @Test - public void storeObject_50duplicateObjects_viaRunnable() throws Exception { + public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { // Get single test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); + List pidModifiedList = new ArrayList<>(); + for (int i = 1; i <= 50; i++) { + pidModifiedList.add(pid + ".dou.test." + i); + } + ExecutorService executorService = Executors.newFixedThreadPool(5); - for (int i = 1; i <= 50; i++) { - String pidAdjusted = pid + ".dou.test." + i; + for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); HashStoreRunnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); @@ -738,13 +745,16 @@ public void storeObject_50duplicateObjects_viaRunnable() throws Exception { // Check cid refs file that every pid is found String cidSha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); Path cidRefsFilePath = fileHashStore.getExpectedPath(cidSha256DigestFromTestData, "refs", "cid"); - for (int i = 1; i <= 50; i++) { - String pidAdjusted = pid + ".dou.test." + i; - boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( - pidAdjusted, cidRefsFilePath - ); - assertTrue(pidFoundInCidRefFiles); + Set stringSet = new HashSet<>(pidModifiedList); + List lines = Files.readAllLines(cidRefsFilePath); + boolean allFoundPidsFound = true; + for (String line : lines) { + if (!stringSet.contains(line)) { + allFoundPidsFound = false; + break; + } } + assertTrue(allFoundPidsFound); // Confirm that 50 pid refs file exists Path storePath = Paths.get(fhsProperties.getProperty("storePath")); From fe1621aee86e204b35215b399dfa1fd358d9f082 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 14:21:34 -0700 Subject: [PATCH 276/553] Add new junit test 'deleteObject_50Pids_1Obj_viaRunnable' --- .../dataone/hashstore/HashStoreRunnable.java | 9 ++++ .../FileHashStoreInterfaceTest.java | 46 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index f161b636..f4c02e3a 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -33,6 +33,15 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream o this.pid = pid; } + public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { + FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", + "HashStoreServiceRequestConstructor"); + FileHashStoreUtility.checkNotNegativeOrZero(publicAPIMethod, "HashStoreServiceRequestConstructor"); + this.hashstore = hashstore; + this.publicAPIMethod = publicAPIMethod; + this.pid = pid; + } + public void run() { logHssr.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); try { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index f46b180d..430d2f4f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1672,6 +1672,52 @@ public void deleteObject_Cid_AndCidRefsExists() throws Exception { } } + /** + * Test deleteObject synchronization using a Runnable class + */ + @Test + public void deleteObject_50Pids_1Obj_viaRunnable() throws Exception { + // Get single test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + + List pidModifiedList = new ArrayList<>(); + for (int i = 1; i <= 50; i++) { + pidModifiedList.add(pid + ".dou.test." + i); + } + + ExecutorService executorService = Executors.newFixedThreadPool(5); + + // Store 50 + for (String pidAdjusted : pidModifiedList) { + InputStream dataStream = Files.newInputStream(testDataFile); + HashStoreRunnable + request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); + executorService.execute(request); + } + // Delete 50 + for (String pidAdjusted : pidModifiedList) { + InputStream dataStream = Files.newInputStream(testDataFile); + HashStoreRunnable + request = new HashStoreRunnable(fileHashStore, 2, pidAdjusted); + executorService.execute(request); + } + + executorService.shutdown(); + executorService.awaitTermination(1, TimeUnit.MINUTES); + + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + // Check that no objects exist + List objects = FileHashStoreUtility.getFilesFromDir(storePath.resolve("objects")); + assertEquals(0, objects.size()); + // Check that no refs files exist + List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/pid")); + assertEquals(0, pidRefFiles.size()); + List cidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/cid")); + assertEquals(0, pidRefFiles.size()); + } + + /** * Confirm that deleteMetadata deletes metadata and empty sub directories */ From 63828e57dc6f0e25db0b2cc8740cbdb7b561c04b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 10 Jun 2024 14:28:28 -0700 Subject: [PATCH 277/553] Remove unused variable and fix incorrect assert statement in 'FileHashStoreInterfaceTest' class --- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 430d2f4f..b019b372 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1697,7 +1697,6 @@ public void deleteObject_50Pids_1Obj_viaRunnable() throws Exception { } // Delete 50 for (String pidAdjusted : pidModifiedList) { - InputStream dataStream = Files.newInputStream(testDataFile); HashStoreRunnable request = new HashStoreRunnable(fileHashStore, 2, pidAdjusted); executorService.execute(request); @@ -1714,7 +1713,7 @@ public void deleteObject_50Pids_1Obj_viaRunnable() throws Exception { List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/pid")); assertEquals(0, pidRefFiles.size()); List cidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/cid")); - assertEquals(0, pidRefFiles.size()); + assertEquals(0, cidRefFiles.size()); } From 06808092b16f01bcd94e72419eacf96aac7896cd Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 11 Jun 2024 13:36:51 -0700 Subject: [PATCH 278/553] Rename 'refs' subdirectories from 'pid' to 'pids' and 'cid' to 'cids', and update junit tests --- .../filehashstore/FileHashStore.java | 4 ++-- .../FileHashStoreInterfaceTest.java | 9 ++++++--- .../FileHashStoreProtectedTest.java | 5 +++-- .../FileHashStorePublicTest.java | 4 ++-- .../FileHashStoreReferencesTest.java | 20 +++++++++---------- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f2e459e5..e160ca2b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -161,8 +161,8 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep OBJECT_TMP_FILE_DIRECTORY = OBJECT_STORE_DIRECTORY.resolve("tmp"); METADATA_TMP_FILE_DIRECTORY = METADATA_STORE_DIRECTORY.resolve("tmp"); REFS_TMP_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("tmp"); - REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.pid.getName()); - REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve(HashStoreIdTypes.cid.getName()); + REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("pids"); + REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("cids"); try { // Physically create object & metadata store and tmp directories diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index b019b372..f34be6ac 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -758,7 +758,8 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { // Confirm that 50 pid refs file exists Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/pid")); + List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + + "/pids")); assertEquals(50, pidRefFiles.size()); } @@ -1710,9 +1711,11 @@ public void deleteObject_50Pids_1Obj_viaRunnable() throws Exception { List objects = FileHashStoreUtility.getFilesFromDir(storePath.resolve("objects")); assertEquals(0, objects.size()); // Check that no refs files exist - List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/pid")); + List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + + "/pids")); assertEquals(0, pidRefFiles.size()); - List cidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs/cid")); + List cidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + + "/cids")); assertEquals(0, cidRefFiles.size()); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 674d8159..cd034fdf 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1087,7 +1087,8 @@ public void getExpectedPath_pidRefsPaths() throws Exception { String metadataPidHashSharded = FileHashStoreUtility.getHierarchicalPathString( storeDepth, storeWidth, metadataPidHash ); - Path calculatedPidRefsRealPath = storePath.resolve("refs/pid").resolve(metadataPidHashSharded); + Path calculatedPidRefsRealPath = + storePath.resolve("refs/pids").resolve(metadataPidHashSharded); Path expectedPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); @@ -1120,7 +1121,7 @@ public void getExpectedPath_cidRefsPaths() throws Exception { String objShardString = FileHashStoreUtility.getHierarchicalPathString( storeDepth, storeWidth, cid ); - Path calculatedCidRefsRealPath = storePath.resolve("refs/cid").resolve(objShardString); + Path calculatedCidRefsRealPath = storePath.resolve("refs/cids").resolve(objShardString); Path expectedCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 05b0fa42..e6866389 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -277,9 +277,9 @@ public void initRefsDirectories() { assertTrue(Files.isDirectory(refsPath)); Path refsTmpPath = rootDirectory.resolve("refs/tmp"); assertTrue(Files.isDirectory(refsTmpPath)); - Path refsPidPath = rootDirectory.resolve("refs/pid"); + Path refsPidPath = rootDirectory.resolve("refs/pids"); assertTrue(Files.isDirectory(refsPidPath)); - Path refsCidPath = rootDirectory.resolve("refs/cid"); + Path refsCidPath = rootDirectory.resolve("refs/cids"); assertTrue(Files.isDirectory(refsCidPath)); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 6147e3bb..c2956383 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -80,9 +80,9 @@ public void tagObject() throws Exception { fileHashStore.tagObject(pid, cid); Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); assertEquals(1, cidRefsFiles.length); } @@ -132,9 +132,9 @@ public void tagObject_refsFileAlreadyExists() throws Exception { fileHashStore.tagObject(pid, cid); // Confirm that there is only 1 of each refs file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); assertEquals(1, cidRefsFiles.length); } @@ -181,9 +181,9 @@ public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound fileHashStore.tagObject(pid, cid); // There should only be 1 of each refs file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); assertEquals(1, cidRefsFiles.length); } @@ -202,9 +202,9 @@ public void tagObject_pidRefsFileFound_cidRefsFileNotFound() throws Exception { fileHashStore.tagObject(pid, cid); // Confirm that there is only 1 of each refs file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); assertEquals(1, cidRefsFiles.length); } @@ -236,9 +236,9 @@ public void tagObject_pidRefsFileNotFound_cidRefsFileFound() throws Exception { // There should be 2 pid refs file, and 1 cid refs file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pid").toFile().listFiles(); + File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(2, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cid").toFile().listFiles(); + File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); assertEquals(1, cidRefsFiles.length); } From b0c23d8112793f2f1bbd59837af8c3065059ec04 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 13 Jun 2024 09:19:48 -0700 Subject: [PATCH 279/553] Update sysmeta 'formatId' used in junit tests and 'README.md' --- README.md | 2 +- .../org/dataone/hashstore/HashStoreTest.java | 6 ++-- .../FileHashStoreInterfaceTest.java | 8 +++--- .../FileHashStoreProtectedTest.java | 2 +- .../FileHashStorePublicTest.java | 28 +++++++++---------- .../FileHashStoreReferencesTest.java | 2 +- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 0acd4f6b..86f973ff 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ storeProperties.setProperty("storeDepth", "3"); storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); // Instantiate a HashStore diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 1f34c70a..bad93b5c 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -36,7 +36,7 @@ public void getHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); try { @@ -77,7 +77,7 @@ public void hashStore_classPackageNull() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); hashStore = HashStoreFactory.getHashStore(null, storeProperties); @@ -98,7 +98,7 @@ public void hashStore_classPackageNotFound() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index f34be6ac..b81875fb 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -74,7 +74,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); try { @@ -987,7 +987,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { // Submit 3 threads, each calling storeMetadata Future future1 = executorService.submit(() -> { try { - String formatId = "http://ns.dataone.org/service/types/v2.0"; + String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); // Calculate absolute path @@ -1002,7 +1002,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { }); Future future2 = executorService.submit(() -> { try { - String formatId = "http://ns.dataone.org/service/types/v2.0"; + String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); // Calculate absolute path @@ -1017,7 +1017,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { }); Future future3 = executorService.submit(() -> { try { - String formatId = "http://ns.dataone.org/service/types/v2.0"; + String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); // Calculate absolute path diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index cd034fdf..86254213 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -47,7 +47,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); try { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index e6866389..5df9fc7b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -49,7 +49,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); try { @@ -91,7 +91,7 @@ public void constructor_nullStorePath() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -110,7 +110,7 @@ public void constructor_illegalDepthArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -129,7 +129,7 @@ public void constructor_illegalWidthArg() { storeProperties.setProperty("storeWidth", "0"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -148,7 +148,7 @@ public void constructor_illegalAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "MD5"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -167,7 +167,7 @@ public void constructor_emptyAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", ""); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -186,7 +186,7 @@ public void constructor_emptySpacesAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", " "); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -239,7 +239,7 @@ public void initDefaultStore_directoryNull() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -302,7 +302,7 @@ public void testGetHashStoreYaml() throws IOException { assertEquals(hsProperties.get("storeWidth"), 2); assertEquals(hsProperties.get("storeAlgorithm"), "SHA-256"); assertEquals( - hsProperties.get("storeMetadataNamespace"), "http://ns.dataone.org/service/types/v2.0" + hsProperties.get("storeMetadataNamespace"), "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); } @@ -317,7 +317,7 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -336,7 +336,7 @@ public void testExistingHashStoreConfiguration_diffAlgorithm() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "MD5"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -356,7 +356,7 @@ public void testExistingHashStoreConfiguration_diffDepth() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -376,7 +376,7 @@ public void testExistingHashStoreConfiguration_diffWidth() { storeProperties.setProperty("storeWidth", "1"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); new FileHashStore(storeProperties); @@ -418,7 +418,7 @@ public void testExistingHashStoreConfiguration_missingYaml() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); FileHashStore secondHashStore = new FileHashStore(storeProperties); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index c2956383..70064144 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -47,7 +47,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); try { From 87ac4b2d5ef0abc025829eb5afa9f2b9bc9a5104 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 13 Jun 2024 09:25:17 -0700 Subject: [PATCH 280/553] Update README.md examples and fix typos --- README.md | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 86f973ff..6974b273 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ hashStore.storeObject(stream, pid) In HashStore, objects are first saved as temporary files while their content identifiers are calculated. Once the default hash algorithm list and their hashes are generated, objects are stored in their permanent location using the store's algorithm's corresponding hash value, the store depth and the store width. Lastly, reference files are created for the object so that they can be found and retrieved given an identifier (ex. persistent identifier (pid)). Note: Objects are also stored once and only once. -By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identfiier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: +By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identifier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: ```java // All-in-one process which stores, validates and tags an object objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) @@ -92,13 +92,13 @@ tagObject(pid, cid) **How do I delete an object if I have the pid?** - To delete an object, all its associated reference files and its metadata, call the Public API method `deleteObject()` with `idType` 'pid'. If an `idType` is not given (ex. calling `deleteObject(String pid)`), the `idType` will be assumed to be a 'pid' -- To delete only an object, call `deleteObject()` with `idType` 'cid' which will remove the object if it it is not referenced by any pids. +- To delete only an object, call `deleteObject()` with `idType` 'cid' which will remove the object if it is not referenced by any pids. - Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. ###### Working with metadata (store, retrieve, delete) -HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. All metadata documents related to a 'pid' are stored in a directory determined by calculating the hash of the pid (based on the store's algorithm). Each specific metadata document is then stored by calculating the hash of its associated `formatId`. By default, calling `storeMetadata` will use HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). +HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. All metadata documents related to a 'pid' are stored in a directory determined by calculating the hash of the pid (based on the store's algorithm). Each specific metadata document is then stored by calculating the hash of its associated `pid+formatId`. By default, calling `storeMetadata` will use HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). **How do I retrieve a metadata file?** - To find a metadata object, call the Public API method `retrieveMetadata` which returns a stream to the metadata file that's been stored with the default metadata namespace if it exists. @@ -139,17 +139,33 @@ These reference files are implemented in HashStore underneath the hood with no e ## - Objects are stored using their content identifier as the file address ## - The reference file for each pid contains a single cid ## - The reference file for each cid contains multiple pids each on its own line -## - There is one sysmeta document under the metadata directory for the pid hash - -.../metacat/hashstore/ -└─ objects - └─ /d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 -└─ metadata - └─ /d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2/affe1b6dd20659c63e99e63a29c...579c2d688880adc -└─ refs - └─ pid/0d/55/5e/d77052d7e166017f779...7230bcf7abcef65e - └─ cid/d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 -hashstore.yaml +## - There are two metadata docs under the metadata directory for the pid (sysmeta, annotations) + +.../metacat/hashstore +├── hashstore.yaml +└── objects +| ├── 4d +| └── 19 +| └── 81 +| └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c +└── metadata +| ├── 0d +| └── 55 +| └── 55 +| └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e +| └── 323e0799524cec4c7e14d31289cefd884b563b5c052f154a066de5ec1e477da7 +| └── sha256(pid+formatId_annotations) +└── refs + ├── cids + | ├── 4d + | └── 19 + | └── 81 + | └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c + └── pids + ├── 0d + └── 55 + └── 55 + └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e ``` @@ -182,7 +198,7 @@ $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreCl # Get the checksum of a data object $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 -# Find an object in HashStore (returns its content identifer if it exists) +# Find an object in HashStore (returns its content identifier if it exists) $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -findobject -pid testpid1 # Store a data object From be50903b2a0c259c9e5d046f4a50a72bb0f22c9b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 13 Jun 2024 10:14:23 -0700 Subject: [PATCH 281/553] Update synchronization value in 'storeMetadata' to be the hash of the 'pid'+'formatId' and comments for clarity --- .../hashstore/filehashstore/FileHashStore.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e160ca2b..e728077f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -869,8 +869,10 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF // However, the same pid could be used with different formatIds, so // synchronize ids with pid + formatId; String pidFormatId = pid + checkedFormatId; + String metadataDocId = FileHashStoreUtility.getPidHexDigest(pidFormatId, + OBJECT_STORE_ALGORITHM); synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(pidFormatId)) { + while (metadataLockedIds.contains(metadataDocId)) { try { metadataLockedIds.wait(TIME_OUT_MILLISEC); @@ -886,7 +888,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF logFileHashStore.debug( "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid ); - metadataLockedIds.add(pidFormatId); + metadataLockedIds.add(metadataDocId); } try { @@ -923,7 +925,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF "FileHashStore.storeMetadata - Releasing metadataLockedIds for pid: " + pid + " and formatId " + checkedFormatId ); - metadataLockedIds.remove(pidFormatId); + metadataLockedIds.remove(metadataDocId); metadataLockedIds.notify(); } } @@ -1362,8 +1364,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); - // Get permanent address of the metadata document by calculating the sha-256 hex digest - // of the 'pid' + 'formatId' + // Get permanent address of the metadata document Path metadataDocPath = getExpectedPath(pid, "metadata", formatId); if (!Files.exists(metadataDocPath)) { @@ -2225,7 +2226,7 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // Get permanent address for the given metadata document // All metadata documents for a pid are stored in a directory that is formed - // by using the hash of the 'pid', with the file name being the hash of the 'formatId' + // by using the hash of the 'pid', with the file name being the hash of the 'pid+formatId' Path pathToStoredMetadata = getExpectedPath(pid, "metadata", checkedFormatId); // Store metadata to tmpMetadataFile From d6e16472db0153f20075d6853b7dc204d66bf79a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 13 Jun 2024 10:23:18 -0700 Subject: [PATCH 282/553] Add thread safety to 'deleteMetadata(pid, formatId)' and update hashstore interface --- .../java/org/dataone/hashstore/HashStore.java | 6 +- .../filehashstore/FileHashStore.java | 65 ++++++++++++++----- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 7ca04a14..5eb3b18a 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -292,9 +292,10 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio * @throws IOException I/O error when deleting metadata or empty directories * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported + * @throws InterruptedException Issue with synchronization on metadta doc */ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException; + IOException, NoSuchAlgorithmException, InterruptedException; /** * Deletes all metadata related for the given 'pid' from HashStore @@ -304,9 +305,10 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * @throws IOException I/O error when deleting metadata or empty directories * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported + * @throws InterruptedException Issue with synchronization on metadta doc */ public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException; + NoSuchAlgorithmException, InterruptedException; /** * Calculates the hex digest of an object that exists in HashStore using a given persistent diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e728077f..53e95153 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1354,9 +1354,10 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio @Override public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException { + IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( - "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid + "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid + " with " + + "formatId: " + formatId ); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); @@ -1364,21 +1365,55 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); - // Get permanent address of the metadata document - Path metadataDocPath = getExpectedPath(pid, "metadata", formatId); - - if (!Files.exists(metadataDocPath)) { - String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid - + " with metadata address: " + metadataDocPath; - logFileHashStore.warn(errMsg); + String metadataDocId = FileHashStoreUtility.getPidHexDigest(pid + formatId, + OBJECT_STORE_ALGORITHM); + synchronized (metadataLockedIds) { + while (metadataLockedIds.contains(metadataDocId)) { + try { + metadataLockedIds.wait(TIME_OUT_MILLISEC); - } else { - // Proceed to delete - Files.delete(metadataDocPath); - logFileHashStore.info( - "FileHashStore.deleteMetadata - File deleted for: " + pid - + " with metadata address: " + metadataDocPath + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteMetadata - Metadata lock was interrupted while" + + " deleting metadata for: " + pid + " and formatId: " + formatId + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteMetadata - Synchronizing metadataLockedIds for pid: " + pid ); + metadataLockedIds.add(metadataDocId); + } + + try { + // Get permanent address of the metadata document + Path metadataDocPath = getExpectedPath(pid, "metadata", formatId); + + if (!Files.exists(metadataDocPath)) { + String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid + + " with metadata address: " + metadataDocPath; + logFileHashStore.warn(errMsg); + + } else { + // Proceed to delete + Files.delete(metadataDocPath); + logFileHashStore.info( + "FileHashStore.deleteMetadata - File deleted for: " + pid + + " with metadata address: " + metadataDocPath + ); + } + } finally { + // Release lock + synchronized (metadataLockedIds) { + logFileHashStore.debug( + "FileHashStore.deleteMetadata - Releasing metadataLockedIds for pid: " + pid + + " and formatId " + formatId + ); + metadataLockedIds.remove(metadataDocId); + metadataLockedIds.notify(); + } } } From 93225e44f209cb6a20f0a41afd965f899aff9ac7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 13 Jun 2024 10:47:00 -0700 Subject: [PATCH 283/553] Add thread safety to 'deleteMetadata(pid)' method --- .../filehashstore/FileHashStore.java | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 53e95153..62066748 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1422,7 +1422,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx */ @Override public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException { + NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( "FileHashStore.deleteMetadata - Called to delete all metadata for pid: " + pid ); @@ -1441,7 +1441,41 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept expectedPidMetadataDirectory ); for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + String metadataDocId = metadataDoc.getFileName().toString(); + synchronized (metadataLockedIds) { + while (metadataLockedIds.contains(metadataDocId)) { + try { + metadataLockedIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteMetadata - Metadata lock was interrupted while" + + " deleting metadata doc: " + metadataDocId + " for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.deleteMetadata - Synchronizing metadataLockedIds for pid: " + pid + ); + metadataLockedIds.add(metadataDocId); + } + + try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); + } finally { + // Release lock + synchronized (metadataLockedIds) { + logFileHashStore.debug( + "FileHashStore.deleteMetadata - Releasing metadataLockedIds for pid: " + pid + + " and doc " + metadataDocId + ); + metadataLockedIds.remove(metadataDocId); + metadataLockedIds.notify(); + } + } + } // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); From 7780992eb29d4a322eaf5d6de57917bcc2823178 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 13 Jun 2024 13:05:17 -0700 Subject: [PATCH 284/553] Refactor 'deleteObject(pid)' to call 'deleteMetadata(pid)' instead of manually searching for & deleting metadata docs --- .../filehashstore/FileHashStore.java | 37 +++++-------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 62066748..0586a0d8 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1130,11 +1130,6 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest); - Path expectedPidMetadataDirectory = - METADATA_STORE_DIRECTORY.resolve(pidRelativePath); - // Add all metadata doc paths to a List to iterate over below - List metadataDocPaths = - FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); try { // Before we begin deletion process, we look for the `cid` by calling @@ -1179,11 +1174,6 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - // Rename metadata documents to prepare for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - // Rename pid refs file to prepare for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Remove pid from cid refs file @@ -1204,6 +1194,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); + // Remove metadata files + deleteMetadata(pid); logFileHashStore.info( "FileHashStore.deleteObject - File and references deleted for: " + pid + " with object address: " + objRealPath); @@ -1228,13 +1220,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - + // Delete items FileHashStoreUtility.deleteListItems(deleteList); + // Remove metadata files + deleteMetadata(pid); String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file and metadata."; @@ -1283,14 +1272,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti deleteList.add( FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - // Delete items FileHashStoreUtility.deleteListItems(deleteList); + // Remove metadata files + deleteMetadata(pid); String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files and metadata."; @@ -1314,14 +1299,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - // Rename metadata documents for deletion - for (Path metadataDoc : metadataDocPaths) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } - // Delete items FileHashStoreUtility.deleteListItems(deleteList); + // Remove metadata files + deleteMetadata(pid); String warnMsg = "FileHashStore.deleteObject - Pid not found in expected cid refs file for" + " pid: " + pid + ". Deleted orphan pid refs file and metadata."; From f59507f052822e4ed437c4938add400c43e92576 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 13 Jun 2024 13:54:49 -0700 Subject: [PATCH 285/553] Fix typos in 'hashstore' interface --- src/main/java/org/dataone/hashstore/HashStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 5eb3b18a..97501fad 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -292,7 +292,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio * @throws IOException I/O error when deleting metadata or empty directories * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported - * @throws InterruptedException Issue with synchronization on metadta doc + * @throws InterruptedException Issue with synchronization on metadata doc */ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException; @@ -305,7 +305,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * @throws IOException I/O error when deleting metadata or empty directories * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported - * @throws InterruptedException Issue with synchronization on metadta doc + * @throws InterruptedException Issue with synchronization on metadata doc */ public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException; From 5db413f6bac431c07f21055ec8d146d1d4b53147 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 14 Jun 2024 10:49:41 -0700 Subject: [PATCH 286/553] Add new custom exception classes to be thrown by 'verifyObject' --- .../exceptions/NonMatchingChecksumException.java | 10 ++++++++++ .../exceptions/NonMatchingObjSizeException.java | 12 ++++++++++++ .../UnsupportedHashAlgorithmException.java | 8 ++++++++ 3 files changed, 30 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java create mode 100644 src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java create mode 100644 src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java new file mode 100644 index 00000000..c0b2c3e4 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java @@ -0,0 +1,10 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +public class NonMatchingChecksumException extends IOException { + + public NonMatchingChecksumException(String message) { + super(message); + } +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java new file mode 100644 index 00000000..532abdb9 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java @@ -0,0 +1,12 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +public class NonMatchingObjSizeException extends IOException { + + public NonMatchingObjSizeException(String message) { + super(message); + } + +} + diff --git a/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java new file mode 100644 index 00000000..8b9bd591 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java @@ -0,0 +1,8 @@ +package org.dataone.hashstore.exceptions; + +public class UnsupportedHashAlgorithmException extends IllegalArgumentException { + + public UnsupportedHashAlgorithmException(String message) { + super(message); + } +} From 0b9b6aa3d49c88c711a039bb212ff13a484acbee Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 14 Jun 2024 10:50:18 -0700 Subject: [PATCH 287/553] Update 'verifyObject' to throw custom exceptions when there is an issue, update 'hashstore' interface and junit tests --- .../java/org/dataone/hashstore/HashStore.java | 14 +++++-- .../filehashstore/FileHashStore.java | 40 ++++++++++--------- .../FileHashStoreReferencesTest.java | 30 +++++++------- 3 files changed, 47 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 97501fad..fe161130 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -5,10 +5,13 @@ import java.io.InputStream; import java.security.NoSuchAlgorithmException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; /** * HashStore is a content-addressable file management system that utilizes the content identifier of @@ -143,16 +146,19 @@ public void tagObject(String pid, String cid) throws IOException, /** * Confirms that an ObjectMetadata's content is equal to the given values. If it is not * equal, it will return False - otherwise True. - * + * * @param objectInfo ObjectMetadata object with values * @param checksum Value of checksum to validate against * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing - * @throws IllegalArgumentException An expected value does not match + * @throws NonMatchingObjSizeException Given size =/= objMeta size value + * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value + * @throws UnsupportedHashAlgorithmException Given algo is not found or supported */ - public boolean verifyObject( + public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize - ) throws IllegalArgumentException; + ) throws NonMatchingObjSizeException, NonMatchingChecksumException, + UnsupportedHashAlgorithmException; /** * Checks whether an object referenced by a pid exists and returns the content identifier. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0586a0d8..4ded77ab 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -35,10 +35,13 @@ import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; /** * FileHashStore is a HashStore adapter class that manages the storage of objects and metadata to a @@ -734,9 +737,10 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } @Override - public boolean verifyObject( + public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize - ) throws IllegalArgumentException { + ) throws NonMatchingObjSizeException, NonMatchingChecksumException, + UnsupportedHashAlgorithmException { logFileHashStore.debug( "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() ); @@ -746,32 +750,30 @@ public boolean verifyObject( FileHashStoreUtility.checkNotNegativeOrZero(objSize, "verifyObject"); Map hexDigests = objectInfo.getHexDigests(); + // TODO: CHeck if algorithm is found in hexDigests, and whether it's supported or not String digestFromHexDigests = hexDigests.get(checksumAlgorithm); long objInfoRetrievedSize = objectInfo.getSize(); String objCid = objectInfo.getCid(); if (objInfoRetrievedSize != objSize) { - logFileHashStore.info( - "FileHashStore.verifyObject - Object size invalid for cid: " + objCid - + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize - ); - return false; + String errMsg = "FileHashStore.verifyObject - Object size invalid for cid: " + objCid + + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize; + logFileHashStore.error(errMsg); + throw new NonMatchingObjSizeException(errMsg); } else if (!digestFromHexDigests.equals(checksum)) { - logFileHashStore.info( - "FileHashStore.verifyObject - Object content invalid for cid: " + objCid - + ". Expected checksum: " + checksum + ". Actual checksum calculated: " - + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" - ); - return false; + String errMsg = "FileHashStore.verifyObject - Object content invalid for cid: " + objCid + + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")"; + logFileHashStore.error(errMsg); + throw new NonMatchingChecksumException(errMsg); } else { - logFileHashStore.info( - "FileHashStore.verifyObject - Object has been validated for cid: " + objCid - + ". Expected checksum: " + checksum + ". Actual checksum calculated: " - + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")" - ); - return true; + String errMsg = "FileHashStore.verifyObject - Object has been validated for cid: " + objCid + + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")"; + logFileHashStore.error(errMsg); + throw new UnsupportedHashAlgorithmException(errMsg); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 70064144..4c695dd3 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -18,6 +18,8 @@ import java.util.Properties; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; @@ -395,7 +397,7 @@ public void deleteCidRefsPid_allPidsRemoved() throws Exception { } /** - * Check that verifyObject returns true with good values + * Check that verifyObject does not throw exception with matching values */ @Test public void verifyObject_correctValues() throws Exception { @@ -412,15 +414,14 @@ public void verifyObject_correctValues() throws Exception { String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - boolean isObjectValid = fileHashStore.verifyObject( + fileHashStore.verifyObject( objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize ); - assertTrue(isObjectValid); } } /** - * Check that verifyObject returns false with mismatched size value + * Check that verifyObject throws exception when non-matching size value provided */ @Test public void verifyObject_mismatchedValuesBadSize() throws Exception { @@ -437,16 +438,16 @@ public void verifyObject_mismatchedValuesBadSize() throws Exception { String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; - boolean isObjectValid = fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - ); - assertFalse(isObjectValid); + assertThrows(NonMatchingObjSizeException.class, () -> { + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + ); + }); } } /** - * Check that verifyObject returns false and does not delete the file when - * there is a mismatch + * Check that verifyObject throws exception with non-matching checksum value */ @Test public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { @@ -463,10 +464,11 @@ public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - boolean isObjectValid = fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - ); - assertFalse(isObjectValid); + assertThrows(NonMatchingChecksumException.class, () -> { + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + ); + }); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); From d0eeea2f77cf88b391f1a82724262f3e487e5325 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 14 Jun 2024 12:13:26 -0700 Subject: [PATCH 288/553] Add new logic to 'verifyObject' to attempt to calculate the checksum for a supported algo not in the default list, add new junit tests and update 'hashstore' interface --- .../java/org/dataone/hashstore/HashStore.java | 3 +- .../filehashstore/FileHashStore.java | 63 +++++++++++++------ .../FileHashStoreReferencesTest.java | 49 ++++++++++++++- 3 files changed, 94 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index fe161130..94429365 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -154,11 +154,12 @@ public void tagObject(String pid, String cid) throws IOException, * @throws NonMatchingObjSizeException Given size =/= objMeta size value * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value * @throws UnsupportedHashAlgorithmException Given algo is not found or supported + * @throws IOException Issue with recalculating supported algo for checksum not found */ public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws NonMatchingObjSizeException, NonMatchingChecksumException, - UnsupportedHashAlgorithmException; + UnsupportedHashAlgorithmException, IOException; /** * Checks whether an object referenced by a pid exists and returns the content identifier. diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4ded77ab..0dcacff4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -740,7 +740,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize ) throws NonMatchingObjSizeException, NonMatchingChecksumException, - UnsupportedHashAlgorithmException { + UnsupportedHashAlgorithmException, IOException { logFileHashStore.debug( "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() ); @@ -749,32 +749,60 @@ public void verifyObject( FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); FileHashStoreUtility.checkNotNegativeOrZero(objSize, "verifyObject"); + String objCid = objectInfo.getCid(); + long objInfoRetrievedSize = objectInfo.getSize(); Map hexDigests = objectInfo.getHexDigests(); - // TODO: CHeck if algorithm is found in hexDigests, and whether it's supported or not String digestFromHexDigests = hexDigests.get(checksumAlgorithm); - long objInfoRetrievedSize = objectInfo.getSize(); - String objCid = objectInfo.getCid(); - - if (objInfoRetrievedSize != objSize) { - String errMsg = "FileHashStore.verifyObject - Object size invalid for cid: " + objCid - + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize; - logFileHashStore.error(errMsg); - throw new NonMatchingObjSizeException(errMsg); - } else if (!digestFromHexDigests.equals(checksum)) { + // Confirm that requested checksum to verify against is available + if (digestFromHexDigests == null) { + try { + validateAlgorithm(checksumAlgorithm); + // If no exceptions thrown, calculate the checksum with the given algo + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, objCid + ); + Path pathToCidObject = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + try (InputStream inputStream = Files.newInputStream(pathToCidObject)) { + digestFromHexDigests = FileHashStoreUtility.calculateHexDigest(inputStream, + checksumAlgorithm); + } catch (IOException ioe) { + String errMsg = + "FileHashStore.verifyObject - Unexpected error when calculating a checksum" + + " for cid: " + objCid + " with algorithm (" + checksumAlgorithm + + ") that is not part of the default list. " + ioe.getMessage(); + throw new IOException(errMsg); + } + } catch (NoSuchAlgorithmException nsae) { + String errMsg = + "FileHashStore.verifyObject - checksumAlgorithm given: " + checksumAlgorithm + + " is not supported. Supported algorithms: " + Arrays.toString( + SUPPORTED_HASH_ALGORITHMS + ); + logFileHashStore.error(errMsg); + throw new UnsupportedHashAlgorithmException(errMsg); + } + } + // Validate checksum + if (!digestFromHexDigests.equals(checksum)) { String errMsg = "FileHashStore.verifyObject - Object content invalid for cid: " + objCid + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")"; logFileHashStore.error(errMsg); throw new NonMatchingChecksumException(errMsg); + } + // Validate size + if (objInfoRetrievedSize != objSize) { + String errMsg = "FileHashStore.verifyObject - Object size invalid for cid: " + objCid + + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize; + logFileHashStore.error(errMsg); + throw new NonMatchingObjSizeException(errMsg); + } - } else { - String errMsg = "FileHashStore.verifyObject - Object has been validated for cid: " + objCid + String infoMsg = "FileHashStore.verifyObject - Object has been validated for cid: " + objCid + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")"; - logFileHashStore.error(errMsg); - throw new UnsupportedHashAlgorithmException(errMsg); - } + logFileHashStore.info(infoMsg); } @Override @@ -2373,7 +2401,6 @@ protected Path getExpectedPath(String abId, String entity, String formatId) DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid ); realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - } else if (entity.equalsIgnoreCase("metadata")) { // Get the pid metadata directory (the sharded path of the hashId) String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( @@ -2408,7 +2435,7 @@ protected Path getExpectedPath(String abId, String entity, String formatId) } else { throw new IllegalArgumentException( - "FileHashStore.getExpectedPath - entity must be 'object' or 'metadata'" + "FileHashStore.getExpectedPath - entity must be 'object', 'metadata' or 'refs'" ); } return realPath; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 4c695dd3..769536cf 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -21,6 +21,7 @@ import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -420,11 +421,55 @@ public void verifyObject_correctValues() throws Exception { } } + /** + * Check that verifyObject calculates and verifies a checksum with a supported algorithm that is + * not included in the default list + */ + @Test + public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("md2"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + fileHashStore.verifyObject( + objInfo, expectedChecksum, "MD2", expectedSize + ); + } + } + + /** + * Check that verifyObject calculates throws exception when given a checksumAlgorithm that is + * not supported + */ + @Test + public void verifyObject_unsupportedAlgo() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + + assertThrows(UnsupportedHashAlgorithmException.class, () -> { + fileHashStore.verifyObject( + objInfo, "ValueNotRelevant", "BLAKE2S", 1000 + ); + }); + } + } + /** * Check that verifyObject throws exception when non-matching size value provided */ @Test - public void verifyObject_mismatchedValuesBadSize() throws Exception { + public void verifyObject_mismatchedValuesNonMatchingSize() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -450,7 +495,7 @@ public void verifyObject_mismatchedValuesBadSize() throws Exception { * Check that verifyObject throws exception with non-matching checksum value */ @Test - public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { + public void verifyObject_mismatchedValuesNonMatchingChecksum() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); From 45c844a6fd83812c3c19a7a9c4a146afafc54831 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 14 Jun 2024 12:31:03 -0700 Subject: [PATCH 289/553] Clean up test classes 'FileHashStoreInterfaceTest' and 'FileHashStoreReferencesTest' --- .../FileHashStoreInterfaceTest.java | 46 ++++++++++--------- .../FileHashStoreReferencesTest.java | 10 ---- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index b81875fb..bdc0995b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -35,6 +35,7 @@ import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; @@ -1559,28 +1560,11 @@ public void deleteObject_pidType_pidOrphan() throws Exception { } } - /** - * Confirm deleteObject removes pid and cid refs orphan files - */ - @Test - public void deleteObject_orphanRefsFiles() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path absPathCidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path absPathPidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - - fileHashStore.deleteObject("pid", pid); - assertFalse(Files.exists(absPathCidRefsPath)); - assertFalse(Files.exists(absPathPidRefsPath)); - } - /** * Confirm that deleteObject throws exception when associated pid obj not found */ @Test - public void deleteObject_Pid_NotFoundPid() { + public void deleteObject_pidType_NotFoundPid() { assertThrows( FileNotFoundException.class, () -> fileHashStore.deleteObject( fhsDeleteTypePid, "dou.2023.hashstore.1" @@ -1620,11 +1604,28 @@ public void deleteObject_idEmptySpaces() { ); } + /** + * Confirm deleteObject removes pid and cid refs orphan files + */ + @Test + public void deleteObject_orphanRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path absPathCidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path absPathPidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + + fileHashStore.deleteObject("pid", pid); + assertFalse(Files.exists(absPathCidRefsPath)); + assertFalse(Files.exists(absPathPidRefsPath)); + } + /** * Confirm deleteObject with idType 'cid' deletes cid object */ @Test - public void deleteObject_Cid_idType() throws Exception { + public void deleteObject_cidType() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1633,7 +1634,6 @@ public void deleteObject_Cid_idType() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); String cid = objInfo.getCid(); - // Set flag to true fileHashStore.deleteObject(fhsDeleteTypeCid, cid); // Get permanent address of the actual cid @@ -1653,7 +1653,7 @@ public void deleteObject_Cid_idType() throws Exception { * exists (there are still pids referencing the object) */ @Test - public void deleteObject_Cid_AndCidRefsExists() throws Exception { + public void deleteObject_cidType_AndCidRefsExists() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1664,12 +1664,14 @@ public void deleteObject_Cid_AndCidRefsExists() throws Exception { ); String cid = objInfo.getCid(); - // Set flag to true fileHashStore.deleteObject(fhsDeleteTypeCid, cid); // Get permanent address of the actual cid Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); assertTrue(Files.exists(objRealPath)); + // Confirm cid refs file still exists + Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + assertTrue(Files.exists(cidRefsPath)); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 769536cf..0c12576d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -514,16 +514,6 @@ public void verifyObject_mismatchedValuesNonMatchingChecksum() throws Exception objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize ); }); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getCid(); - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, actualCid - ); - Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - assertTrue(Files.exists(objectStoreDirectory)); - } } } From 0875de3d97cf91d4b7d56681e0bfdf5e07ad770f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 14 Jun 2024 13:12:38 -0700 Subject: [PATCH 290/553] Adjust spacing for consistency in new custom exception classes --- .../hashstore/exceptions/NonMatchingChecksumException.java | 1 + .../hashstore/exceptions/UnsupportedHashAlgorithmException.java | 1 + 2 files changed, 2 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java index c0b2c3e4..e5bf79f9 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java @@ -7,4 +7,5 @@ public class NonMatchingChecksumException extends IOException { public NonMatchingChecksumException(String message) { super(message); } + } diff --git a/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java index 8b9bd591..0524d298 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java @@ -5,4 +5,5 @@ public class UnsupportedHashAlgorithmException extends IllegalArgumentException public UnsupportedHashAlgorithmException(String message) { super(message); } + } From 94d5109420a22aca3d1a26a594d35312320423d0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 14 Jun 2024 13:54:49 -0700 Subject: [PATCH 291/553] Update test classes to explicitly close new stream objects after they are used for resource management --- .../hashstore/HashStoreClientTest.java | 6 ++ .../dataone/hashstore/HashStoreRunnable.java | 4 + .../org/dataone/hashstore/HashStoreTest.java | 1 + .../FileHashStoreInterfaceTest.java | 76 ++++++++++++++++++- .../FileHashStoreProtectedTest.java | 40 ++++++++++ .../FileHashStorePublicTest.java | 1 + .../FileHashStoreReferencesTest.java | 5 ++ 7 files changed, 129 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 29307d64..8a28ec62 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -258,6 +258,7 @@ public void client_retrieveObjects() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); hashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Call client String optRetrieveObject = "-retrieveobject"; @@ -294,6 +295,7 @@ public void client_retrieveMetadata() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); hashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); // Call client String optRetrieveMetadata = "-retrievemetadata"; @@ -333,6 +335,7 @@ public void client_deleteObjects() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); hashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Call client String optDeleteObject = "-deleteobject"; @@ -373,6 +376,7 @@ public void client_deleteMetadata() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); hashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); // Call client String optDeleteMetadata = "-deletemetadata"; @@ -418,6 +422,7 @@ public void client_getHexDigest() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); hashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Call client String optGetChecksum = "-getchecksum"; @@ -460,6 +465,7 @@ public void client_findObject() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); hashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Call client String optFindObject = "-findobject"; diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index f4c02e3a..7006dcfe 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -5,6 +5,7 @@ import org.dataone.hashstore.exceptions.HashStoreServiceException; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; +import java.io.IOException; import java.io.InputStream; /** @@ -52,6 +53,7 @@ public void run() { } catch (Exception e) { throw new HashStoreServiceException(e.getMessage()); } + objStream.close(); break; case deleteObject: try { @@ -63,6 +65,8 @@ public void run() { } } catch (HashStoreServiceException hse) { logHssr.error("HashStoreServiceRequest - Error: " + hse.getMessage()); + } catch (IOException ioe) { + logHssr.error("HashStoreServiceRequest - Error: " + ioe.getMessage()); } } } diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index bad93b5c..9b9d9644 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -128,6 +128,7 @@ public void hashStore_storeObjects() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index bdc0995b..4949093b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -110,6 +110,7 @@ public void storeObject() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); @@ -131,6 +132,7 @@ public void storeObject_objSize() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); // Check the object size long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -151,6 +153,7 @@ public void storeObject_hexDigests() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); Map hexDigests = objInfo.getHexDigests(); @@ -191,6 +194,7 @@ public void storeObject_nullPid() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, null, null, null, null, -1); + dataStream.close(); }); } } @@ -207,6 +211,7 @@ public void storeObject_emptyPid() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, "", null, null, null, -1); + dataStream.close(); }); } } @@ -223,6 +228,7 @@ public void storeObject_zeroObjSize() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + dataStream.close(); }); } } @@ -243,6 +249,7 @@ public void storeObject_overloadChecksumCsAlgoAndSize() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, md2, "MD2", objectSize ); + dataStream.close(); Map hexDigests = objInfo.getHexDigests(); @@ -263,6 +270,7 @@ public void storeObject_overloadChecksumAndChecksumAlgo() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, md2, "MD2"); + dataStream.close(); Map hexDigests = objInfo.getHexDigests(); @@ -283,6 +291,7 @@ public void storeObject_overloadObjSize() throws Exception { long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, objectSize); + dataStream.close(); assertEquals(objectSize, objInfo.getSize()); } @@ -300,6 +309,7 @@ public void storeObject_overloadInputStreamOnly() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); Map hexDigests = objInfo.getHexDigests(); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); @@ -327,6 +337,7 @@ public void storeObject_overloadAdditionalAlgo() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, "MD2"); + dataStream.close(); Map hexDigests = objInfo.getHexDigests(); @@ -349,6 +360,7 @@ public void storeObject_validateChecksumValue() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); + dataStream.close(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); assertTrue(Files.exists(objCidAbsPath)); @@ -367,6 +379,7 @@ public void storeObject_correctChecksumValue() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, "MD2", null, null, -1); + dataStream.close(); String md2 = testData.pidData.get(pid).get("md2"); assertEquals(checksumCorrect, md2); @@ -387,6 +400,7 @@ public void storeObject_incorrectChecksumValue() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", -1); + dataStream.close(); }); } @@ -404,6 +418,7 @@ public void storeObject_emptyChecksumValue() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", -1); + dataStream.close(); }); } @@ -419,6 +434,7 @@ public void storeObject_nullChecksumValue() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", -1); + dataStream.close(); }); } @@ -436,6 +452,7 @@ public void storeObject_objSizeCorrect() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, objectSize ); + dataStream.close(); // Check id (sha-256 hex digest of the ab_id (pid)) assertEquals(objectSize, objInfo.getSize()); @@ -456,6 +473,7 @@ public void storeObject_objSizeIncorrect() { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, 1000 ); + dataStream.close(); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -476,6 +494,7 @@ public void storeObject_invalidAlgorithm() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, "SM2", null, null, -1); + dataStream.close(); }); } @@ -491,12 +510,14 @@ public void storeObject_duplicate() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); String pidTwo = pid + ".test"; InputStream dataStreamDup = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject( dataStreamDup, pidTwo, null, null, null, -1 ); + dataStreamDup.close(); String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); @@ -533,6 +554,7 @@ public void storeObject_largeSparseFile() throws Exception { InputStream dataStream = Files.newInputStream(testFilePath); String pid = "dou.sparsefile.1"; fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); assertTrue(Files.exists(objCidAbsPath)); @@ -567,6 +589,7 @@ public void storeObject_interruptProcess() throws Exception { InputStream dataStream = Files.newInputStream(testFilePath); String pid = "dou.sparsefile.1"; fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); } catch (IOException | NoSuchAlgorithmException | InterruptedException ioe) { ioe.printStackTrace(); } @@ -611,6 +634,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -631,6 +655,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -651,6 +676,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -671,6 +697,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -691,6 +718,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -778,6 +806,7 @@ public void storeMetadata() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); String testFormatId = "https://test.arcticdata.io/ns"; String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + metadataStream.close(); // Calculate absolute path Path metadataPidExpectedPath = fileHashStore.getExpectedPath( @@ -802,6 +831,7 @@ public void storeMetadata_defaultFormatId_overload() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); @@ -827,6 +857,7 @@ public void storeMetadata_pidHashIsDirectory() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -857,7 +888,11 @@ public void storeMetadata_multipleFormatIds() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); String testFormatId = "https://test.arcticdata.io/ns"; String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); - String metadataDefaultPath = fileHashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); + + InputStream metadataStreamDup = Files.newInputStream(testMetaDataFile); + String metadataDefaultPath = fileHashStore.storeMetadata(metadataStreamDup, pid); + metadataStreamDup.close(); // Calculate absolute path Path metadataTestFormatIdExpectedPath = fileHashStore.getExpectedPath( @@ -888,6 +923,7 @@ public void storeMetadata_fileSize() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, null); + metadataStream.close(); long writtenMetadataFile = Files.size(testMetaDataFile); long originalMetadataFie = Files.size(Paths.get(metadataPath)); @@ -922,6 +958,7 @@ public void storeMetadata_pidNull() { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, null, null); + metadataStream.close(); }); } } @@ -941,6 +978,7 @@ public void storeMetadata_pidEmpty() { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, "", null); + metadataStream.close(); }); } } @@ -960,6 +998,7 @@ public void storeMetadata_pidEmptySpaces() { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, " ", null); + metadataStream.close(); }); } } @@ -991,6 +1030,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); Path metadataPidExpectedPath = fileHashStore.getExpectedPath( @@ -1006,6 +1046,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); Path metadataPidExpectedPath = fileHashStore.getExpectedPath( @@ -1021,6 +1062,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); Path metadataPidExpectedPath = fileHashStore.getExpectedPath( @@ -1065,6 +1107,7 @@ public void retrieveObject() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Retrieve object InputStream objectCidInputStream = fileHashStore.retrieveObject(pid); @@ -1135,6 +1178,7 @@ public void retrieveObject_verifyContent() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Retrieve object InputStream objectCidInputStream; @@ -1185,6 +1229,7 @@ public void retrieveMetadata() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, pid, null); + metadataStream.close(); String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); @@ -1206,6 +1251,7 @@ public void retrieveMetadata_overload() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, pid, null); + metadataStream.close(); InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid); assertNotNull(metadataCidInputStream); @@ -1308,6 +1354,7 @@ public void retrieveMetadata_verifyContent() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, pid, null); + metadataStream.close(); String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); @@ -1385,6 +1432,7 @@ public void deleteObject_stringPidAll() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Get metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); @@ -1393,10 +1441,13 @@ public void deleteObject_stringPidAll() throws Exception { String metadataPathString = fileHashStore.storeMetadata( metadataStream, pid, testFormatId ); - String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); + InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile); + String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStreamTwo, pid); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); Path metadataPath = Paths.get(metadataPathString); Path metadataDefaultPath = Paths.get(metadataDefaultPathString); + metadataStreamTwo.close(); // Confirm expected documents exist assertTrue(Files.exists(metadataPath)); @@ -1425,6 +1476,7 @@ public void deleteObject_stringPidNoMetadataDocs() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Get metadata file Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -1451,6 +1503,7 @@ public void deleteObject_pidType_objectDeleted() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); fileHashStore.deleteObject(fhsDeleteTypePid, pid); @@ -1481,6 +1534,7 @@ public void deleteObject_pidType_referencesDeleted() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -1507,6 +1561,7 @@ public void deleteObject_pidType_CidRefsFileNotEmptyObjectExistsStill() throws E ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String pidExtra = "dou.test" + pid; String cid = objInfo.getCid(); fileHashStore.tagObject(pidExtra, cid); @@ -1536,6 +1591,7 @@ public void deleteObject_pidType_pidOrphan() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); String pidExtra = "dou.test" + pid; Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -1632,6 +1688,7 @@ public void deleteObject_cidType() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); String cid = objInfo.getCid(); fileHashStore.deleteObject(fhsDeleteTypeCid, cid); @@ -1662,6 +1719,7 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); fileHashStore.deleteObject(fhsDeleteTypeCid, cid); @@ -1735,6 +1793,7 @@ public void deleteMetadata() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, pid, null); + metadataStream.close(); String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); fileHashStore.deleteMetadata(pid, storeFormatId); @@ -1768,8 +1827,14 @@ public void deleteMetadata_overload() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.storeMetadata(metadataStream, pid, null); - fileHashStore.storeMetadata(metadataStream, pid, formatIdTwo); - fileHashStore.storeMetadata(metadataStream, pid, formatIdThree); + metadataStream.close(); + InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile); + fileHashStore.storeMetadata(metadataStreamTwo, pid, formatIdTwo); + metadataStreamTwo.close(); + InputStream metadataStreamThree = Files.newInputStream(testMetaDataFile); + fileHashStore.storeMetadata(metadataStreamThree, pid, formatIdThree); + metadataStreamThree.close(); + fileHashStore.deleteMetadata(pid); @@ -1885,6 +1950,7 @@ public void getHexDigest() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); // Then get the checksum String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); @@ -1951,6 +2017,7 @@ public void getHexDigest_badAlgo() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); fileHashStore.getHexDigest(pid, "BLAKE2S"); }); @@ -1970,6 +2037,7 @@ public void findObject_cid() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cidRetrieved = fileHashStore.findObject(pid); assertEquals(cidRetrieved, objInfo.getCid()); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 86254213..2cd67e6b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -184,6 +184,7 @@ public void putObject_testHarness_id() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); @@ -202,6 +203,7 @@ public void putObject_objSize() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -220,6 +222,7 @@ public void putObject_testHarness_hexDigests() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + dataStream.close(); Map hexDigests = address.getHexDigests(); @@ -252,6 +255,7 @@ public void putObject_validateChecksumValue() throws Exception { ObjectMetadata address = fileHashStore.putObject( dataStream, pid, null, checksumCorrect, "MD2", -1 ); + dataStream.close(); String objCid = address.getCid(); // Get relative path @@ -276,6 +280,7 @@ public void putObject_additionalAlgo_correctChecksumValue() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, "MD2", null, null, -1); + dataStream.close(); String md2 = testData.pidData.get(pid).get("md2"); assertEquals(checksumCorrect, md2); @@ -295,6 +300,7 @@ public void putObject_incorrectChecksumValue() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", -1); + dataStream.close(); }); } @@ -310,6 +316,7 @@ public void putObject_emptyChecksumValue() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, null, " ", "MD2", -1); + dataStream.close(); }); } @@ -325,6 +332,7 @@ public void putObject_nullChecksumValue() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, null, null, "MD2", -1); + dataStream.close(); }); } @@ -340,6 +348,7 @@ public void putObject_emptyChecksumAlgorithmValue() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, null, "abc", " ", -1); + dataStream.close(); }); } @@ -354,6 +363,7 @@ public void putObject_nullChecksumAlgorithmValue() { Path testDataFile = testData.getTestFile(pid); InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, null, "abc", null, -1); + dataStream.close(); }); } @@ -372,6 +382,7 @@ public void putObject_objSizeCorrect() throws Exception { ObjectMetadata objInfo = fileHashStore.putObject( dataStream, pid, null, null, null, objectSize ); + dataStream.close(); // Check id (sha-256 hex digest of the ab_id (pid)) assertEquals(objectSize, objInfo.getSize()); @@ -392,6 +403,7 @@ public void putObject_objSizeIncorrect() { ObjectMetadata objInfo = fileHashStore.putObject( dataStream, pid, null, null, null, 1000 ); + dataStream.close(); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -412,11 +424,13 @@ public void putObject_duplicateObject() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, null, null, null, -1); + dataStream.close(); // Try duplicate upload String pidTwo = pid + ".test"; InputStream dataStreamTwo = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStreamTwo, pidTwo, null, null, null, -1); + dataStreamTwo.close(); // Confirm there are no files in 'objects/tmp' directory Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -436,6 +450,7 @@ public void putObject_invalidAlgorithm() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, "SM2", null, null, -1); + dataStream.close(); }); } @@ -451,6 +466,7 @@ public void putObject_emptyAlgorithm() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.putObject(dataStream, pid, " ", null, null, -1); + dataStream.close(); }); } @@ -470,6 +486,7 @@ public void writeToTmpFileAndGenerateChecksums() throws Exception { Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( newTmpFile, dataStream, null, null ); + dataStream.close(); // Validate checksum values String md5 = testData.pidData.get(pid).get("md5"); @@ -502,6 +519,7 @@ public void writeToTmpFileAndGenerateChecksums_tmpFileSize() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, null); + dataStream.close(); long testDataFileSize = Files.size(testDataFile); long tmpFileSize = Files.size(newTmpFile.toPath()); @@ -528,6 +546,7 @@ public void writeToTmpFileAndGenerateChecksums_addAlgo() throws Exception { Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( newTmpFile, dataStream, addAlgo, null ); + dataStream.close(); // Validate checksum values String md2 = testData.pidData.get(pid).get("md2"); @@ -554,6 +573,7 @@ public void writeToTmpFileAndGenerateChecksums_checksumAlgo() throws Exception { Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( newTmpFile, dataStream, null, checksumAlgo ); + dataStream.close(); // Validate checksum values String sha512224 = testData.pidData.get(pid).get("sha512-224"); @@ -581,6 +601,7 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( newTmpFile, dataStream, addAlgo, checksumAlgo ); + dataStream.close(); // Validate checksum values String md2 = testData.pidData.get(pid).get("md2"); @@ -610,6 +631,7 @@ public void writeToTmpFileAndGenerateChecksums_invalidAlgo() { fileHashStore.writeToTmpFileAndGenerateChecksums( newTmpFile, dataStream, addAlgo, null ); + dataStream.close(); }); } } @@ -693,6 +715,7 @@ public void putMetadata() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.putMetadata(metadataStream, pid, null); + metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); @@ -730,6 +753,7 @@ public void putMetadata_pidNull() { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.putMetadata(metadataStream, null, null); + metadataStream.close(); }); } } @@ -749,6 +773,7 @@ public void putMetadata_pidEmpty() { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.putMetadata(metadataStream, "", null); + metadataStream.close(); }); } } @@ -768,6 +793,7 @@ public void putMetadata_pidEmptySpaces() { InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.putMetadata(metadataStream, " ", null); + metadataStream.close(); }); } } @@ -788,6 +814,7 @@ public void writeToTmpMetadataFile() throws Exception { boolean metadataWritten = fileHashStore.writeToTmpMetadataFile( newTmpFile, metadataStream ); + metadataStream.close(); assertTrue(metadataWritten); } } @@ -809,6 +836,7 @@ public void writeToTmpMetadataFile_tmpFileSize() throws Exception { boolean metadataWritten = fileHashStore.writeToTmpMetadataFile( newTmpFile, metadataStream ); + metadataStream.close(); assertTrue(metadataWritten); long tmpMetadataFileSize = Files.size(newTmpFile.toPath()); @@ -832,6 +860,7 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { // Write it to the tmpFile InputStream metadataStream = Files.newInputStream(testMetaDataFile); fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + metadataStream.close(); // Create InputStream to tmp File InputStream metadataStoredStream; @@ -852,6 +881,7 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { while ((bytesRead = metadataStoredStream.read(buffer)) != -1) { sha256.update(buffer, 0, bytesRead); } + metadataStoredStream.close(); } catch (IOException ioe) { ioe.printStackTrace(); @@ -881,12 +911,14 @@ public void isStringInRefsFile_pidFound() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); String pidTwo = pid + ".test"; InputStream dataStreamDup = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject( dataStreamDup, pidTwo, null, null, null, -1 ); + dataStreamDup.close(); String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); @@ -907,6 +939,7 @@ public void isStringInRefsFile_pidNotFound() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); @@ -926,6 +959,7 @@ public void deleteObjectByCid() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); // Store object only ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); String cid = objInfo.getCid(); // Try deleting the object @@ -958,6 +992,7 @@ public void tryDeleteObjectByCid_cidRefsFileContainsPids() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); // Try deleting the object @@ -980,6 +1015,7 @@ public void getExpectedPath() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -1003,6 +1039,7 @@ public void getExpectedPath_objectPath() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); // Manually form the permanent address of the actual cid @@ -1033,6 +1070,7 @@ public void getExpectedPath_metadataPath() throws Exception { InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); Path storePath = Paths.get(fhsProperties.getProperty("storePath")); String storeFormatId = fhsProperties.getProperty("storeMetadataNamespace"); @@ -1074,6 +1112,7 @@ public void getExpectedPath_pidRefsPaths() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); // Manually form the permanent address of the actual cid @@ -1109,6 +1148,7 @@ public void getExpectedPath_cidRefsPaths() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); + dataStream.close(); String cid = objInfo.getCid(); // Manually form the permanent address of the actual cid diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 5df9fc7b..848eb383 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -434,6 +434,7 @@ public void testExistingHashStoreConfiguration_missingYaml() { InputStream dataStream = Files.newInputStream(testDataFile); secondHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); } // Delete configuration diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 0c12576d..c69cbb1f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -408,6 +408,7 @@ public void verifyObject_correctValues() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); @@ -433,6 +434,7 @@ public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); // Get verifyObject args String expectedChecksum = testData.pidData.get(pid).get("md2"); @@ -456,6 +458,7 @@ public void verifyObject_unsupportedAlgo() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); assertThrows(UnsupportedHashAlgorithmException.class, () -> { fileHashStore.verifyObject( @@ -476,6 +479,7 @@ public void verifyObject_mismatchedValuesNonMatchingSize() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); @@ -502,6 +506,7 @@ public void verifyObject_mismatchedValuesNonMatchingChecksum() throws Exception InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); From f192ae41da94bb3d3321344e90d9c6762b45694c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 17 Jun 2024 09:04:41 -0700 Subject: [PATCH 292/553] Cleanup 'deleteObject' by deleting now irrelevant code related to retrieving metadata docs --- .../hashstore/filehashstore/FileHashStore.java | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0dcacff4..1f8bfcca 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1151,20 +1151,10 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti } try { - // Get list of metadata documents first, these will always be deleted if they exist - // and reduces the time spent in the synchronization block. We have locked the `pid` - // so we are safe to proceed with working on `pid` related metadata documents. - // Metadata directory - String pidHexDigest = - FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidRelativePath = - FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, - pidHexDigest); - + // Before we begin deletion process, we look for the `cid` by calling + // `findObject` which will throw custom exceptions if there is an issue with + // the reference files, which help us determine the path to proceed with. try { - // Before we begin deletion process, we look for the `cid` by calling - // `findObject` which will throw custom exceptions if there is an issue with - // the reference files, which help us determine the path to proceed with. cid = findObject(id); // If no exceptions are thrown, we proceed to synchronization based on the `cid` From 2ec1616e50e5ba4aedbc5fecd26220bf451e4a0a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 12:12:58 -0700 Subject: [PATCH 293/553] Revise 'HashStoreFactory' .getHashStore method exception occurence to call .getMessage() --- .../java/org/dataone/hashstore/HashStoreFactory.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreFactory.java b/src/main/java/org/dataone/hashstore/HashStoreFactory.java index 013a9d6f..1a7b9640 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreFactory.java +++ b/src/main/java/org/dataone/hashstore/HashStoreFactory.java @@ -53,13 +53,13 @@ public static HashStore getHashStore(String classPackage, Properties storeProper } catch (ClassNotFoundException cnfe) { String errMsg = "HashStoreFactory - Unable to find 'FileHashStore' classPackage: " - + classPackage + " - " + cnfe.fillInStackTrace(); + + classPackage + " - " + cnfe.getMessage(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (NoSuchMethodException nsme) { String errMsg = "HashStoreFactory - Constructor not found for 'FileHashStore': " - + classPackage + " - " + nsme.fillInStackTrace(); + + classPackage + " - " + nsme.getMessage(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); @@ -67,19 +67,19 @@ public static HashStore getHashStore(String classPackage, Properties storeProper String errMsg = "HashStoreFactory - Executing method does not have access to the definition of" + " the specified class , field, method or constructor. " + iae - .fillInStackTrace(); + .getMessage(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (InstantiationException ie) { String errMsg = "HashStoreFactory - Error instantiating 'FileHashStore'" - + "(likely related to `.newInstance()`): " + ie.fillInStackTrace(); + + "(likely related to `.newInstance()`): " + ie.getMessage(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (InvocationTargetException ite) { String errMsg = "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite - .fillInStackTrace(); + .getMessage(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); From 2d20d82116466c91f38a38ba326cd9f7fcb9b536 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 12:18:03 -0700 Subject: [PATCH 294/553] Fix malformed 'hashstore.yaml' base template (had redundant key value) --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1f8bfcca..0bce587d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -396,19 +396,18 @@ protected String buildHashStoreYamlString( + "# │ └── c1\n" + "# │ └── " + "8f0b04e812a3b4c8f686ce34e6fec558804bf61e54b176742a7f6368d6\n\n" + "############### Format of the Metadata ###############\n" - + "store_sysmeta_namespace: \"http://ns.dataone" + ".org/service/types/v2.0\"\n\n" + + "store_metadata_namespace: \"%s\"\n" + "############### Hash Algorithms ###############\n" + "# Hash algorithm to use when calculating object's hex digest " + "for the permanent address\n" + "store_algorithm: \"%s\"\n" + "############### Hash Algorithms ###############\n" + "# Hash algorithm to use when calculating object's hex digest " - + "for the permanent address\n" + "store_metadata_namespace: \"%s\"\n" + + "for the permanent address\n" + "# The default algorithm list includes the hash algorithms " + "calculated when storing an\n" + "# object to disk and returned to the caller after successful " + "storage.\n" + "store_default_algo_list:\n" + "- \"MD5\"\n" + "- \"SHA-1\"\n" + "- \"SHA-256\"\n" - + "- \"SHA-384\"\n" + "- \"SHA-512\"\n", storeDepth, storeWidth, storeAlgorithm, - storeMetadataNamespace + + "- \"SHA-384\"\n" + "- \"SHA-512\"\n", storeDepth, storeWidth, storeMetadataNamespace, storeAlgorithm ); } From 44a55988d9cf90f7e5849219acf4ce841d60205b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 12:38:12 -0700 Subject: [PATCH 295/553] Fix incorrect extends exception type in 'NonMatchingChecksumException' and 'NonMatchingChecksumException' class --- .../hashstore/exceptions/NonMatchingChecksumException.java | 2 +- .../hashstore/exceptions/NonMatchingObjSizeException.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java index e5bf79f9..97d797d0 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java @@ -2,7 +2,7 @@ import java.io.IOException; -public class NonMatchingChecksumException extends IOException { +public class NonMatchingChecksumException extends IllegalArgumentException { public NonMatchingChecksumException(String message) { super(message); diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java index 532abdb9..73f853c4 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java @@ -2,7 +2,7 @@ import java.io.IOException; -public class NonMatchingObjSizeException extends IOException { +public class NonMatchingObjSizeException extends IllegalArgumentException { public NonMatchingObjSizeException(String message) { super(message); From 0bfb95b869c86a1e1de481cce2ea360de2f990f0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 12:40:37 -0700 Subject: [PATCH 296/553] Add missing javadocs to new custom exception classes --- .../hashstore/exceptions/NonMatchingChecksumException.java | 4 +++- .../hashstore/exceptions/NonMatchingObjSizeException.java | 4 +++- .../exceptions/UnsupportedHashAlgorithmException.java | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java index 97d797d0..3704144e 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java @@ -1,6 +1,8 @@ package org.dataone.hashstore.exceptions; -import java.io.IOException; +/** + * An exception thrown when a checksum does not match what is expected. + */ public class NonMatchingChecksumException extends IllegalArgumentException { diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java index 73f853c4..c42cf99c 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java @@ -1,6 +1,8 @@ package org.dataone.hashstore.exceptions; -import java.io.IOException; +/** + * An exception thrown when a data object size does not match what is expected. + */ public class NonMatchingObjSizeException extends IllegalArgumentException { diff --git a/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java index 0524d298..f196c77d 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java @@ -1,5 +1,9 @@ package org.dataone.hashstore.exceptions; +/** + * An exception thrown when a given algorithm is not supported by FileHashStore java + */ + public class UnsupportedHashAlgorithmException extends IllegalArgumentException { public UnsupportedHashAlgorithmException(String message) { From e2bceeee02cfd7cf075fe407a5e4ebfd95fb1c98 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 12:48:35 -0700 Subject: [PATCH 297/553] Add .printStackTrace() method to 'HashStoreFactory' exception scenarios to assist with debugging --- src/main/java/org/dataone/hashstore/HashStoreFactory.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/HashStoreFactory.java b/src/main/java/org/dataone/hashstore/HashStoreFactory.java index 1a7b9640..6c8f2fa7 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreFactory.java +++ b/src/main/java/org/dataone/hashstore/HashStoreFactory.java @@ -75,12 +75,14 @@ public static HashStore getHashStore(String classPackage, Properties storeProper String errMsg = "HashStoreFactory - Error instantiating 'FileHashStore'" + "(likely related to `.newInstance()`): " + ie.getMessage(); logHashStore.error(errMsg); + ie.printStackTrace(); throw new HashStoreFactoryException(errMsg); } catch (InvocationTargetException ite) { String errMsg = "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite .getMessage(); logHashStore.error(errMsg); + ite.printStackTrace(); throw new HashStoreFactoryException(errMsg); } From 8f3d5d936bb0ba66b865b80d014cb3eaea97294d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 13:45:25 -0700 Subject: [PATCH 298/553] Update 'HashStoreFactory' exception scenarios to call '.getCause' instead of '.getMessage' to get more context --- .../java/org/dataone/hashstore/HashStoreFactory.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreFactory.java b/src/main/java/org/dataone/hashstore/HashStoreFactory.java index 6c8f2fa7..a3d9b9ff 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreFactory.java +++ b/src/main/java/org/dataone/hashstore/HashStoreFactory.java @@ -53,13 +53,13 @@ public static HashStore getHashStore(String classPackage, Properties storeProper } catch (ClassNotFoundException cnfe) { String errMsg = "HashStoreFactory - Unable to find 'FileHashStore' classPackage: " - + classPackage + " - " + cnfe.getMessage(); + + classPackage + " - " + cnfe.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (NoSuchMethodException nsme) { String errMsg = "HashStoreFactory - Constructor not found for 'FileHashStore': " - + classPackage + " - " + nsme.getMessage(); + + classPackage + " - " + nsme.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); @@ -67,22 +67,20 @@ public static HashStore getHashStore(String classPackage, Properties storeProper String errMsg = "HashStoreFactory - Executing method does not have access to the definition of" + " the specified class , field, method or constructor. " + iae - .getMessage(); + .getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (InstantiationException ie) { String errMsg = "HashStoreFactory - Error instantiating 'FileHashStore'" - + "(likely related to `.newInstance()`): " + ie.getMessage(); + + "(likely related to `.newInstance()`): " + ie.getCause(); logHashStore.error(errMsg); ie.printStackTrace(); throw new HashStoreFactoryException(errMsg); } catch (InvocationTargetException ite) { - String errMsg = "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite - .getMessage(); + String errMsg = "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite.getCause(); logHashStore.error(errMsg); - ite.printStackTrace(); throw new HashStoreFactoryException(errMsg); } From c35be2a42c97ab392c65ab383f9b831b979fa853 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 13:46:54 -0700 Subject: [PATCH 299/553] Refactor 'FileHashStore' init process when checking a given 'storePath' for potential conflicting HashStore folders to specifically check for 'objects', 'metadata' and 'refs' folders rather than any folder or files, and add new junit tests --- .../filehashstore/FileHashStore.java | 22 +++-- .../org/dataone/hashstore/HashStoreTest.java | 83 ++++++++++++++++++- 2 files changed, 96 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0bce587d..b442d4b9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -278,19 +278,27 @@ protected void verifyHashStoreProperties( "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs); } else { - // Check if HashStore exists at the given store path (and is missing config) + // Check if HashStore related folders exist at the given store path logFileHashStore.debug( "FileHashStore - 'hashstore.yaml' not found, check store path for" + " objects and directories." ); if (Files.isDirectory(storePath)) { - if (FileHashStoreUtility.dirContainsFiles(storePath)) { - String errMsg = "FileHashStore - Missing 'hashstore.yaml' but directories" - + " and/or objects found."; - logFileHashStore.fatal(errMsg); - throw new IllegalStateException(errMsg); - + Path[] conflictingDirectories = { + storePath.resolve("objects"), + storePath.resolve("metadata"), + storePath.resolve("refs") + }; + for (Path dir : conflictingDirectories) { + if (Files.exists(dir) && Files.isDirectory(dir)) { + String errMsg = "FileHashStore - Unable to initialize HashStore." + + "`hashstore.yaml` is not found but potential conflicting" + + " directory exists: " + dir + ". Please choose a new folder or" + + " delete the conflicting directory and try again."; + logFileHashStore.fatal(errMsg); + throw new IllegalStateException(errMsg); + } } } logFileHashStore.debug( diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 9b9d9644..49a7b07f 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -1,6 +1,7 @@ package org.dataone.hashstore; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -28,7 +29,7 @@ public class HashStoreTest { @BeforeEach public void getHashStore() { String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - Path rootDirectory = tempFolder.resolve("metacat"); + Path rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -72,7 +73,7 @@ public void isHashStore() { public void hashStore_classPackageNull() { assertThrows(HashStoreFactoryException.class, () -> { Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", "/test"); + storeProperties.setProperty("storePath", "/hashstore"); storeProperties.setProperty("storeDepth", "3"); storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); @@ -135,4 +136,82 @@ public void hashStore_storeObjects() throws Exception { assertEquals(objContentId, objInfo.getCid()); } } + + /** + * Confirm factory throws exception when a given folder is empty but an objects folder exists + */ + @Test + public void getHashStore_objFolderExists() throws Exception { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + Path rootDirectory = tempFolder.resolve("doutest/hashstore"); + + Path conflictingObjDirectory = rootDirectory.resolve("objects"); + Files.createDirectories(rootDirectory.resolve("objects")); + assertTrue(Files.exists(conflictingObjDirectory)); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" + ); + + assertThrows(HashStoreFactoryException.class, () -> { + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + }); + } + + /** + * Confirm factory throws exception when a given folder is empty but an objects folder exists + */ + @Test + public void getHashStore_metadataFolderExists() throws Exception { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + Path rootDirectory = tempFolder.resolve("doutest/hashstore"); + + Path conflictingObjDirectory = rootDirectory.resolve("metadata"); + Files.createDirectories(rootDirectory.resolve("metadata")); + assertTrue(Files.exists(conflictingObjDirectory)); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" + ); + + assertThrows(HashStoreFactoryException.class, () -> { + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + }); + } + + /** + * Confirm factory throws exception when a given folder is empty but an objects folder exists + */ + @Test + public void getHashStore_refsFolderExists() throws Exception { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + Path rootDirectory = tempFolder.resolve("doutest/hashstore"); + + Path conflictingObjDirectory = rootDirectory.resolve("refs"); + Files.createDirectories(rootDirectory.resolve("refs")); + assertTrue(Files.exists(conflictingObjDirectory)); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" + ); + + assertThrows(HashStoreFactoryException.class, () -> { + hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); + }); + } } From a1d0ef4705ab4cf783d9520f10079851acbfb89c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 13:51:03 -0700 Subject: [PATCH 300/553] Add and revise logging statements in 'verifyHashStoreProperties' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b442d4b9..29d49b71 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -276,14 +276,14 @@ protected void verifyHashStoreProperties( existingStoreAlgorithm); FileHashStoreUtility.checkObjectEquality( "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs); + logFileHashStore.info("FileHashStore - 'hashstore.yaml' found and HashStore verified"); } else { // Check if HashStore related folders exist at the given store path logFileHashStore.debug( - "FileHashStore - 'hashstore.yaml' not found, check store path for" - + " objects and directories." + "FileHashStore - 'hashstore.yaml' not found, checking store path for" + + " `/objects`, `/metadata` and `/refs` directories." ); - if (Files.isDirectory(storePath)) { Path[] conflictingDirectories = { storePath.resolve("objects"), From 7dba7399224f4320c07e19fcb27003b13b830053 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 13:54:55 -0700 Subject: [PATCH 301/553] Update junit test classes for 'storePath' and sysmeta namespace clarity --- .../java/org/dataone/hashstore/HashStoreClientTest.java | 8 ++++---- .../filehashstore/FileHashStoreInterfaceTest.java | 2 +- .../filehashstore/FileHashStoreProtectedTest.java | 4 ++-- .../hashstore/filehashstore/FileHashStorePublicTest.java | 2 +- .../filehashstore/FileHashStoreReferencesTest.java | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 8a28ec62..c89817ef 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -35,7 +35,7 @@ public class HashStoreClientTest { @BeforeEach public void getHashStore() { String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - Path rootDirectory = tempFolder.resolve("metacat"); + Path rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -43,7 +43,7 @@ public void getHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); try { @@ -117,7 +117,7 @@ public Path getObjectAbsPath(String id, String objType) { public void client_createHashStore() throws Exception { String optCreateHashstore = "-chs"; String optStore = "-store"; - String optStorePath = tempFolder + "/metacat"; + String optStorePath = tempFolder + "/hashstore"; String optStoreDepth = "-dp"; String optStoreDepthValue = "3"; String optStoreWidth = "-wp"; @@ -125,7 +125,7 @@ public void client_createHashStore() throws Exception { String optAlgo = "-ap"; String optAlgoValue = "SHA-256"; String optFormatId = "-nsp"; - String optFormatIdValue = "http://ns.dataone.org/service/types/v2.0"; + String optFormatIdValue = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; String[] args = {optCreateHashstore, optStore, optStorePath, optStoreDepth, optStoreDepthValue, optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, optFormatId, optFormatIdValue}; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 4949093b..3f5575e0 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -65,7 +65,7 @@ public class FileHashStoreInterfaceTest { */ @BeforeEach public void initializeFileHashStore() { - rootDirectory = tempFolder.resolve("metacat"); + rootDirectory = tempFolder.resolve("hashstore"); fhsDeleteTypePid = HashStoreIdTypes.pid.getName(); fhsDeleteTypeCid = HashStoreIdTypes.cid.getName(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 2cd67e6b..3ead2bb6 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -39,7 +39,7 @@ public class FileHashStoreProtectedTest { */ @BeforeEach public void initializeFileHashStore() { - Path rootDirectory = tempFolder.resolve("metacat"); + Path rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -67,7 +67,7 @@ public void initializeFileHashStore() { * Non-test method using to generate a temp file */ public File generateTemporaryFile() throws Exception { - Path directory = tempFolder.resolve("metacat"); + Path directory = tempFolder.resolve("hashstore"); // newFile return FileHashStoreUtility.generateTmpFile("testfile", directory); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 848eb383..1c327be1 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -37,7 +37,7 @@ public class FileHashStorePublicTest { @BeforeEach public void initializeFileHashStore() { Path root = tempFolder; - rootDirectory = root.resolve("metacat"); + rootDirectory = root.resolve("hashstore"); objStringFull = rootDirectory.resolve("objects"); objTmpStringFull = rootDirectory.resolve("objects/tmp"); metadataStringFull = rootDirectory.resolve("metadata"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index c69cbb1f..83535d6f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -42,7 +42,7 @@ public class FileHashStoreReferencesTest { */ @BeforeEach public void initializeFileHashStore() { - rootDirectory = tempFolder.resolve("metacat"); + rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); From d91aa8fafd9d6c6d8aebfdb34eff9697deec0e3c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 14:01:21 -0700 Subject: [PATCH 302/553] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6974b273..505caa86 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ - Contact us: support@dataone.org - [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) -HashStore is a server-side java library implementing a content-based identifier file system for storing and accessing data and metadata for DataONE services. The package is used in DataONE system components that need direct, filesystem-based access to data objects, their system metadata, and extended metadata about the objects. This package is a core component of the [DataONE federation](https://dataone.org), and supports large-scale object storage for a variety of repositories, including the [KNB Data Repository](http://knb.ecoinformatics.org), the [NSF Arctic Data Center](https://arcticdata.io/catalog/), the [DataONE search service](https://search.dataone.org), and other repositories. +HashStore is a server-side java library that implements an object storage file system for storing and accessing data and metadata for DataONE services. The package is used in DataONE system components that need direct, filesystem-based access to data objects, their system metadata, and extended metadata about the objects. This package is a core component of the [DataONE federation](https://dataone.org), and supports large-scale object storage for a variety of repositories, including the [KNB Data Repository](http://knb.ecoinformatics.org), the [NSF Arctic Data Center](https://arcticdata.io/catalog/), the [DataONE search service](https://search.dataone.org), and other repositories. DataONE in general, and HashStore in particular, are open source, community projects. We [welcome contributions](https://github.com/DataONEorg/hashstore-java/blob/main/CONTRIBUTING.md) in many forms, including code, graphics, documentation, bug reports, testing, etc. Use the [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) to discuss these contributions with us. @@ -17,7 +17,7 @@ Documentation is a work in progress, and can be found on the [Metacat repository ## HashStore Overview -HashStore is a content-addressable file management system that utilizes the content identifier of an object to address files. The system stores both objects, references (refs) and metadata in its respective directories and provides an API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of HashStore. +HashStore is an object storage system that provides persistent file-based storage using content hashes to de-duplicate data. The system stores both objects, references (refs) and metadata in its respective directories and utilizes an identifier-based API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of HashStore. ###### Public API Methods - storeObject @@ -188,7 +188,7 @@ $ mvn clean package -Dmaven.test.skip=true # Get help $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -h -# Step 2: +# Step 2: Determine where your hashstore should live (ex. `/var/hashstore`) ## Create a HashStore (long option) $ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 From d1d048e7e34bf93c18f3f4415eb477d8dadf9ace Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 14:48:37 -0700 Subject: [PATCH 303/553] Update 'pom.xml' to create a specific shaded-jar file which contains dependencies, and normal .jar file which only contains 'hashstore-java' library --- pom.xml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 532e1c7b..e6501804 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ org.dataone hashstore - 1.0-SNAPSHOT + 1.0 hashstore https://github.com/DataONEorg/hashstore-java @@ -86,6 +86,8 @@ shade + ${basedir}/target/${project.name}-${project.version}-shaded.jar + ${basedir} From d49ce47f874955f27462732165878dc204c53527 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 18 Jun 2024 14:58:24 -0700 Subject: [PATCH 304/553] Re-add 'SNAPSHOT' to version name in 'pom.xml' to represent project is still in development state --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e6501804..56b9668e 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ org.dataone hashstore - 1.0 + 1.0-SNAPSHOT hashstore https://github.com/DataONEorg/hashstore-java From eb01034c8952f55c856fe4238a44ffdcd7390b81 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 19 Jun 2024 09:50:42 -0700 Subject: [PATCH 305/553] Initial update to 'findObject' to return a map, update the 'HashStore' interface and fix affected junit tests --- src/main/java/org/dataone/hashstore/HashStore.java | 6 ++++-- .../java/org/dataone/hashstore/HashStoreClient.java | 4 ++-- .../hashstore/filehashstore/FileHashStore.java | 12 ++++++++---- .../filehashstore/FileHashStoreInterfaceTest.java | 4 ++-- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 94429365..cb08105d 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.io.InputStream; import java.security.NoSuchAlgorithmException; +import java.util.Map; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; @@ -162,7 +163,8 @@ public void verifyObject( UnsupportedHashAlgorithmException, IOException; /** - * Checks whether an object referenced by a pid exists and returns the content identifier. + * Checks whether an object referenced by a pid exists and returns a map containing the + * absolute path to the object, pid refs file, cid refs file and sysmeta document. * * @param pid Authority-based identifier * @return Content identifier (cid) @@ -177,7 +179,7 @@ public void verifyObject( * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the * expected pid is not found in the cid refs file. */ - public String findObject(String pid) throws NoSuchAlgorithmException, IOException, + public Map findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, PidNotFoundInCidRefsFileException; /** diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 65d97359..2d8c27a4 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -146,8 +146,8 @@ public static void main(String[] args) throws Exception { String pid = cmd.getOptionValue("pid"); FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - String cid = hashStore.findObject(pid); - System.out.println(cid); + Map objInfoMap = hashStore.findObject(pid); + System.out.println(objInfoMap.get("cid")); } else if (cmd.hasOption("storeobject")) { System.out.println("Storing object"); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 29d49b71..341b82d3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -813,7 +813,7 @@ public void verifyObject( } @Override - public String findObject(String pid) throws NoSuchAlgorithmException, IOException, + public Map findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); @@ -845,7 +845,9 @@ public String findObject(String pid) throws NoSuchAlgorithmException, IOExceptio ); Path realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); if (Files.exists(realPath)) { - return cid; + Map objInfoMap = new HashMap<>(); + objInfoMap.put("cid", cid); + return objInfoMap; } else { String errMsg = "FileHashStore.findObject - Object with cid: " + cid @@ -1162,7 +1164,8 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { - cid = findObject(id); + Map objInfoMap = findObject(id); + cid = objInfoMap.get("cid"); // If no exceptions are thrown, we proceed to synchronization based on the `cid` // Multiple threads may access the cid reference file (which contains a list of @@ -1505,7 +1508,8 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE // Find the content identifier if (algorithm.equals(OBJECT_STORE_ALGORITHM)) { - return findObject(pid); + Map objInfoMap = findObject(pid); + return objInfoMap.get("cid"); } else { // Get permanent address of the pid diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 3f5575e0..ac21a33d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -2039,8 +2039,8 @@ public void findObject_cid() throws Exception { ); dataStream.close(); - String cidRetrieved = fileHashStore.findObject(pid); - assertEquals(cidRetrieved, objInfo.getCid()); + Map objInfoMap = fileHashStore.findObject(pid); + assertEquals(objInfoMap.get("cid"), objInfo.getCid()); } } From 738c2f32ab8869e7999fe4be9a9a179889b5b922 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 19 Jun 2024 10:07:43 -0700 Subject: [PATCH 306/553] Add 'object_path' key to map returned from 'findObject' and add new junit test --- .../filehashstore/FileHashStore.java | 1 + .../FileHashStoreInterfaceTest.java | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 341b82d3..0692895f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -847,6 +847,7 @@ public Map findObject(String pid) throws NoSuchAlgorithmExceptio if (Files.exists(realPath)) { Map objInfoMap = new HashMap<>(); objInfoMap.put("cid", cid); + objInfoMap.put("object_path", realPath.toString()); return objInfoMap; } else { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index ac21a33d..c0e5bc1b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -2044,6 +2044,35 @@ public void findObject_cid() throws Exception { } } + /** + * Check that findObject returns cid as expected. + */ + @Test + public void findObject_cidPath() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + Map objInfoMap = fileHashStore.findObject(pid); + String objectPath = objInfoMap.get("object_path"); + + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); + + assertEquals(objectPath, realPath.toString()); + } + } + /** * Confirm findObject throws exception when cid object does not exist but reference * files exist. From fe21ef9996d52a2f75c683e21b5254db4984d4ac Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 19 Jun 2024 12:03:05 -0700 Subject: [PATCH 307/553] Add remaining refs paths and sysmeta path keys and values to map returned from 'findObject', and add new junit tests --- .../filehashstore/FileHashStore.java | 11 +++ .../FileHashStoreInterfaceTest.java | 94 ++++++++++++++++++- 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0692895f..69cdd7d4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -848,6 +848,17 @@ public Map findObject(String pid) throws NoSuchAlgorithmExceptio Map objInfoMap = new HashMap<>(); objInfoMap.put("cid", cid); objInfoMap.put("object_path", realPath.toString()); + objInfoMap.put("pid_refs_path", absPidRefsPath.toString()); + objInfoMap.put("cid_refs_path", absCidRefsPath.toString()); + // If the default system metadata exists, include it + Path metadataPidExpectedPath = getExpectedPath( + pid, "metadata", DEFAULT_METADATA_NAMESPACE + ); + if (Files.exists(metadataPidExpectedPath)) { + objInfoMap.put("sysmeta_path", metadataPidExpectedPath.toString()); + } else { + objInfoMap.put("sysmeta_path", "Does not exist"); + } return objInfoMap; } else { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index c0e5bc1b..b44e3874 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -2045,7 +2045,7 @@ public void findObject_cid() throws Exception { } /** - * Check that findObject returns cid as expected. + * Check that findObject returns the path to the object as expected. */ @Test public void findObject_cidPath() throws Exception { @@ -2073,6 +2073,98 @@ public void findObject_cidPath() throws Exception { } } + /** + * Check that findObject returns the absolute path to the pid and cid refs file + */ + @Test + public void findObject_refsPaths() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + Map objInfoMap = fileHashStore.findObject(pid); + String cidRefsPath = objInfoMap.get("cid_refs_path"); + String pidRefsPath = objInfoMap.get("pid_refs_path"); + + Path cidRefsFilePath = fileHashStore.getExpectedPath(objInfo.getCid(), "refs", "cid"); + Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + + assertEquals(cidRefsPath, cidRefsFilePath.toString()); + assertEquals(pidRefsPath, pidRefsFilePath.toString()); + } + } + + /** + * Check that findObject returns the absolute path to sysmeta document if it exists + */ + @Test + public void findObject_sysmetaPath_exists() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + // Store Object + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + // Store Metadata + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); + System.out.println(metadataPath); + + + Map objInfoMap = fileHashStore.findObject(pid); + String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path sysmetaPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace + ); + System.out.println(sysmetaPath); + + assertEquals(objInfoSysmetaPath, sysmetaPath.toString()); + } + } + + /** + * Check that findObject returns "Does not exist." when there is no sysmeta for the pid. + */ + @Test + public void findObject_sysmetaPath_doesNotExist() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + + Map objInfoMap = fileHashStore.findObject(pid); + String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path sysmetaPath = fileHashStore.getExpectedPath( + pid, "metadata", storeMetadataNamespace + ); + + assertEquals(objInfoSysmetaPath, "Does not exist"); + } + } + /** * Confirm findObject throws exception when cid object does not exist but reference * files exist. From 8f7f26db2ad5d74404a080d7c1ddd73ed73139b5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 19 Jun 2024 12:43:11 -0700 Subject: [PATCH 308/553] Update variable names in 'findObject' method, update 'HashStoreClient's 'findObject' method to print out entirety of map received, and update junit tests --- .../dataone/hashstore/HashStoreClient.java | 16 +++++- .../filehashstore/FileHashStore.java | 4 +- .../hashstore/HashStoreClientTest.java | 56 +++++++++++++++++-- .../FileHashStoreInterfaceTest.java | 2 +- 4 files changed, 69 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 2d8c27a4..d049b25b 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -147,7 +147,21 @@ public static void main(String[] args) throws Exception { FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); Map objInfoMap = hashStore.findObject(pid); - System.out.println(objInfoMap.get("cid")); + String cid = objInfoMap.get("cid"); + String cidPath = objInfoMap.get("cid_object_path"); + String cidRefsPath = objInfoMap.get("cid_refs_path"); + String pidRefsPath = objInfoMap.get("pid_refs_path"); + String sysmetaPath = objInfoMap.get("sysmeta_path"); + System.out.println("Content Identifier:"); + System.out.println(cid); + System.out.println("Object Path:"); + System.out.println(cidPath); + System.out.println("Cid Reference File Path:"); + System.out.println(cidRefsPath); + System.out.println("Pid Reference File Path:"); + System.out.println(pidRefsPath); + System.out.println("Sysmeta Path:"); + System.out.println(sysmetaPath); } else if (cmd.hasOption("storeobject")) { System.out.println("Storing object"); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 69cdd7d4..c0453960 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -847,9 +847,9 @@ public Map findObject(String pid) throws NoSuchAlgorithmExceptio if (Files.exists(realPath)) { Map objInfoMap = new HashMap<>(); objInfoMap.put("cid", cid); - objInfoMap.put("object_path", realPath.toString()); - objInfoMap.put("pid_refs_path", absPidRefsPath.toString()); + objInfoMap.put("cid_object_path", realPath.toString()); objInfoMap.put("cid_refs_path", absCidRefsPath.toString()); + objInfoMap.put("pid_refs_path", absPidRefsPath.toString()); // If the default system metadata exists, include it Path metadataPidExpectedPath = getExpectedPath( pid, "metadata", DEFAULT_METADATA_NAMESPACE diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index c89817ef..1018a4d7 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -93,19 +93,40 @@ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String di * Utility method to get absolute path of a given object and objType * ("objects" or "metadata"). */ - public Path getObjectAbsPath(String id, String objType) { + public Path getObjectAbsPath(String id, String objType) throws Exception { + String storeAlgo = hsProperties.getProperty("storeAlgorithm"); int shardDepth = Integer.parseInt(hsProperties.getProperty("storeDepth")); int shardWidth = Integer.parseInt(hsProperties.getProperty("storeWidth")); - // Get relative path - String objCidShardString = this.getHierarchicalPathString(shardDepth, shardWidth, id); // Get absolute path Path storePath = Paths.get(hsProperties.getProperty("storePath")); Path absPath = null; if (objType.equals("object")) { + // Get relative path + String objCidShardString = getHierarchicalPathString(shardDepth, shardWidth, id); absPath = storePath.resolve("objects/" + objCidShardString); } if (objType.equals("metadata")) { - absPath = storePath.resolve("metadata/" + objCidShardString); + // Get pid metadata directory hash(pid) + String pidHash = FileHashStoreUtility.getPidHexDigest(id, storeAlgo); + String pidMetadataDirectory = getHierarchicalPathString(shardDepth, shardWidth, pidHash); + // Get document name hash(pid+formatId) + String metadataDocHash = + FileHashStoreUtility.getPidHexDigest(id + hsProperties.getProperty( + "storeMetadataNamespace"), storeAlgo); + absPath = storePath.resolve("metadata").resolve(pidMetadataDirectory).resolve(metadataDocHash); + } + if (objType.equals("cid")) { + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + shardDepth, shardWidth, id + ); + absPath = storePath.resolve("refs/cids").resolve(pidRelativePath); + } + if (objType.equals("pid")) { + String hashId = FileHashStoreUtility.getPidHexDigest(id, storeAlgo); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + shardDepth, shardWidth, hashId + ); + absPath = storePath.resolve("refs/pids").resolve(pidRelativePath); } return absPath; } @@ -461,11 +482,17 @@ public void client_findObject() throws Exception { PrintStream old = System.out; System.setOut(ps); + // Store object String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); hashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); + // Store metadata + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + hashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); // Call client String optFindObject = "-findobject"; @@ -477,6 +504,25 @@ public void client_findObject() throws Exception { HashStoreClient.main(args); String contentIdentifier = testData.pidData.get(pid).get("sha256"); + Path absObjPath = getObjectAbsPath(testData.pidData.get(pid).get("sha256"), "object"); + Path sysMetaPath = getObjectAbsPath(pid, "metadata"); + String storeAlgo = hsProperties.getProperty("storeAlgorithm").toLowerCase().replace( + "-", ""); + Path cidRefsPath = getObjectAbsPath( + testData.pidData.get(pid).get(storeAlgo), "cid" + );; + Path pidRefsPath = getObjectAbsPath( + pid, "pid" + ); + + String expectedOutPutPt1 = "Content Identifier:\n" + contentIdentifier + "\n"; + String expectedOutPutPt2 = "Object Path:\n" + absObjPath.toString() + "\n"; + String expectedOutPutPt3 = "Cid Reference File Path:\n" + cidRefsPath + "\n"; + String expectedOutPutPt4 = "Pid Reference File Path:\n" + pidRefsPath + "\n"; + String expectedOutPutPt5 = "Sysmeta Path:\n" + sysMetaPath; + String expectedOutPutFull = + expectedOutPutPt1 + expectedOutPutPt2 + expectedOutPutPt3 + expectedOutPutPt4 + expectedOutPutPt5;; + // Put things back System.out.flush(); @@ -484,7 +530,7 @@ public void client_findObject() throws Exception { // Confirm correct content identifier has been saved String pidStdOut = outputStream.toString(); - assertEquals(contentIdentifier, pidStdOut.trim()); + assertEquals(expectedOutPutFull, pidStdOut.trim()); } } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index b44e3874..1e5a4888 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -2062,7 +2062,7 @@ public void findObject_cidPath() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); Map objInfoMap = fileHashStore.findObject(pid); - String objectPath = objInfoMap.get("object_path"); + String objectPath = objInfoMap.get("cid_object_path"); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( storeDepth, storeWidth, objInfo.getCid() From 2149abd01a4b04d192c315aa7519133816561817 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 19 Jun 2024 13:03:54 -0700 Subject: [PATCH 309/553] Clean up 'TestDataHarness' class for clarity, fix bug in 'HashStoreClientTest' and update affected junit tests --- .../hashstore/HashStoreClientTest.java | 12 ++++---- .../FileHashStoreInterfaceTest.java | 2 +- .../FileHashStoreProtectedTest.java | 2 +- .../hashstore/testdata/TestDataHarness.java | 29 +++++-------------- 4 files changed, 15 insertions(+), 30 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 1018a4d7..8437df7a 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -91,7 +91,7 @@ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String di /** * Utility method to get absolute path of a given object and objType - * ("objects" or "metadata"). + * ("objects", "metadata", "cid", or "pid"). */ public Path getObjectAbsPath(String id, String objType) throws Exception { String storeAlgo = hsProperties.getProperty("storeAlgorithm"); @@ -109,7 +109,7 @@ public Path getObjectAbsPath(String id, String objType) throws Exception { // Get pid metadata directory hash(pid) String pidHash = FileHashStoreUtility.getPidHexDigest(id, storeAlgo); String pidMetadataDirectory = getHierarchicalPathString(shardDepth, shardWidth, pidHash); - // Get document name hash(pid+formatId) + // Get sysmeta name hash(pid+default_formatId) String metadataDocHash = FileHashStoreUtility.getPidHexDigest(id + hsProperties.getProperty( "storeMetadataNamespace"), storeAlgo); @@ -368,7 +368,7 @@ public void client_deleteObjects() throws Exception { HashStoreClient.main(args); // Confirm object was deleted - Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); + Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("sha256"), "object"); assertFalse(Files.exists(absPath)); // Put things back @@ -412,10 +412,8 @@ public void client_deleteMetadata() throws Exception { HashStoreClient.main(args); // Confirm metadata was deleted - Path absPath = getObjectAbsPath( - testData.pidData.get(pid).get("metadata_cid"), "metadata" - ); - assertFalse(Files.exists(absPath)); + Path sysmetaPath = getObjectAbsPath(pid, "metadata"); + assertFalse(Files.exists(sysmetaPath)); // Put things back System.out.flush(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 1e5a4888..e0ec0d03 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1391,7 +1391,7 @@ public void retrieveMetadata_verifyContent() throws Exception { String sha256MetadataDigest = DatatypeConverter.printHexBinary(sha256.digest()) .toLowerCase(); String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( - "metadata_sha256" + "metadata_cid_sha256" ); assertEquals(sha256MetadataDigest, sha256MetadataDigestFromTestData); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 3ead2bb6..d18adec2 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -891,7 +891,7 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( - "metadata_sha256" + "metadata_cid_sha256" ); assertEquals(sha256Digest, sha256MetadataDigestFromTestData); diff --git a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java index a106401a..2beb0ab8 100644 --- a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java +++ b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java @@ -10,11 +10,10 @@ * This class returns the test data expected hex digest values * * Notes: - * - "object_cid" is the SHA-256 hash of the pid * - algorithms without any prefixes are the algorithm hash of the pid's respective data object * content - * - "metadata_sha256" is the hash of the pid's respective metadata object content identifier - * - "metacat_cid" is the sha256 hash of the pid + formatId + * - "metadata_cid_sha256" is sha256 content identifier of the pid's metadata object + * - "sysmeta_address_sha256" is the sha256 hash of the pid + formatId * */ public class TestDataHarness { @@ -26,10 +25,6 @@ public TestDataHarness() { Map> pidsAndHexDigests = new HashMap<>(); Map values1 = new HashMap<>(); - values1.put( - "object_cid", - "0d555ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e" - ); values1.put("md2", "b33c730ac5e36b2b886a9cd14552f42e"); values1.put("md5", "db91c910a3202478c8def1071c54aae5"); values1.put("sha1", "1fe86e3c8043afa4c70857ca983d740ad8501ccd"); @@ -48,21 +43,17 @@ public TestDataHarness() { "sha512-224", "107f9facb268471de250625440b6c8b7ff8296fbe5d89bed4a61fd35" ); values1.put( - "metadata_cid", + "sysmeta_address_sha256", "323e0799524cec4c7e14d31289cefd884b563b5c052f154a066de5ec1e477da7" ); values1.put( - "metadata_sha256", + "metadata_cid_sha256", "158d7e55c36a810d7c14479c952a4d0b370f2b844808f2ea2b20d7df66768b04" ); values1.put("size", "39993"); pidsAndHexDigests.put("doi:10.18739/A2901ZH2M", values1); Map values2 = new HashMap<>(); - values2.put( - "object_cid", - "a8241925740d5dcd719596639e780e0a090c9d55a5d0372b0eaf55ed711d4edf" - ); values2.put("md2", "9c25df1c8ba1d2e57bb3fd4785878b85"); values2.put("md5", "f4ea2d07db950873462a064937197b0f"); values2.put("sha1", "3d25436c4490b08a2646e283dada5c60e5c0539d"); @@ -81,21 +72,17 @@ public TestDataHarness() { "sha512-224", "7a2b22e36ced9e91cf8cdf6971897ec4ae21780e11d1c3903011af33" ); values2.put( - "metadata_cid", + "sysmeta_address_sha256", "ddf07952ef28efc099d10d8b682480f7d2da60015f5d8873b6e1ea75b4baf689" ); values2.put( - "metadata_sha256", + "metadata_cid_sha256", "d87c386943ceaeba5644c52b23111e4f47972e6530df0e6f0f41964b25855b08" ); values2.put("size", "8724"); pidsAndHexDigests.put("jtao.1700.1", values2); Map values3 = new HashMap<>(); - values3.put( - "object_cid", - "7f5cc18f0b04e812a3b4c8f686ce34e6fec558804bf61e54b176742a7f6368d6" - ); values3.put("md2", "9f2b06b300f661ce4398006c41d8aa88"); values3.put("md5", "e1932fc75ca94de8b64f1d73dc898079"); values3.put("sha1", "c6d2a69a3f5adaf478ba796c114f57b990cf7ad1"); @@ -114,11 +101,11 @@ public TestDataHarness() { "sha512-224", "e1789a91c9df334fdf6ee5d295932ad96028c426a18b17016a627099" ); values3.put( - "metadata_cid", + "sysmeta_address_sha256", "9a2e08c666b728e6cbd04d247b9e556df3de5b2ca49f7c5a24868eb27cddbff2" ); values3.put( - "metadata_sha256", + "metadata_cid_sha256", "27003e07f2ab374020de73298dd24a1d8b1b57647b8fa3c49db00f8c342afa1d" ); values3.put("size", "18699"); From ccc2a6e94ca884a6cbb7ae395ce4df6d0091604c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 19 Jun 2024 13:19:15 -0700 Subject: [PATCH 310/553] Remove unnecessary return statement in 'ObjectMetadata' class for method '.setPid' and change return type to 'void' --- src/main/java/org/dataone/hashstore/ObjectMetadata.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 159b039c..763d4952 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -42,12 +42,9 @@ public String getPid() { /** * Set the persistent identifier - * - * @return pid */ - public String setPid(String pid) { + public void setPid(String pid) { this.pid = pid; - return pid; } /** From 7c3bf4c760b4579faa3638d299eee7e74ac85fdd Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 19 Jun 2024 13:24:29 -0700 Subject: [PATCH 311/553] Cleanup test classes for typos, unused imports and other minor warnings --- .../java/org/dataone/hashstore/HashStoreClientTest.java | 4 ++-- .../java/org/dataone/hashstore/HashStoreRunnable.java | 2 +- src/test/java/org/dataone/hashstore/HashStoreTest.java | 1 - .../filehashstore/FileHashStoreInterfaceTest.java | 1 - .../filehashstore/FileHashStoreProtectedTest.java | 3 +-- .../filehashstore/FileHashStoreReferencesTest.java | 8 ++++---- 6 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 8437df7a..f272932b 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -508,7 +508,7 @@ public void client_findObject() throws Exception { "-", ""); Path cidRefsPath = getObjectAbsPath( testData.pidData.get(pid).get(storeAlgo), "cid" - );; + ); Path pidRefsPath = getObjectAbsPath( pid, "pid" ); @@ -519,7 +519,7 @@ public void client_findObject() throws Exception { String expectedOutPutPt4 = "Pid Reference File Path:\n" + pidRefsPath + "\n"; String expectedOutPutPt5 = "Sysmeta Path:\n" + sysMetaPath; String expectedOutPutFull = - expectedOutPutPt1 + expectedOutPutPt2 + expectedOutPutPt3 + expectedOutPutPt4 + expectedOutPutPt5;; + expectedOutPutPt1 + expectedOutPutPt2 + expectedOutPutPt3 + expectedOutPutPt4 + expectedOutPutPt5; // Put things back diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 7006dcfe..168b012a 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -16,7 +16,7 @@ public class HashStoreRunnable implements Runnable { public static final int storeObject = 1; public static final int deleteObject = 2; - private HashStore hashstore = null; + private HashStore hashstore; private int publicAPIMethod; private String pid; private InputStream objStream; diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 49a7b07f..58cdb68a 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -1,7 +1,6 @@ package org.dataone.hashstore; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index e0ec0d03..856e0d7c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -35,7 +35,6 @@ import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; -import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index d18adec2..eacc007c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1069,7 +1069,7 @@ public void getExpectedPath_metadataPath() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); + fileHashStore.storeMetadata(metadataStream, pid); metadataStream.close(); Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -1155,7 +1155,6 @@ public void getExpectedPath_cidRefsPaths() throws Exception { Path storePath = Paths.get(fhsProperties.getProperty("storePath")); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); // Cid refs file String objShardString = FileHashStoreUtility.getHierarchicalPathString( diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 83535d6f..957c07b5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -133,7 +133,7 @@ public void tagObject_refsFileAlreadyExists() throws Exception { // Should not throw any exceptions, everything is where it's supposed to be. fileHashStore.tagObject(pid, cid); - // Confirm that there is only 1 of each refs file + // Confirm that there is only 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(1, pidRefsFiles.length); @@ -162,7 +162,7 @@ public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileFound() /** - * Check tagObject overwrites a oprhaned pid refs file. + * Check tagObject overwrites an orphaned pid refs file. */ @Test public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound() @@ -182,7 +182,7 @@ public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); fileHashStore.tagObject(pid, cid); - // There should only be 1 of each refs file + // There should only be 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(1, pidRefsFiles.length); @@ -203,7 +203,7 @@ public void tagObject_pidRefsFileFound_cidRefsFileNotFound() throws Exception { Files.delete(cidRefsFilePath); fileHashStore.tagObject(pid, cid); - // Confirm that there is only 1 of each refs file + // Confirm that there is only 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); assertEquals(1, pidRefsFiles.length); From 952997399cc613575ca0af8c2c6c08687ae9ca07 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 20 Jun 2024 09:13:25 -0700 Subject: [PATCH 312/553] Update 'verifyHashStoreProperties' javadoc and debug message --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c0453960..4fd9a8c0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -221,8 +221,8 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep * * If `hashstore.yaml` exists, it will retrieve its properties and compare them with the given * values; and if there is a mismatch, an exception will be thrown. If not, it will look to see - * if any directories/files exist in the given store path and throw an exception if any file or - * directory is found. + * if any relevant HashStore directories exist (i.e. '/objects', '/metadata', '/refs') in the + * given store path and throw an exception if any of those directories exist. * * @param storePath Path where HashStore will store objects * @param storeDepth Depth of directories @@ -257,7 +257,7 @@ protected void verifyHashStoreProperties( // Check to see if configuration exists before initializing Path hashstoreYamlPredictedPath = Paths.get(storePath + "/hashstore.yaml"); if (Files.exists(hashstoreYamlPredictedPath)) { - logFileHashStore.debug("FileHashStore - 'hashstore.yaml' found, verifying properties."); + logFileHashStore.debug("FileHashStore - 'hashstore.yaml' found, checking properties."); HashMap hsProperties = loadHashStoreYaml(storePath); int existingStoreDepth = (int) hsProperties.get(HashStoreProperties.storeDepth.name()); @@ -302,9 +302,7 @@ protected void verifyHashStoreProperties( } } logFileHashStore.debug( - "FileHashStore - 'hashstore.yaml' not found and store path" - + " not yet initialized." - ); + "FileHashStore - 'hashstore.yaml' not found. Supplied properties accepted."); } } From f1211ce8ce46270290d09499e240d573ec9353d1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 20 Jun 2024 09:18:13 -0700 Subject: [PATCH 313/553] Update README.md hashstore layout formatting --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 505caa86..587869de 100644 --- a/README.md +++ b/README.md @@ -144,12 +144,12 @@ These reference files are implemented in HashStore underneath the hood with no e .../metacat/hashstore ├── hashstore.yaml └── objects -| ├── 4d +| └── 4d | └── 19 | └── 81 | └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c └── metadata -| ├── 0d +| └── 0d | └── 55 | └── 55 | └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e @@ -157,12 +157,12 @@ These reference files are implemented in HashStore underneath the hood with no e | └── sha256(pid+formatId_annotations) └── refs ├── cids - | ├── 4d + | └── 4d | └── 19 | └── 81 | └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c └── pids - ├── 0d + └── 0d └── 55 └── 55 └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e From cdab02ae29b1633d47f9eca7d336a9a661d2fc79 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 20 Jun 2024 09:32:02 -0700 Subject: [PATCH 314/553] Clean up 'FileHashStore' Pt.1 (debug message typo), spacing and revise 'validateTmpObject' to throw newly added descriptive custom exceptions --- .../hashstore/filehashstore/FileHashStore.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4fd9a8c0..e376a17b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -385,7 +385,6 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { protected String buildHashStoreYamlString( int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace ) { - return String.format( "# Default configuration variables for HashStore\n\n" + "############### Directory Structure ###############\n" @@ -474,14 +473,14 @@ private ObjectMetadata syncPutObject( throw new RuntimeException(errMsg); } logFileHashStore.debug( - "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid + "FileHashStore.syncPutObject - Synchronizing objectLockedIds for pid: " + pid ); objectLockedIds.add(pid); } try { logFileHashStore.debug( - "FileHashStore.syncPutObject - called .putObject() to store pid: " + pid + "FileHashStore.syncPutObject - calling .putObject() to store pid: " + pid + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum + ". checksumAlgorithm: " + checksumAlgorithm ); @@ -1674,7 +1673,7 @@ protected ObjectMetadata putObject( private void validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, Map hexDigests, long objSize, long storedObjFileSize - ) throws NoSuchAlgorithmException, IOException { + ) throws NoSuchAlgorithmException, NonMatchingChecksumException, NonMatchingObjSizeException { if (objSize > 0) { if (objSize != storedObjFileSize) { // Delete tmp File @@ -1688,7 +1687,7 @@ private void validateTmpObject( + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new NonMatchingObjSizeException(errMsg); } String errMsg = @@ -1696,7 +1695,7 @@ private void validateTmpObject( + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " + storedObjFileSize + ". Deleting tmpFile: " + tmpFile; logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); + throw new NonMatchingObjSizeException(errMsg); } } @@ -1727,7 +1726,7 @@ private void validateTmpObject( + ". Checksum" + " provided: " + checksum + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new NonMatchingChecksumException(errMsg); } String errMsg = @@ -1735,7 +1734,7 @@ private void validateTmpObject( + " calculated hex digest: " + digestFromHexDigests + ". Checksum" + " provided: " + checksum + ". tmpFile has been deleted: " + tmpFile; logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); + throw new NonMatchingChecksumException(errMsg); } } } From f1f6b7545dae282181c080b11536453b50bbf72a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 20 Jun 2024 10:20:43 -0700 Subject: [PATCH 315/553] Revise 'verifyHashStoreRefsFiles' method exceptions types to improve clarity, update called exception type's base class and update junit tests --- .../CidNotFoundInPidRefsFileException.java | 12 +++++++++++ .../PidNotFoundInCidRefsFileException.java | 2 +- .../filehashstore/FileHashStore.java | 21 +++++++++++-------- .../FileHashStoreReferencesTest.java | 6 ++++-- 4 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/CidNotFoundInPidRefsFileException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/CidNotFoundInPidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/CidNotFoundInPidRefsFileException.java new file mode 100644 index 00000000..7eb5b299 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/CidNotFoundInPidRefsFileException.java @@ -0,0 +1,12 @@ +package org.dataone.hashstore.exceptions; + +/** + * Custom exception class for FileHashStore when the expected cid is not found in the pid refs file. + */ +public class CidNotFoundInPidRefsFileException extends IllegalArgumentException { + + public CidNotFoundInPidRefsFileException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java index 2cd9d4b6..d635f304 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java @@ -5,7 +5,7 @@ /** * Custom exception class for FileHashStore when a pid is not found in a cid refs file. */ -public class PidNotFoundInCidRefsFileException extends IOException { +public class PidNotFoundInCidRefsFileException extends IllegalArgumentException { public PidNotFoundInCidRefsFileException(String message) { super(message); } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e376a17b..012a3525 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; @@ -2102,20 +2103,22 @@ protected void deleteObjectByCid(String cid) } /** - * Verifies that the reference files for the given pid and cid exist and contain - * the expected values. - * + * Verifies that the reference files for the given pid and cid exist and contain the expected + * values. + * * @param pid Authority-based or persistent identifier * @param cid Content identifier * @param absPidRefsPath Path to where the pid refs file exists * @param absCidRefsPath Path to where the cid refs file exists - * @throws FileNotFoundException Any refs files are missing - * @throws IOException Unable to read any of the refs files or if the refs content - * is not what is expected + * @throws FileNotFoundException Any refs files are missing + * @throws CidNotFoundInPidRefsFileException When the expected cid is not found in the pid refs + * @throws PidNotFoundInCidRefsFileException When a pid is not found in the cid refs file + * @throws IOException Unable to read any of the refs files */ protected void verifyHashStoreRefsFiles( String pid, String cid, Path absPidRefsPath, Path absCidRefsPath - ) throws FileNotFoundException, IOException { + ) throws FileNotFoundException, CidNotFoundInPidRefsFileException, + PidNotFoundInCidRefsFileException, IOException { // First confirm that the files were created if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " @@ -2137,14 +2140,14 @@ protected void verifyHashStoreRefsFiles( + cidRead + " found in pid refs file: " + absPidRefsPath + ". Expected cid: " + cid; logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new CidNotFoundInPidRefsFileException(errMsg); } boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " + pid + " in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new PidNotFoundInCidRefsFileException(errMsg); } } catch (IOException ioe) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 957c07b5..1231dc01 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -18,8 +18,10 @@ import java.util.Properties; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; +import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; @@ -275,7 +277,7 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { // Get path of the cid refs file Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - assertThrows(IOException.class, () -> { + assertThrows(CidNotFoundInPidRefsFileException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, cidRefsFilePath); }); } @@ -297,7 +299,7 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception // Get path of the pid refs file Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - assertThrows(IOException.class, () -> { + assertThrows(PidNotFoundInCidRefsFileException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsTmpFilePath); }); } From 7d2b5763186b8cb8d6756d1d0d7a1dd926f0f64f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 08:52:43 -0700 Subject: [PATCH 316/553] Update 'tagObject' to check for existence of ref files in the if block, rather than pre-prepared booleans --- .../dataone/hashstore/filehashstore/FileHashStore.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 012a3525..3518da42 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -645,20 +645,17 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } try { - // Prepare booleans to determine path of tagObject to proceed with Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); - boolean pidRefsFound = Files.exists(absPidRefsPath); - boolean cidRefsFound = Files.exists(absCidRefsPath); // Both files found, confirm that reference files are where they are expected to be - if (pidRefsFound && cidRefsFound) { + if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( "FileHashStore.tagObject - Object with cid: " + cid + " already exists and is tagged with pid: " + pid ); - } else if (pidRefsFound && !cidRefsFound) { + } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { // If pid refs exists, it can only contain and reference one cid // First, compare the cid retrieved from the pid refs file from the supplied cid String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); @@ -696,7 +693,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // but doesn't contain the cid. Proceed to overwrite the pid refs file. // There is no return statement, so we move out of this if block. } - } else if (!pidRefsFound && cidRefsFound) { + } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); if (!pidFoundInCidRefFiles) { From 881876522b78e1fd95e265e29dad3baf87265263 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 09:25:07 -0700 Subject: [PATCH 317/553] Cleanup 'tagObject' --- .../dataone/hashstore/filehashstore/FileHashStore.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3518da42..cdf24f11 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -677,8 +677,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi Path retrievedAbsCidRefsPath = getExpectedPath( retrievedCid, "refs", HashStoreIdTypes.cid.getName() ); - boolean retrievedAbsCidRefsPathExists = Files.exists(retrievedAbsCidRefsPath); - if (retrievedAbsCidRefsPathExists && isStringInRefsFile( + if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( pid, retrievedAbsCidRefsPath )) { // This pid is accounted for and tagged as expected. @@ -695,8 +694,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file - boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); - if (!pidFoundInCidRefFiles) { + if (!isStringInRefsFile(pid, absCidRefsPath)) { updateRefsFile(pid, absCidRefsPath, "add"); } // Get the pid refs file and verify tagging process @@ -719,7 +717,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi File absPathCidRefsFile = absCidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // Verify tagging process, this throws exceptions if there's an issue + // Verify tagging process, this throws an exception if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( "FileHashStore.tagObject - Object with cid: " + cid From df96c672f550d13e6b6616c4f25b7917f3a97baa Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 09:28:15 -0700 Subject: [PATCH 318/553] Cleanup 'verifyHashStoreRefsFiles' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index cdf24f11..cdd80fa5 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2114,7 +2114,7 @@ protected void verifyHashStoreRefsFiles( String pid, String cid, Path absPidRefsPath, Path absCidRefsPath ) throws FileNotFoundException, CidNotFoundInPidRefsFileException, PidNotFoundInCidRefsFileException, IOException { - // First confirm that the files were created + // First confirm that the refs files have been created if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " + absCidRefsPath + " for pid: " + pid; @@ -2127,7 +2127,7 @@ protected void verifyHashStoreRefsFiles( logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } - // Now verify the content + // Now confirm that the content is what is expected try { String cidRead = new String(Files.readAllBytes(absPidRefsPath)); if (!cidRead.equals(cid)) { @@ -2137,8 +2137,7 @@ protected void verifyHashStoreRefsFiles( logFileHashStore.error(errMsg); throw new CidNotFoundInPidRefsFileException(errMsg); } - boolean pidFoundInCidRefFiles = isStringInRefsFile(pid, absCidRefsPath); - if (!pidFoundInCidRefFiles) { + if (!isStringInRefsFile(pid, absCidRefsPath)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " + pid + " in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); From 8e42e4d4a742cb3435008d13c17e874f0ab8ba53 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 09:59:03 -0700 Subject: [PATCH 319/553] All new custom exception 'HashStoreRefsAlreadyExistException' --- .../HashStoreRefsAlreadyExistException.java | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java b/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java new file mode 100644 index 00000000..0b150884 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java @@ -0,0 +1,11 @@ +package org.dataone.hashstore.exceptions; + +import java.nio.file.FileAlreadyExistsException; + +public class HashStoreRefsAlreadyExistException extends FileAlreadyExistsException { + + public HashStoreRefsAlreadyExistException(String message) { + super(message); + } + +} From af95ea782a5a83d9a0d75aef43edfff44d06c828 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 09:59:37 -0700 Subject: [PATCH 320/553] Revise 'tagObject' to throw new custom exception when refs file already exists and are tagged appropriately, and update junit test --- .../hashstore/filehashstore/FileHashStore.java | 11 +++++++---- .../filehashstore/FileHashStoreReferencesTest.java | 7 +++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index cdd80fa5..ce25d22b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -36,6 +36,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; @@ -651,10 +652,12 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // Both files found, confirm that reference files are where they are expected to be if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "FileHashStore.tagObject - Object with cid: " + cid - + " already exists and is tagged with pid: " + pid - ); + // We throw an exception so the client is aware that everything is in place + String errMsg = "FileHashStore.tagObject - Object with cid: " + cid + + " already exists and is tagged with pid: " + pid; + logFileHashStore.error(errMsg); + throw new HashStoreRefsAlreadyExistException(errMsg); + } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { // If pid refs exists, it can only contain and reference one cid // First, compare the cid retrieved from the pid refs file from the supplied cid diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 1231dc01..9b88da20 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -19,6 +19,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; @@ -133,8 +134,10 @@ public void tagObject_refsFileAlreadyExists() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - // Should not throw any exceptions, everything is where it's supposed to be. - fileHashStore.tagObject(pid, cid); + assertThrows(HashStoreRefsAlreadyExistException.class, () -> { + fileHashStore.tagObject(pid, cid); + }); + // Confirm that there is only 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); From 4206c4521ad59cf2e5c771db77ff093c7b07bca2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 10:28:45 -0700 Subject: [PATCH 321/553] Add missing javadocs to newly created custom exception classes --- .../exceptions/HashStoreRefsAlreadyExistException.java | 3 +++ .../hashstore/exceptions/HashStoreServiceException.java | 2 +- .../hashstore/exceptions/PidRefsFileExistsException.java | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java b/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java index 0b150884..5b7a19b3 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java @@ -2,6 +2,9 @@ import java.nio.file.FileAlreadyExistsException; +/** + * Custom exception thrown when called to tag a pid and cid, and reference files already exist + */ public class HashStoreRefsAlreadyExistException extends FileAlreadyExistsException { public HashStoreRefsAlreadyExistException(String message) { diff --git a/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java b/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java index 8225869e..e100b702 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java @@ -1,7 +1,7 @@ package org.dataone.hashstore.exceptions; /** - * An exception that encapsulates errors from the HashStore service + * An exception that encapsulates errors from the HashStore Runnable Test Class */ public class HashStoreServiceException extends Exception { public HashStoreServiceException(String message) { diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java index 586d0f1f..57aae024 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java @@ -3,7 +3,8 @@ import java.io.IOException; /** - * Custom exception class for FileHashStore pidObjects + * Custom exception class thrown when a pid refs file already exists (a single pid can only ever + * reference one cid) */ public class PidRefsFileExistsException extends IOException { public PidRefsFileExistsException(String message) { From b2a98fb9a3bf860c1a659a8d2e69f16d7d2f6dc6 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 10:39:06 -0700 Subject: [PATCH 322/553] Update README.md for HashStoreClient examples to call the correct .jar file --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 587869de..c0ba9c5a 100644 --- a/README.md +++ b/README.md @@ -186,38 +186,38 @@ We also maintain a parallel [Python-based version of HashStore](https://github.c $ mvn clean package -Dmaven.test.skip=true # Get help -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -h +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -h # Step 2: Determine where your hashstore should live (ex. `/var/hashstore`) ## Create a HashStore (long option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 ## Create a HashStore (short option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 # Get the checksum of a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 -# Find an object in HashStore (returns its content identifier if it exists) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -findobject -pid testpid1 +# Find an object in HashStore (returns the content identifier, path to the obj, path to refs file and sysmeta path) +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -findobject -pid testpid1 # Store a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 # Store a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Retrieve a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrieveobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrieveobject -pid testpid1 # Retrieve a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 # Delete a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deleteobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deleteobject -pid testpid1 # Delete a metadata file -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 ``` ## License From d412ad96b0cf0ff0b890e10f6a09ea360b1af02b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 21 Jun 2024 10:51:19 -0700 Subject: [PATCH 323/553] Update 'storeObject_objectLockedIds_FiveThreads' to catch newly added custom exception and javadoc --- .../FileHashStoreInterfaceTest.java | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 856e0d7c..f5512738 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -35,6 +35,7 @@ import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; @@ -608,14 +609,15 @@ public void storeObject_interruptProcess() throws Exception { /** * Tests that the `storeObject` method can store an object successfully with multiple threads * (5). This test uses five futures (threads) that run concurrently, all except one of which - * will encounter a `RunTimeException`. The thread that does not encounter an exception will - * store the given object, and verifies that the object is stored successfully. - * - * The threads are expected to encounter a `RunTimeException` since the expected - * object to store is already in progress (thrown by `syncPutObject` which coordinates - * `store_object` requests with a pid). If both threads execute simultaneously and bypasses - * the store object synchronization flow, we may also run into a `PidRefsFileExistsException` - * - which prevents the cid from being tagged twice by the same pid. + * will encounter a `HashStoreRefsAlreadyExistException`. The thread that does not encounter an + * exception will store the given object, and verifies that the object is stored successfully. + * + * The threads are expected to encounter a `RunTimeException` since the expected object to store + * is already in progress (thrown by `syncPutObject` which coordinates `store_object` requests + * with a pid). If both threads execute simultaneously and bypasses the store object + * synchronization flow, we may also run into a `HashStoreRefsAlreadyExistException` - which is + * called during the `tagObject` process when reference files already exist with the expected + * values. */ @Test public void storeObject_objectLockedIds_FiveThreads() throws Exception { @@ -644,8 +646,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - e.printStackTrace(); - assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); + System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } }); Future future2 = executorService.submit(() -> { @@ -665,8 +667,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - e.printStackTrace(); - assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); + System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } }); Future future3 = executorService.submit(() -> { @@ -686,8 +688,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - e.printStackTrace(); - assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); + System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } }); Future future4 = executorService.submit(() -> { @@ -707,8 +709,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - e.printStackTrace(); - assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); + System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } }); Future future5 = executorService.submit(() -> { @@ -728,8 +730,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - e.printStackTrace(); - assertTrue(e instanceof RuntimeException | e instanceof PidRefsFileExistsException); + System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } }); From e9d59560a6bb83768f41d618d55757fef4b2d84a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 09:53:14 -0700 Subject: [PATCH 324/553] Removed redundant 'storeObject' overload methos from 'HashStore' interface --- .../java/org/dataone/hashstore/HashStore.java | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index cb08105d..9f36c36f 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -85,46 +85,6 @@ public ObjectMetadata storeObject( public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; - /** - * @see #storeObject(InputStream, String, String, String, String, long) - * - * Store an object and validate the given checksum & checksum algorithm and size. - */ - public ObjectMetadata storeObject( - InputStream object, String pid, String checksum, String checksumAlgorithm, - long objSize - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; - - /** - * @see #storeObject(InputStream, String, String, String, String, long) - * - * Store an object and validate the given checksum & checksum algorithm. - */ - public ObjectMetadata storeObject( - InputStream object, String pid, String checksum, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; - - /** - * @see #storeObject(InputStream, String, String, String, String, long) - * - * Store an object and generate an additional algorithm in hex digests. - */ - public ObjectMetadata storeObject( - InputStream object, String pid, String additionalAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; - - /** - * @see #storeObject(InputStream, String, String, String, String, long) - * - * Store an object and validate its size. - */ - public ObjectMetadata storeObject(InputStream object, String pid, long objSize) - throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; - /** * Creates references that allow objects stored in HashStore to be discoverable. Retrieving, * deleting or calculating a hex digest of an object is based on a pid argument; and to From d986b7b535ddb13a2c3f8822b1aa9836b16a3656 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 09:55:42 -0700 Subject: [PATCH 325/553] Remove now obsolete 'storeObject' overload methods from 'FileHashStore' and respective junit tests, and revise code in 'HashStoreClient' --- .../dataone/hashstore/HashStoreClient.java | 2 +- .../filehashstore/FileHashStore.java | 56 ------------ .../FileHashStoreInterfaceTest.java | 86 ------------------- 3 files changed, 1 insertion(+), 143 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index d049b25b..6f9d9c69 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -604,7 +604,7 @@ private static void storeObjsWithChecksumFromDb(List> result // Store object System.out.println("Storing object for guid: " + guid); - hashStore.storeObject(objStream, guid, checksum, algorithm); + hashStore.storeObject(objStream, guid, null, checksum, algorithm, -1); } catch (PidRefsFileExistsException poee) { String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index ce25d22b..7d4e0a37 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -556,62 +556,6 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce } - /** - * Overload method for storeObject with size and a checksum & checksumAlgorithm. - */ - @Override - public ObjectMetadata storeObject( - InputStream object, String pid, String checksum, String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, - InterruptedException { - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeObject"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "storeObject"); - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); - - return storeObject(object, pid, null, checksum, checksumAlgorithm, objSize); - } - - /** - * Overload method for storeObject with just a checksum and checksumAlgorithm - */ - @Override - public ObjectMetadata storeObject( - InputStream object, String pid, String checksum, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, - InterruptedException { - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeObject"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "storeObject"); - - return storeObject(object, pid, null, checksum, checksumAlgorithm, -1); - } - - /** - * Overload method for storeObject with just the size of object to validate - */ - @Override - public ObjectMetadata storeObject(InputStream object, String pid, long objSize) - throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, - InterruptedException { - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); - - return storeObject(object, pid, null, null, null, objSize); - } - - /** - * Overload method for storeObject with an additionalAlgorithm - */ - @Override - public ObjectMetadata storeObject(InputStream object, String pid, String additionalAlgorithm) - throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, - InterruptedException { - FileHashStoreUtility.ensureNotNull( - additionalAlgorithm, "additionalAlgorithm", "storeObject" - ); - - return storeObject(object, pid, additionalAlgorithm, null, null, -1); - } - - @Override public void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, InterruptedException { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index f5512738..d2c02131 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -39,7 +39,6 @@ import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; -import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -233,70 +232,6 @@ public void storeObject_zeroObjSize() { } } - /** - * Verify that storeObject stores and validates a given checksum and its expected size - * with overloaded method - */ - @Test - public void storeObject_overloadChecksumCsAlgoAndSize() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - String md2 = testData.pidData.get(pid).get("md2"); - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, md2, "MD2", objectSize - ); - dataStream.close(); - - Map hexDigests = objInfo.getHexDigests(); - - // Validate checksum values - assertEquals(md2, hexDigests.get("MD2")); - } - } - - /** - * Verify that storeObject stores and validates a given checksum with overloaded method - */ - @Test - public void storeObject_overloadChecksumAndChecksumAlgo() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - String md2 = testData.pidData.get(pid).get("md2"); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, md2, "MD2"); - dataStream.close(); - - Map hexDigests = objInfo.getHexDigests(); - - // Validate checksum values - assertEquals(md2, hexDigests.get("MD2")); - } - } - - /** - * Check that store object returns the correct ObjectMetadata size with overloaded method - */ - @Test - public void storeObject_overloadObjSize() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, objectSize); - dataStream.close(); - - assertEquals(objectSize, objInfo.getSize()); - } - } - /** * Check that store object executes as expected with only an InputStream (does not create * any reference files) @@ -326,27 +261,6 @@ public void storeObject_overloadInputStreamOnly() throws Exception { } } - /** - * Verify that storeObject generates an additional checksum with overloaded method - */ - @Test - public void storeObject_overloadAdditionalAlgo() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, "MD2"); - dataStream.close(); - - Map hexDigests = objInfo.getHexDigests(); - - // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(md2, hexDigests.get("MD2")); - } - } - /** * Verify that storeObject returns the expected checksum value */ From f56b3cfd52858ff0b8f9471b3702bbc11a8280fe Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 10:36:58 -0700 Subject: [PATCH 326/553] Add new utility method 'isValidString', which is now called by 'checkForEmptyString' --- .../filehashstore/FileHashStoreUtility.java | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index e871ac3f..f1ef7f7a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -177,12 +177,13 @@ public static void deleteListItems(List deleteList) throws IOException { } /** - * Checks whether a given string is empty and throws an exception if so + * Checks whether a given string is empty or contains illegal characters, and throws an + * exception if so * * @param string String to check * @param argument Value that is being checked * @param method Calling method - * @throws IllegalArgumentException If the string is empty or null + * @throws IllegalArgumentException If the string is empty or contains illegal characters */ public static void checkForEmptyString(String string, String argument, String method) throws IllegalArgumentException { @@ -192,6 +193,29 @@ public static void checkForEmptyString(String string, String argument, String me + "(): " + argument + " cannot be empty."; throw new IllegalArgumentException(errMsg); } + if (!isValidString(string)) { + String errMsg = "FileHashStoreUtility.checkForEmptyString - Calling Method: " + method + + "(): " + argument + " contains empty white spaces, tabs or newlines."; + throw new IllegalArgumentException(errMsg); + } + } + + /** + * Iterates over a given string and checks each character to make sure that there are no + * whitespaces, tabs, new lines or other illegal characters. + * @param string String to check + * @return True if valid, False if illegal characters found. + */ + public static boolean isValidString(String string) { + boolean valid = true; + for (int i = 0; i < string.length(); i++) { + char ch = string.charAt(i); + if (Character.isWhitespace(ch)) { + valid = false; + break; + } + } + return valid; } /** From 7e007f1e46b1bd8503259656b622bad379538eb6 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 10:38:20 -0700 Subject: [PATCH 327/553] Add new junit tests to check that illegal characters in a given 'pid' or string raises an exception --- .../FileHashStoreInterfaceTest.java | 34 ++++ .../FileHashStoreProtectedTest.java | 156 +++++++----------- 2 files changed, 97 insertions(+), 93 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index d2c02131..a81c8618 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -215,6 +215,40 @@ public void storeObject_emptyPid() { } } + /** + * Check that store object throws exception when pid contains new line character + */ + @Test + public void storeObject_pidWithNewLine() throws Exception { + for (String pid : testData.pidList) { + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, "dou.test.1\n", null, null, null, -1); + dataStream.close(); + }); + } + } + + /** + * Check that store object throws exception when pid contains tab character + */ + @Test + public void storeObject_pidWithTab() throws Exception { + for (String pid : testData.pidList) { + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, "dou.test.1\t", null, null, null, -1); + dataStream.close(); + }); + } + } + /** * Check that store object throws exception when object size is 0 */ diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index eacc007c..8531beb9 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -47,8 +47,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { fhsProperties = storeProperties; @@ -107,8 +106,7 @@ public void isValidAlgorithm_notSupported() { } catch (NoSuchAlgorithmException nsae) { throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage() - ); + "NoSuchAlgorithmException encountered: " + nsae.getMessage()); } }); @@ -128,8 +126,7 @@ public void isValidAlgorithm_notSupportedLowerCase() { } catch (NoSuchAlgorithmException nsae) { throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage() - ); + "NoSuchAlgorithmException encountered: " + nsae.getMessage()); } }); @@ -165,9 +162,8 @@ public void generateTempFile() throws Exception { */ @Test public void getHierarchicalPathString() { - String shardedPath = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a" - ); + String shardedPath = FileHashStoreUtility.getHierarchicalPathString(3, 2, + "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"); String shardedPathExpected = "94/f9/b6/c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; assertEquals(shardedPath, shardedPathExpected); @@ -252,9 +248,7 @@ public void putObject_validateChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata address = fileHashStore.putObject( - dataStream, pid, null, checksumCorrect, "MD2", -1 - ); + ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, checksumCorrect, "MD2", -1); dataStream.close(); String objCid = address.getCid(); @@ -379,9 +373,7 @@ public void putObject_objSizeCorrect() throws Exception { long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject( - dataStream, pid, null, null, null, objectSize - ); + ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, objectSize); dataStream.close(); // Check id (sha-256 hex digest of the ab_id (pid)) @@ -400,9 +392,7 @@ public void putObject_objSizeIncorrect() { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject( - dataStream, pid, null, null, null, 1000 - ); + ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, 1000); dataStream.close(); // Check id (sha-256 hex digest of the ab_id (pid)) @@ -413,8 +403,8 @@ public void putObject_objSizeIncorrect() { } /** - * Verify putObject deletes temporary file written if called to store an object - * that already exists (duplicate) + * Verify putObject deletes temporary file written if called to store an object that already + * exists (duplicate) */ @Test public void putObject_duplicateObject() throws Exception { @@ -483,9 +473,8 @@ public void writeToTmpFileAndGenerateChecksums() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, null, null - ); + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, null); dataStream.close(); // Validate checksum values @@ -543,9 +532,9 @@ public void writeToTmpFileAndGenerateChecksums_addAlgo() throws Exception { String addAlgo = "MD2"; InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, addAlgo, null - ); + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, + null); dataStream.close(); // Validate checksum values @@ -570,9 +559,9 @@ public void writeToTmpFileAndGenerateChecksums_checksumAlgo() throws Exception { String checksumAlgo = "SHA-512/224"; InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, null, checksumAlgo - ); + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, + checksumAlgo); dataStream.close(); // Validate checksum values @@ -598,9 +587,9 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce String checksumAlgo = "SHA-512/224"; InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, addAlgo, checksumAlgo - ); + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, + checksumAlgo); dataStream.close(); // Validate checksum values @@ -629,8 +618,7 @@ public void writeToTmpFileAndGenerateChecksums_invalidAlgo() { InputStream dataStream = Files.newInputStream(testDataFile); fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, addAlgo, null - ); + newTmpFile, dataStream, addAlgo, null); dataStream.close(); }); } @@ -719,9 +707,7 @@ public void putMetadata() throws Exception { // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); + Path metadataPidExpectedPath = fileHashStore.getExpectedPath(pid, "metadata", storeMetadataNamespace); assertEquals(metadataPath, metadataPidExpectedPath.toString()); } } @@ -733,8 +719,7 @@ public void putMetadata() throws Exception { public void putMetadata_metadataNull() { for (String pid : testData.pidList) { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.putMetadata(null, pid, null) - ); + IllegalArgumentException.class, () -> fileHashStore.putMetadata(null, pid, null)); } } @@ -811,9 +796,7 @@ public void writeToTmpMetadataFile() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile( - newTmpFile, metadataStream - ); + boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); metadataStream.close(); assertTrue(metadataWritten); } @@ -821,7 +804,6 @@ public void writeToTmpMetadataFile() throws Exception { /** * Check that tmp metadata is actually written by verifying file size - * */ @Test public void writeToTmpMetadataFile_tmpFileSize() throws Exception { @@ -833,9 +815,7 @@ public void writeToTmpMetadataFile_tmpFileSize() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile( - newTmpFile, metadataStream - ); + boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); metadataStream.close(); assertTrue(metadataWritten); @@ -890,9 +870,8 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { } String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( - "metadata_cid_sha256" - ); + String sha256MetadataDigestFromTestData = + testData.pidData.get(pid).get("metadata_cid_sha256"); assertEquals(sha256Digest, sha256MetadataDigestFromTestData); // Close stream @@ -915,9 +894,7 @@ public void isStringInRefsFile_pidFound() throws Exception { String pidTwo = pid + ".test"; InputStream dataStreamDup = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStreamDup, pidTwo, null, null, null, -1 - ); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); dataStreamDup.close(); String cid = objInfo.getCid(); @@ -936,9 +913,7 @@ public void isStringInRefsFile_pidNotFound() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); String cid = objInfo.getCid(); @@ -969,9 +944,7 @@ public void deleteObjectByCid() throws Exception { Path storePath = Paths.get(fhsProperties.getProperty("storePath")); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, cid - ); + String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); Path objRealPath = storePath.resolve("objects").resolve(objShardString); assertFalse(Files.exists(objRealPath)); @@ -979,8 +952,8 @@ public void deleteObjectByCid() throws Exception { } /** - * Confirm deleteObjectByCid method does not delete an object if a cid refs file - * exists (pids still referencing the cid). + * Confirm deleteObjectByCid method does not delete an object if a cid refs file exists (pids + * still referencing the cid). */ @Test public void tryDeleteObjectByCid_cidRefsFileContainsPids() throws Exception { @@ -989,9 +962,7 @@ public void tryDeleteObjectByCid_cidRefsFileContainsPids() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); String cid = objInfo.getCid(); @@ -1036,9 +1007,7 @@ public void getExpectedPath_objectPath() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); String cid = objInfo.getCid(); @@ -1046,9 +1015,7 @@ public void getExpectedPath_objectPath() throws Exception { Path storePath = Paths.get(fhsProperties.getProperty("storePath")); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, cid - ); + String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); Path calculatedObjRealPath = storePath.resolve("objects").resolve(objShardString); Path expectedObjCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -1083,17 +1050,15 @@ public void getExpectedPath_metadataPath() throws Exception { // Metadata directory of the given pid String metadataPidDirId = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String metadataPidDirIdSharded = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, metadataPidDirId - ); + String metadataPidDirIdSharded = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidDirId); // Complete path Path calculatedMetadataRealPath = storePath.resolve("metadata").resolve(metadataPidDirIdSharded).resolve(hashId); - Path expectedMetadataPidPath = fileHashStore.getExpectedPath( - pid, "metadata", storeFormatId - ); + Path expectedMetadataPidPath = fileHashStore.getExpectedPath(pid, "metadata", storeFormatId); assertEquals(expectedMetadataPidPath, calculatedMetadataRealPath); } @@ -1109,9 +1074,7 @@ public void getExpectedPath_pidRefsPaths() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); String cid = objInfo.getCid(); @@ -1123,9 +1086,9 @@ public void getExpectedPath_pidRefsPaths() throws Exception { // Pid refs file String metadataPidHash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String metadataPidHashSharded = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, metadataPidHash - ); + String metadataPidHashSharded = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidHash); Path calculatedPidRefsRealPath = storePath.resolve("refs/pids").resolve(metadataPidHashSharded); @@ -1145,9 +1108,7 @@ public void getExpectedPath_cidRefsPaths() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); String cid = objInfo.getCid(); @@ -1157,9 +1118,7 @@ public void getExpectedPath_cidRefsPaths() throws Exception { int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // Cid refs file - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, cid - ); + String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); Path calculatedCidRefsRealPath = storePath.resolve("refs/cids").resolve(objShardString); Path expectedCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); @@ -1169,11 +1128,11 @@ public void getExpectedPath_cidRefsPaths() throws Exception { } /** - * Confirm getExpectedPath throws exception when requesting the path to a refs file - * with a formatId arg that is not "cid" or "pid" + * Confirm getExpectedPath throws exception when requesting the path to a refs file with a + * formatId arg that is not "cid" or "pid" */ @Test - public void getExpectedPath_incorrectRefsFormatId() { + public void getExpectedPath_incorrectRefsFormatId() { assertThrows(IllegalArgumentException.class, () -> { String cid = "testcid"; fileHashStore.getExpectedPath(cid, "refs", "not_cid_or_pid"); @@ -1181,14 +1140,25 @@ public void getExpectedPath_incorrectRefsFormatId() { } /** - * Confirm getExpectedPath throws exception when requesting path for an object - * that does not exist + * Confirm getExpectedPath throws exception when requesting path for an object that does not + * exist */ @Test - public void getExpectedPath_fileNotFound() { + public void getExpectedPath_fileNotFound() { assertThrows(FileNotFoundException.class, () -> { String pid = "dou.test.1"; fileHashStore.getExpectedPath(pid, "object", null); }); } + + /** + * Confirm getExpectedPath throws exception when requesting path for an object that does not + * exist + */ + @Test + public void fileHashStoreUtility_checkForEmptyString() { + assertThrows(IllegalArgumentException.class, () -> { + FileHashStoreUtility.checkForEmptyString("dou.test.1\n", "pid", "storeObject"); + }); + } } From 05ca2c83d968572b4c8932842505aa0a75b2843c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 10:58:26 -0700 Subject: [PATCH 328/553] Update 'findObject' javadoc for inaccurate return description --- src/main/java/org/dataone/hashstore/HashStore.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 9f36c36f..89a0270f 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -125,9 +125,10 @@ public void verifyObject( /** * Checks whether an object referenced by a pid exists and returns a map containing the * absolute path to the object, pid refs file, cid refs file and sysmeta document. - * + * * @param pid Authority-based identifier - * @return Content identifier (cid) + * @return Map containing the following keys: cid, cid_object_path, cid_refs_path, + * pid_refs_path, sysmeta_path * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs * file's absolute address is not valid * @throws IOException Unable to read from a pid refs file or pid refs From e0b6a6b05975b32f7d2273494691c8e3b4b132fb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 11:07:11 -0700 Subject: [PATCH 329/553] Update 'HashStore' interface & 'FileHashStore' with new 'verifyObject' arg and respective junit tests --- .../java/org/dataone/hashstore/HashStore.java | 18 +++++++++++------- .../hashstore/filehashstore/FileHashStore.java | 3 ++- .../FileHashStoreReferencesTest.java | 10 +++++----- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 89a0270f..387a3c58 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -108,18 +108,22 @@ public void tagObject(String pid, String cid) throws IOException, * Confirms that an ObjectMetadata's content is equal to the given values. If it is not * equal, it will return False - otherwise True. * - * @param objectInfo ObjectMetadata object with values - * @param checksum Value of checksum to validate against - * @param checksumAlgorithm Algorithm of checksum submitted - * @param objSize Expected size of object to validate after storing + * @param objectInfo ObjectMetadata object with values + * @param checksum Value of checksum to validate against + * @param checksumAlgorithm Algorithm of checksum submitted + * @param objSize Expected size of object to validate after storing + * @param deleteInvalidObject If true, HashStore will attempt to remove the data object + * given to verify * @throws NonMatchingObjSizeException Given size =/= objMeta size value * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value * @throws UnsupportedHashAlgorithmException Given algo is not found or supported - * @throws IOException Issue with recalculating supported algo for checksum not found + * @throws IOException Issue with recalculating supported algo for + * checksum not found */ public void verifyObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize - ) throws NonMatchingObjSizeException, NonMatchingChecksumException, + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, + boolean deleteInvalidObject) + throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, IOException; /** diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7d4e0a37..3435b690 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -685,7 +685,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi @Override public void verifyObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, + boolean deleteInvalidObject ) throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, IOException { logFileHashStore.debug( diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 9b88da20..bea3f890 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -422,7 +422,7 @@ public void verifyObject_correctValues() throws Exception { long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false ); } } @@ -446,7 +446,7 @@ public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); fileHashStore.verifyObject( - objInfo, expectedChecksum, "MD2", expectedSize + objInfo, expectedChecksum, "MD2", expectedSize, false ); } } @@ -467,7 +467,7 @@ public void verifyObject_unsupportedAlgo() throws Exception { assertThrows(UnsupportedHashAlgorithmException.class, () -> { fileHashStore.verifyObject( - objInfo, "ValueNotRelevant", "BLAKE2S", 1000 + objInfo, "ValueNotRelevant", "BLAKE2S", 1000, false ); }); } @@ -494,7 +494,7 @@ public void verifyObject_mismatchedValuesNonMatchingSize() throws Exception { assertThrows(NonMatchingObjSizeException.class, () -> { fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false ); }); } @@ -521,7 +521,7 @@ public void verifyObject_mismatchedValuesNonMatchingChecksum() throws Exception assertThrows(NonMatchingChecksumException.class, () -> { fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false ); }); } From c2b148c0ffd15b66c6a91a890986dfe3fe0891f5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 12:28:52 -0700 Subject: [PATCH 330/553] Refactor 'getExpectedPath' - extract new method 'getHashStoreRefsPath' --- .../filehashstore/FileHashStore.java | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3435b690..07b446c3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2333,8 +2333,8 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea protected Path getExpectedPath(String abId, String entity, String formatId) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { Path realPath; - String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); if (entity.equalsIgnoreCase("object")) { + String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); // `hashId` here is the address of the pid refs file, and contains the cid String pidRefsFileRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId @@ -2357,6 +2357,7 @@ protected Path getExpectedPath(String abId, String entity, String formatId) ); realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); } else if (entity.equalsIgnoreCase("metadata")) { + String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); // Get the pid metadata directory (the sharded path of the hashId) String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId @@ -2369,25 +2370,7 @@ protected Path getExpectedPath(String abId, String entity, String formatId) ); } else if (entity.equalsIgnoreCase("refs")) { - if (formatId.equalsIgnoreCase(HashStoreIdTypes.pid.getName())) { - // `hashId` here is the pid refs file string to split - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId - ); - realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); - } else if (formatId.equalsIgnoreCase(HashStoreIdTypes.cid.getName())) { - // If refs type is 'cid', use the abId directly provided - String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, abId - ); - realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); - } else { - String errMsg = - "FileHashStore.getExpectedPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - } - + realPath = getHashStoreRefsPath(abId, formatId); } else { throw new IllegalArgumentException( "FileHashStore.getExpectedPath - entity must be 'object', 'metadata' or 'refs'" @@ -2395,4 +2378,33 @@ protected Path getExpectedPath(String abId, String entity, String formatId) } return realPath; } + + /** + * Get the absolute path to a HashStore pid or cid ref file + * @param abpcId Authority-based identifier, persistent identifier or content identifier + * @param refType "cid" or "pid + * @return Path to the requested refs file + * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported + */ + private Path getHashStoreRefsPath(String abpcId, String refType) throws NoSuchAlgorithmException { + Path realPath; + if (refType.equalsIgnoreCase(HashStoreIdTypes.pid.getName())) { + String hashedId = FileHashStoreUtility.getPidHexDigest(abpcId, OBJECT_STORE_ALGORITHM); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashedId + ); + realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); + } else if (refType.equalsIgnoreCase(HashStoreIdTypes.cid.getName())) { + String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, abpcId + ); + realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); + } else { + String errMsg = + "FileHashStore.getExpectedPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + return realPath; + } } From 11dea30ed408703e8ce2ddd491ec0d020b89da27 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 12:34:28 -0700 Subject: [PATCH 331/553] Refactor 'getExpectedPath' - extract new method 'getHashStoreMetadataPath' --- .../filehashstore/FileHashStore.java | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 07b446c3..bc120bb7 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2357,18 +2357,7 @@ protected Path getExpectedPath(String abId, String entity, String formatId) ); realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); } else if (entity.equalsIgnoreCase("metadata")) { - String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); - // Get the pid metadata directory (the sharded path of the hashId) - String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId - ); - // The file name for the metadata document is the hash of the supplied 'pid + 'formatId' - String metadataDocHash = - FileHashStoreUtility.getPidHexDigest(abId + formatId, OBJECT_STORE_ALGORITHM); - realPath = METADATA_STORE_DIRECTORY.resolve(pidMetadataDirRelPath).resolve( - metadataDocHash - ); - + realPath = getHashStoreMetadataPath(abId, formatId); } else if (entity.equalsIgnoreCase("refs")) { realPath = getHashStoreRefsPath(abId, formatId); } else { @@ -2379,6 +2368,28 @@ protected Path getExpectedPath(String abId, String entity, String formatId) return realPath; } + /** + * Get the absolute path to a HashStore metadata document + * @param abpId Authority-based or persistent identifier + * @param formatId Metadata formatId or namespace + * @return Path to the requested metadata document + * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported + */ + private Path getHashStoreMetadataPath(String abpId, String formatId) throws NoSuchAlgorithmException { + // Get the pid metadata directory (the sharded path of the hashId) + String hashId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); + String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId + ); + // The file name for the metadata document is the hash of the supplied 'pid + 'formatId' + String metadataDocHash = + FileHashStoreUtility.getPidHexDigest(abpId + formatId, OBJECT_STORE_ALGORITHM); + // Real path to metadata doc + return METADATA_STORE_DIRECTORY.resolve(pidMetadataDirRelPath).resolve( + metadataDocHash + ); + } + /** * Get the absolute path to a HashStore pid or cid ref file * @param abpcId Authority-based identifier, persistent identifier or content identifier From 1afce09d1452d5b45cab29c445dd6d2ee187f73d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 12:39:17 -0700 Subject: [PATCH 332/553] Refactor 'getExpectedPath' - extract new method 'getHashStoreDataObject' --- .../filehashstore/FileHashStore.java | 66 +++++++++++-------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index bc120bb7..b551397b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2322,44 +2322,23 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea /** * Get the absolute path of a HashStore object, metadata or refs file * - * @param abId Authority-based, persistent or content identifier - * @param entity "object" or "metadata" + * @param abpcId Authority-based, persistent or content identifier + * @param entity "object", "metadata" or "refs" * @param formatId Metadata namespace or reference type (pid/cid) * @return Actual path to object * @throws IllegalArgumentException If entity is not object or metadata * @throws NoSuchAlgorithmException If store algorithm is not supported * @throws IOException If unable to retrieve cid */ - protected Path getExpectedPath(String abId, String entity, String formatId) + protected Path getExpectedPath(String abpcId, String entity, String formatId) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { Path realPath; if (entity.equalsIgnoreCase("object")) { - String hashId = FileHashStoreUtility.getPidHexDigest(abId, OBJECT_STORE_ALGORITHM); - // `hashId` here is the address of the pid refs file, and contains the cid - String pidRefsFileRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId - ); - Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRefsFileRelativePath); - // Attempt to retrieve the cid - String objectCid; - if (!Files.exists(pathToPidRefsFile)) { - String errMsg = - "FileHashStore.getExpectedPath - Pid Refs file does not exist for pid: " + abId - + " with object address: " + pathToPidRefsFile + ". Cannot retrieve cid."; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } else { - objectCid = new String(Files.readAllBytes(pathToPidRefsFile)); - } - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); - realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + realPath = getHashStoreDataObject(abpcId); } else if (entity.equalsIgnoreCase("metadata")) { - realPath = getHashStoreMetadataPath(abId, formatId); + realPath = getHashStoreMetadataPath(abpcId, formatId); } else if (entity.equalsIgnoreCase("refs")) { - realPath = getHashStoreRefsPath(abId, formatId); + realPath = getHashStoreRefsPath(abpcId, formatId); } else { throw new IllegalArgumentException( "FileHashStore.getExpectedPath - entity must be 'object', 'metadata' or 'refs'" @@ -2368,6 +2347,39 @@ protected Path getExpectedPath(String abId, String entity, String formatId) return realPath; } + /** + * Get the absolute path to a HashStore data object + * @param abpId Authority-based or persistent identifier + * @return Path to the HasHStore data object + * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported + * @throws IOException Issue when reading a pid refs file to retrieve a 'cid' + */ + private Path getHashStoreDataObject(String abpId) throws NoSuchAlgorithmException, IOException { + String hashedId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); + // `hashId` here is used to calculate the address of the pid refs file + String pidRefsFileRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashedId + ); + Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRefsFileRelativePath); + // Attempt to retrieve the cid from the pid refs file + String objectCid; + if (!Files.exists(pathToPidRefsFile)) { + String errMsg = + "FileHashStore.getExpectedPath - Pid Refs file does not exist for pid: " + abpId + + " with object address: " + pathToPidRefsFile + ". Cannot retrieve cid."; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } else { + objectCid = new String(Files.readAllBytes(pathToPidRefsFile)); + } + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid + ); + // Real path to the data object + return OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + } + /** * Get the absolute path to a HashStore metadata document * @param abpId Authority-based or persistent identifier From 1694d32aba4806010479248560d98b34a6d8ece3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 13:15:04 -0700 Subject: [PATCH 333/553] Cleanup 'getExpectedPath' and related extracted methods --- .../filehashstore/FileHashStore.java | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b551397b..75c50a00 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2322,39 +2322,38 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea /** * Get the absolute path of a HashStore object, metadata or refs file * - * @param abpcId Authority-based, persistent or content identifier - * @param entity "object", "metadata" or "refs" - * @param formatId Metadata namespace or reference type (pid/cid) + * @param abpcId Authority-based, persistent or content identifier + * @param entity "object", "metadata" or "refs" + * @param detail Metadata namespace, ref type (pid/cid) or other value * @return Actual path to object * @throws IllegalArgumentException If entity is not object or metadata * @throws NoSuchAlgorithmException If store algorithm is not supported * @throws IOException If unable to retrieve cid */ - protected Path getExpectedPath(String abpcId, String entity, String formatId) + protected Path getExpectedPath(String abpcId, String entity, String detail) throws IllegalArgumentException, NoSuchAlgorithmException, IOException { - Path realPath; if (entity.equalsIgnoreCase("object")) { - realPath = getHashStoreDataObject(abpcId); + return getHashStoreDataObjectPath(abpcId); } else if (entity.equalsIgnoreCase("metadata")) { - realPath = getHashStoreMetadataPath(abpcId, formatId); + return getHashStoreMetadataPath(abpcId, detail); } else if (entity.equalsIgnoreCase("refs")) { - realPath = getHashStoreRefsPath(abpcId, formatId); + return getHashStoreRefsPath(abpcId, detail); } else { throw new IllegalArgumentException( - "FileHashStore.getExpectedPath - entity must be 'object', 'metadata' or 'refs'" - ); + "FileHashStore.getExpectedPath - entity must be 'object', 'metadata' or 'refs'"); } - return realPath; } /** * Get the absolute path to a HashStore data object + * * @param abpId Authority-based or persistent identifier * @return Path to the HasHStore data object * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported * @throws IOException Issue when reading a pid refs file to retrieve a 'cid' */ - private Path getHashStoreDataObject(String abpId) throws NoSuchAlgorithmException, IOException { + protected Path getHashStoreDataObjectPath(String abpId) throws NoSuchAlgorithmException, + IOException { String hashedId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); // `hashId` here is used to calculate the address of the pid refs file String pidRefsFileRelativePath = FileHashStoreUtility.getHierarchicalPathString( @@ -2365,8 +2364,9 @@ private Path getHashStoreDataObject(String abpId) throws NoSuchAlgorithmExceptio String objectCid; if (!Files.exists(pathToPidRefsFile)) { String errMsg = - "FileHashStore.getExpectedPath - Pid Refs file does not exist for pid: " + abpId - + " with object address: " + pathToPidRefsFile + ". Cannot retrieve cid."; + "FileHashStore.getHashStoreDataObjectPath - Pid Refs file does not exist for pid: " + + abpId + " with object address: " + pathToPidRefsFile + ". Cannot retrieve " + + "cid."; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } else { @@ -2382,12 +2382,14 @@ private Path getHashStoreDataObject(String abpId) throws NoSuchAlgorithmExceptio /** * Get the absolute path to a HashStore metadata document + * * @param abpId Authority-based or persistent identifier * @param formatId Metadata formatId or namespace * @return Path to the requested metadata document * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported */ - private Path getHashStoreMetadataPath(String abpId, String formatId) throws NoSuchAlgorithmException { + protected Path getHashStoreMetadataPath(String abpId, String formatId) + throws NoSuchAlgorithmException { // Get the pid metadata directory (the sharded path of the hashId) String hashId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( @@ -2404,12 +2406,14 @@ private Path getHashStoreMetadataPath(String abpId, String formatId) throws NoSu /** * Get the absolute path to a HashStore pid or cid ref file + * * @param abpcId Authority-based identifier, persistent identifier or content identifier * @param refType "cid" or "pid * @return Path to the requested refs file * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported */ - private Path getHashStoreRefsPath(String abpcId, String refType) throws NoSuchAlgorithmException { + protected Path getHashStoreRefsPath(String abpcId, String refType) + throws NoSuchAlgorithmException { Path realPath; if (refType.equalsIgnoreCase(HashStoreIdTypes.pid.getName())) { String hashedId = FileHashStoreUtility.getPidHexDigest(abpcId, OBJECT_STORE_ALGORITHM); @@ -2424,7 +2428,8 @@ private Path getHashStoreRefsPath(String abpcId, String refType) throws NoSuchAl realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } else { String errMsg = - "FileHashStore.getExpectedPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; + "FileHashStore.getHashStoreRefsPath - formatId must be 'pid' or 'cid' when entity" + + " is 'refs'"; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } From 4c3afaf535c12a8e2b93139345438b90e7557dfa Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 13:41:16 -0700 Subject: [PATCH 334/553] Delete 'getExpectedPath' method and replace codebase calls to it with extracted method calls to improve clarity --- .../filehashstore/FileHashStore.java | 74 ++++------ .../FileHashStoreInterfaceTest.java | 134 ++++++++---------- .../FileHashStoreProtectedTest.java | 70 ++++----- .../FileHashStoreReferencesTest.java | 31 ++-- 4 files changed, 122 insertions(+), 187 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 75c50a00..7a1e88e4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -495,7 +495,7 @@ private ObjectMetadata syncPutObject( tagObject(pid, cid); logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid - + ". Permanent address: " + getExpectedPath(pid, "object", null) + + ". Permanent address: " + getHashStoreDataObjectPath(pid) ); objInfo.setPid(pid); return objInfo; @@ -590,8 +590,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } try { - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); // Both files found, confirm that reference files are where they are expected to be if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { @@ -621,8 +621,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi return; } else { // Check if the retrieved cid refs file exists and pid is referenced - Path retrievedAbsCidRefsPath = getExpectedPath( - retrievedCid, "refs", HashStoreIdTypes.cid.getName() + Path retrievedAbsCidRefsPath = getHashStoreRefsPath( + retrievedCid, HashStoreIdTypes.cid.getName() ); if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( pid, retrievedAbsCidRefsPath @@ -761,11 +761,11 @@ public Map findObject(String pid) throws NoSuchAlgorithmExceptio FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); // Get path of the pid references file - Path absPidRefsPath = getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); if (Files.exists(absPidRefsPath)) { String cid = new String(Files.readAllBytes(absPidRefsPath)); - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { @@ -792,9 +792,8 @@ public Map findObject(String pid) throws NoSuchAlgorithmExceptio objInfoMap.put("cid_refs_path", absCidRefsPath.toString()); objInfoMap.put("pid_refs_path", absPidRefsPath.toString()); // If the default system metadata exists, include it - Path metadataPidExpectedPath = getExpectedPath( - pid, "metadata", DEFAULT_METADATA_NAMESPACE - ); + Path metadataPidExpectedPath = + getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); if (Files.exists(metadataPidExpectedPath)) { objInfoMap.put("sysmeta_path", metadataPidExpectedPath.toString()); } else { @@ -944,7 +943,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveObject"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getExpectedPath(pid, "object", null); + Path objRealPath = getHashStoreDataObjectPath(pid); // Check to see if object exists if (!Files.exists(objRealPath)) { @@ -987,7 +986,7 @@ public InputStream retrieveMetadata(String pid, String formatId) FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getExpectedPath(pid, "metadata", formatId); + Path metadataCidPath = getHashStoreMetadataPath(pid, formatId); // Check to see if metadata exists if (!Files.exists(metadataCidPath)) { @@ -1031,7 +1030,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getExpectedPath(pid, "metadata", DEFAULT_METADATA_NAMESPACE); + Path metadataCidPath = getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); // Check to see if metadata exists if (!Files.exists(metadataCidPath)) { @@ -1149,13 +1148,13 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Proceed with comprehensive deletion - cid exists, nothing out of place // Get all the required paths to streamline deletion process // Permanent address of the object - Path objRealPath = getExpectedPath(pid, "object", null); + Path objRealPath = getHashStoreDataObjectPath(pid); // Cid refs file Path absCidRefsPath = - getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); // Pid refs file Path absPidRefsPath = - getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); // Rename pid refs file to prepare for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1201,7 +1200,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Begin by renaming pid refs file for deletion Path absPidRefsPath = - getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); @@ -1218,7 +1217,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Get the cid from the pid refs file before renaming it for deletion Path absPidRefsPath = - getExpectedPath(id, "refs", HashStoreIdTypes.pid.getName()); + getHashStoreRefsPath(id, HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); // Since we must access the cid reference file, the `cid` must be synchronized @@ -1248,7 +1247,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Remove the pid from the cid refs file Path absCidRefsPath = - getExpectedPath(cidRead, "refs", HashStoreIdTypes.cid.getName()); + getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); updateRefsFile(pid, absCidRefsPath, "remove"); // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { @@ -1280,7 +1279,7 @@ public void deleteObject(String idType, String id) throws IllegalArgumentExcepti // Rename pid refs file for deletion Path absPidRefsPath = - getExpectedPath(pid, "refs", HashStoreIdTypes.pid.getName()); + getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); @@ -1353,7 +1352,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx try { // Get permanent address of the metadata document - Path metadataDocPath = getExpectedPath(pid, "metadata", formatId); + Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); if (!Files.exists(metadataDocPath)) { String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid @@ -1465,8 +1464,8 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE return objInfoMap.get("cid"); } else { - // Get permanent address of the pid - Path objRealPath = getExpectedPath(pid, "object", null); + // Get permanent address of the pid object + Path objRealPath = getHashStoreDataObjectPath(pid); // Check to see if object exists if (!Files.exists(objRealPath)) { @@ -1991,7 +1990,7 @@ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug("FileHashStore - deleteObjectByCid: called to delete cid: " + cid); // Get expected path of the cid refs file - Path absCidRefsPath = getExpectedPath(cid, "refs", HashStoreIdTypes.cid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); // Get permanent address of the actual cid String objRelativePath = FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid); @@ -2262,7 +2261,7 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // Get permanent address for the given metadata document // All metadata documents for a pid are stored in a directory that is formed // by using the hash of the 'pid', with the file name being the hash of the 'pid+formatId' - Path pathToStoredMetadata = getExpectedPath(pid, "metadata", checkedFormatId); + Path pathToStoredMetadata = getHashStoreMetadataPath(pid, checkedFormatId); // Store metadata to tmpMetadataFile File tmpMetadataFile = FileHashStoreUtility.generateTmpFile( @@ -2319,31 +2318,6 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea } } - /** - * Get the absolute path of a HashStore object, metadata or refs file - * - * @param abpcId Authority-based, persistent or content identifier - * @param entity "object", "metadata" or "refs" - * @param detail Metadata namespace, ref type (pid/cid) or other value - * @return Actual path to object - * @throws IllegalArgumentException If entity is not object or metadata - * @throws NoSuchAlgorithmException If store algorithm is not supported - * @throws IOException If unable to retrieve cid - */ - protected Path getExpectedPath(String abpcId, String entity, String detail) - throws IllegalArgumentException, NoSuchAlgorithmException, IOException { - if (entity.equalsIgnoreCase("object")) { - return getHashStoreDataObjectPath(abpcId); - } else if (entity.equalsIgnoreCase("metadata")) { - return getHashStoreMetadataPath(abpcId, detail); - } else if (entity.equalsIgnoreCase("refs")) { - return getHashStoreRefsPath(abpcId, detail); - } else { - throw new IllegalArgumentException( - "FileHashStore.getExpectedPath - entity must be 'object', 'metadata' or 'refs'"); - } - } - /** * Get the absolute path to a HashStore data object * diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index a81c8618..e7b12e76 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -290,7 +290,7 @@ public void storeObject_overloadInputStreamOnly() throws Exception { fileHashStore.findObject(pid); }); - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertFalse(Files.exists(cidRefsFilePath)); } } @@ -310,7 +310,7 @@ public void storeObject_validateChecksumValue() throws Exception { fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); dataStream.close(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); assertTrue(Files.exists(objCidAbsPath)); } @@ -468,7 +468,7 @@ public void storeObject_duplicate() throws Exception { dataStreamDup.close(); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); } @@ -504,7 +504,7 @@ public void storeObject_largeSparseFile() throws Exception { fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); assertTrue(Files.exists(objCidAbsPath)); } @@ -586,9 +586,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -607,9 +607,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -628,9 +628,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -649,9 +649,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -670,9 +670,9 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -722,7 +722,7 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { // Check cid refs file that every pid is found String cidSha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); - Path cidRefsFilePath = fileHashStore.getExpectedPath(cidSha256DigestFromTestData, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cidSha256DigestFromTestData, "cid"); Set stringSet = new HashSet<>(pidModifiedList); List lines = Files.readAllLines(cidRefsFilePath); boolean allFoundPidsFound = true; @@ -758,9 +758,8 @@ public void storeMetadata() throws Exception { metadataStream.close(); // Calculate absolute path - Path metadataPidExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", testFormatId - ); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, testFormatId); assertEquals(metadataPidExpectedPath.toString(), metadataPath); assertTrue(Files.exists(metadataPidExpectedPath)); @@ -784,9 +783,8 @@ public void storeMetadata_defaultFormatId_overload() throws Exception { // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); assertEquals(metadataPidExpectedPath.toString(), metadataPath); assertTrue(Files.exists(metadataPidExpectedPath)); @@ -844,13 +842,11 @@ public void storeMetadata_multipleFormatIds() throws Exception { metadataStreamDup.close(); // Calculate absolute path - Path metadataTestFormatIdExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", testFormatId - ); + Path metadataTestFormatIdExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, testFormatId); String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataDefaultExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); + Path metadataDefaultExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); assertEquals(metadataTestFormatIdExpectedPath.toString(), metadataPath); assertTrue(Files.exists(metadataTestFormatIdExpectedPath)); @@ -982,9 +978,8 @@ public void storeMetadata_metadataLockedIds() throws Exception { metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (IOException | NoSuchAlgorithmException | InterruptedException e) { e.printStackTrace(); @@ -998,9 +993,8 @@ public void storeMetadata_metadataLockedIds() throws Exception { metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (Exception e) { e.printStackTrace(); @@ -1014,9 +1008,8 @@ public void storeMetadata_metadataLockedIds() throws Exception { metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (Exception e) { e.printStackTrace(); @@ -1034,7 +1027,7 @@ public void storeMetadata_metadataLockedIds() throws Exception { // Confirm metadata file is written Path storePath = Paths.get(fhsProperties.getProperty("storePath")); String formatId = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataCidAbsPath = fileHashStore.getExpectedPath(pid, "metadata", formatId); + Path metadataCidAbsPath = fileHashStore.getHashStoreMetadataPath(pid, formatId); assertTrue(Files.exists(metadataCidAbsPath)); // Confirm there are only three files in HashStore - 'hashstore.yaml', the metadata file written @@ -1393,7 +1386,7 @@ public void deleteObject_stringPidAll() throws Exception { metadataStream.close(); InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile); String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStreamTwo, pid); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path metadataPath = Paths.get(metadataPathString); Path metadataDefaultPath = Paths.get(metadataDefaultPathString); metadataStreamTwo.close(); @@ -1428,7 +1421,7 @@ public void deleteObject_stringPidNoMetadataDocs() throws Exception { dataStream.close(); // Get metadata file - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); // Confirm expected documents exist assertTrue(Files.exists(objCidAbsPath)); @@ -1454,7 +1447,7 @@ public void deleteObject_pidType_objectDeleted() throws Exception { fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); fileHashStore.deleteObject(fhsDeleteTypePid, pid); // Check that file doesn't exist @@ -1487,8 +1480,8 @@ public void deleteObject_pidType_referencesDeleted() throws Exception { String cid = objInfo.getCid(); // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); fileHashStore.deleteObject(fhsDeleteTypePid, pid); assertFalse(Files.exists(absPathPidRefsPath)); assertFalse(Files.exists(absPathCidRefsPath)); @@ -1515,9 +1508,9 @@ public void deleteObject_pidType_CidRefsFileNotEmptyObjectExistsStill() throws E String cid = objInfo.getCid(); fileHashStore.tagObject(pidExtra, cid); - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); fileHashStore.deleteObject(fhsDeleteTypePid, pid); assertFalse(Files.exists(absPathPidRefsPath)); @@ -1543,10 +1536,10 @@ public void deleteObject_pidType_pidOrphan() throws Exception { dataStream.close(); String cid = objInfo.getCid(); String pidExtra = "dou.test" + pid; - Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); // Manually change the pid found in the cid refs file - Path absPathCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath, "add"); // Create an orphaned pid refs file fileHashStore.updateRefsFile(pid, absPathCidRefsPath, "remove"); @@ -1556,7 +1549,7 @@ public void deleteObject_pidType_pidOrphan() throws Exception { // Confirm cid refs file still exists assertTrue(Files.exists(absPathCidRefsPath)); // Confirm the original (and now orphaned) pid refs file is deleted - Path absPathPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); assertFalse(Files.exists(absPathPidRefsPath)); // Confirm the object has not been deleted assertTrue(Files.exists(objRealPath)); @@ -1618,8 +1611,8 @@ public void deleteObject_orphanRefsFiles() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path absPathCidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path absPathPidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); fileHashStore.deleteObject("pid", pid); assertFalse(Files.exists(absPathCidRefsPath)); @@ -1674,10 +1667,10 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { fileHashStore.deleteObject(fhsDeleteTypeCid, cid); // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); assertTrue(Files.exists(objRealPath)); // Confirm cid refs file still exists - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(cidRefsPath)); } } @@ -1748,7 +1741,7 @@ public void deleteMetadata() throws Exception { fileHashStore.deleteMetadata(pid, storeFormatId); // Check that file doesn't exist - Path metadataCidPath = fileHashStore.getExpectedPath(pid, "metadata", storeFormatId); + Path metadataCidPath = fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); assertFalse(Files.exists(metadataCidPath)); // Check that parent directories are not deleted @@ -1789,11 +1782,9 @@ public void deleteMetadata_overload() throws Exception { // Check that file doesn't exist String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - Path metadataCidPath = fileHashStore.getExpectedPath(pid, "metadata", storeFormatId); - Path metadataCidPathTwo = fileHashStore.getExpectedPath(pid, "metadata", formatIdTwo); - Path metadataCidPathThree = fileHashStore.getExpectedPath( - pid, "metadata", formatIdThree - ); + Path metadataCidPath = fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); + Path metadataCidPathTwo = fileHashStore.getHashStoreMetadataPath(pid, formatIdTwo); + Path metadataCidPathThree = fileHashStore.getHashStoreMetadataPath(pid, formatIdThree); assertFalse(Files.exists(metadataCidPath)); assertFalse(Files.exists(metadataCidPathTwo)); @@ -2041,8 +2032,8 @@ public void findObject_refsPaths() throws Exception { String cidRefsPath = objInfoMap.get("cid_refs_path"); String pidRefsPath = objInfoMap.get("pid_refs_path"); - Path cidRefsFilePath = fileHashStore.getExpectedPath(objInfo.getCid(), "refs", "cid"); - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), "cid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); assertEquals(cidRefsPath, cidRefsFilePath.toString()); assertEquals(pidRefsPath, pidRefsFilePath.toString()); @@ -2077,9 +2068,7 @@ public void findObject_sysmetaPath_exists() throws Exception { String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path sysmetaPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); + Path sysmetaPath = fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); System.out.println(sysmetaPath); assertEquals(objInfoSysmetaPath, sysmetaPath.toString()); @@ -2105,11 +2094,6 @@ public void findObject_sysmetaPath_doesNotExist() throws Exception { Map objInfoMap = fileHashStore.findObject(pid); String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); - String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path sysmetaPath = fileHashStore.getExpectedPath( - pid, "metadata", storeMetadataNamespace - ); - assertEquals(objInfoSysmetaPath, "Does not exist"); } } @@ -2139,7 +2123,7 @@ public void findObject_cidRefsFileNotFound() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); Files.delete(cidRefsPath); assertThrows(OrphanPidRefsFileException.class, () -> { @@ -2158,7 +2142,7 @@ public void findObject_cidRefsFileMissingPid() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); fileHashStore.updateRefsFile(pid, cidRefsPath, "remove"); assertThrows(PidNotFoundInCidRefsFileException.class, () -> { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 8531beb9..a59c05ec 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -707,7 +707,8 @@ public void putMetadata() throws Exception { // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = fileHashStore.getExpectedPath(pid, "metadata", storeMetadataNamespace); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); assertEquals(metadataPath, metadataPidExpectedPath.toString()); } } @@ -898,7 +899,7 @@ public void isStringInRefsFile_pidFound() throws Exception { dataStreamDup.close(); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); } } @@ -917,7 +918,7 @@ public void isStringInRefsFile_pidNotFound() throws Exception { dataStream.close(); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertFalse(fileHashStore.isStringInRefsFile("pid.not.found", absCidRefsPath)); } } @@ -970,38 +971,16 @@ public void tryDeleteObjectByCid_cidRefsFileContainsPids() throws Exception { fileHashStore.deleteObjectByCid(cid); // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getExpectedPath(pid, "object", null); + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); assertTrue(Files.exists(objRealPath)); } } /** - * Confirm getExpectedPath returns a file path that exists + * Confirm getHashStoreDataObjectPath returns correct object path */ @Test - public void getExpectedPath() throws Exception { - // Get single test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); - String cid = objInfo.getCid(); - - Path objCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); - assertTrue(Files.exists(objCidAbsPath)); - assertTrue(Files.exists(pidRefsPath)); - assertTrue(Files.exists(cidRefsPath)); - } - - /** - * Confirm getExpectedPath returns correct object path - */ - @Test - public void getExpectedPath_objectPath() throws Exception { + public void getHashStoreDataObjectPath() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1018,17 +997,17 @@ public void getExpectedPath_objectPath() throws Exception { String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); Path calculatedObjRealPath = storePath.resolve("objects").resolve(objShardString); - Path expectedObjCidAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path expectedObjCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); assertEquals(expectedObjCidAbsPath, calculatedObjRealPath); } } /** - * Confirm getExpectedPath returns correct metadata path + * Confirm getHashStoreMetadataPath returns correct metadata path */ @Test - public void getExpectedPath_metadataPath() throws Exception { + public void getHashStoreMetadataPath() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); @@ -1058,17 +1037,18 @@ public void getExpectedPath_metadataPath() throws Exception { Path calculatedMetadataRealPath = storePath.resolve("metadata").resolve(metadataPidDirIdSharded).resolve(hashId); - Path expectedMetadataPidPath = fileHashStore.getExpectedPath(pid, "metadata", storeFormatId); + Path expectedMetadataPidPath = + fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); assertEquals(expectedMetadataPidPath, calculatedMetadataRealPath); } } /** - * Confirm getExpectedPath returns correct pid refs path + * Confirm getHashStoreRefsPath returns correct pid refs path */ @Test - public void getExpectedPath_pidRefsPaths() throws Exception { + public void getHashStoreRefsPath_pid() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1092,17 +1072,17 @@ public void getExpectedPath_pidRefsPaths() throws Exception { Path calculatedPidRefsRealPath = storePath.resolve("refs/pids").resolve(metadataPidHashSharded); - Path expectedPidRefsPath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path expectedPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); assertEquals(expectedPidRefsPath, calculatedPidRefsRealPath); } } /** - * Confirm getExpectedPath returns correct cid refs path + * Confirm getHashStoreRefsPath returns correct cid refs path */ @Test - public void getExpectedPath_cidRefsPaths() throws Exception { + public void getHashStoreRefsPath_cid() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1121,33 +1101,33 @@ public void getExpectedPath_cidRefsPaths() throws Exception { String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); Path calculatedCidRefsRealPath = storePath.resolve("refs/cids").resolve(objShardString); - Path expectedCidRefsPath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path expectedCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertEquals(expectedCidRefsPath, calculatedCidRefsRealPath); } } /** - * Confirm getExpectedPath throws exception when requesting the path to a refs file with a + * Confirm getHashStoreRefsPath throws exception when requesting the path to a refs file with a * formatId arg that is not "cid" or "pid" */ @Test - public void getExpectedPath_incorrectRefsFormatId() { + public void getHashStoreRefsPath_incorrectRefsType() { assertThrows(IllegalArgumentException.class, () -> { String cid = "testcid"; - fileHashStore.getExpectedPath(cid, "refs", "not_cid_or_pid"); + fileHashStore.getHashStoreRefsPath(cid, "not_cid_or_pid"); }); } /** - * Confirm getExpectedPath throws exception when requesting path for an object that does not - * exist + * Confirm getHashStoreDataObjectPath throws exception when requesting path for an object + * that does not exist */ @Test - public void getExpectedPath_fileNotFound() { + public void getHashStoreDataObjectPath_fileNotFound() { assertThrows(FileNotFoundException.class, () -> { String pid = "dou.test.1"; - fileHashStore.getExpectedPath(pid, "object", null); + fileHashStore.getHashStoreDataObjectPath(pid); }); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index bea3f890..648c4af5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -101,7 +101,7 @@ public void tagObject_pidRefsFileContent() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); assertTrue(Files.exists(pidRefsFilePath)); String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); @@ -117,7 +117,7 @@ public void tagObject_cidRefsFileContent() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(cidRefsFilePath)); String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); @@ -177,9 +177,8 @@ public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound String cidForOrphanPidRef = "987654321fedcba"; // Create orphaned pid refs file - Path absPidRefsPath = fileHashStore.getExpectedPath( - pid, "refs", HashStoreIdTypes.pid.getName() - ); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); File pidRefsTmpFile = fileHashStore.writeRefsFile( cidForOrphanPidRef, HashStoreIdTypes.pid.getName() ); @@ -204,7 +203,7 @@ public void tagObject_pidRefsFileFound_cidRefsFileNotFound() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); // Manually delete the cid refs file - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); Files.delete(cidRefsFilePath); fileHashStore.tagObject(pid, cid); @@ -230,13 +229,11 @@ public void tagObject_pidRefsFileNotFound_cidRefsFileFound() throws Exception { fileHashStore.tagObject(pidAdditional, cid); // Confirm missing pid refs file has been created - Path pidAdditionalRefsFilePath = fileHashStore.getExpectedPath( - pidAdditional, "refs", "pid" - ); + Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, "pid"); assertTrue(Files.exists(pidAdditionalRefsFilePath)); // Check cid refs file - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( pidAdditional, cidRefsFilePath ); @@ -278,7 +275,7 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertThrows(CidNotFoundInPidRefsFileException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, cidRefsFilePath); @@ -300,7 +297,7 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); // Get path of the pid refs file - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); assertThrows(PidNotFoundInCidRefsFileException.class, () -> { fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsTmpFilePath); @@ -317,7 +314,7 @@ public void updateRefsFile_content() throws Exception { fileHashStore.tagObject(pid, cid); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); String pidAdditional = "dou.test.2"; fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath, "add"); @@ -346,7 +343,7 @@ public void deleteRefsFile_fileDeleted() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); fileHashStore.deleteRefsFile(pidRefsFilePath); assertFalse(Files.exists(pidRefsFilePath)); @@ -360,7 +357,7 @@ public void deletePidRefsFile_missingPidRefsFile() { String pid = "dou.test.1"; assertThrows(FileNotFoundException.class, () -> { - Path pidRefsFilePath = fileHashStore.getExpectedPath(pid, "refs", "pid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); fileHashStore.deleteRefsFile(pidRefsFilePath); }); } @@ -376,7 +373,7 @@ public void deleteCidRefsPid_pidRemoved() throws Exception { String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); assertFalse(fileHashStore.isStringInRefsFile(pid, cidRefsFilePath)); @@ -393,7 +390,7 @@ public void deleteCidRefsPid_allPidsRemoved() throws Exception { fileHashStore.tagObject(pid, cid); String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getExpectedPath(cid, "refs", "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "remove"); From 076e7a3ef6d1094ad546dcd006c402551dd44f73 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 13:54:45 -0700 Subject: [PATCH 335/553] Remove 'deleteObject(String, String)' overload method that was leaking implementation details, remove implemented method in 'FileHashStore' and revise junit tests --- .../java/org/dataone/hashstore/HashStore.java | 25 +- .../dataone/hashstore/HashStoreClient.java | 7 +- .../filehashstore/FileHashStore.java | 393 ++++++++---------- .../dataone/hashstore/HashStoreRunnable.java | 2 +- .../FileHashStoreInterfaceTest.java | 82 ++-- 5 files changed, 208 insertions(+), 301 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 387a3c58..5a21f552 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -224,17 +224,11 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** - * Deletes an object and its related data permanently from HashStore using a given - * persistent identifier. If the `idType` is 'pid', the object associated with the pid will - * be deleted if it is not referenced by any other pids, along with its reference files and - * all metadata documents found in its respective metadata directory. If the `idType` is - * 'cid', only the object will be deleted if it is not referenced by other pids. - * - * Notes: All objects are renamed at their existing path with a '_deleted' appended - * to their file name before they are deleted. + * Deletes an object and all relevant associated files (ex. system metadata, reference + * files, etc.) based on a given pid. If other pids still reference the pid's associated + * object, the object will not be deleted. * - * @param idType 'pid' or 'cid' - * @param id Authority-based identifier or content identifier + * @param pid Authority-based identifier * @throws IllegalArgumentException When pid is null or empty * @throws IOException I/O error when deleting empty directories, * modifying/deleting reference files @@ -242,17 +236,6 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, * address is not supported * @throws InterruptedException When deletion synchronization is interrupted */ - public void deleteObject(String idType, String id) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException, InterruptedException; - - /** - * Deletes an object and all relevant associated files (ex. system metadata, reference - * files, etc.) based on a given pid. If other pids still reference the pid's associated - * object, the object will not be deleted. - * - * @param pid Authority-based identifier - * @see #deleteObject(String, String) for more details. - */ public void deleteObject(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException; diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index 6f9d9c69..a89f60fc 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -21,7 +21,6 @@ import java.sql.Statement; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; -import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -245,8 +244,7 @@ public static void main(String[] args) throws Exception { String pid = cmd.getOptionValue("pid"); FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - String deleteIdType = HashStoreIdTypes.pid.getName(); - hashStore.deleteObject(deleteIdType, pid); + hashStore.deleteObject(pid); System.out.println("Object for pid (" + pid + ") has been deleted."); } else if (cmd.hasOption("deletemetadata")) { @@ -718,8 +716,7 @@ private static void deleteObjectsFromStore(List> resultObjLi // Delete object System.out.println("Deleting object for guid: " + guid); - String deleteIdType = HashStoreIdTypes.pid.getName(); - hashStore.deleteObject(deleteIdType, guid); + hashStore.deleteObject(guid); } catch (FileNotFoundException fnfe) { String errMsg = "Unexpected Error: " + fnfe.fillInStackTrace(); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7a1e88e4..e139db06 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1062,259 +1062,220 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, } @Override - public void deleteObject(String idType, String id) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException { + public void deleteObject(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException { logFileHashStore.debug( - "FileHashStore.deleteObject - Called to delete object for id: " + id + "(" + idType - + ")"); + "FileHashStore.deleteObject - Called to delete object for pid: " + pid); // Validate input parameters - FileHashStoreUtility.ensureNotNull(id, "id", "deleteObject"); - FileHashStoreUtility.checkForEmptyString(id, "id", "deleteObject"); - if (!idType.equals(HashStoreIdTypes.pid.getName()) && !idType.equals( - HashStoreIdTypes.cid.getName() + FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); + FileHashStoreUtility.checkForEmptyString(pid, "id", "deleteObject"); - )) { - String errMsg = "FileHashStore.deleteObject - 'idType' must be 'pid' or 'cid'"; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - } - - // If 'idType' is cid, attempt to delete the object directly without checking refs files - if (idType.equals(HashStoreIdTypes.cid.getName())) { - // The following method is synchronized based on cids - deleteObjectByCid(id); + // Else 'idType' is pid + String cid; + List deleteList = new ArrayList<>(); - } else { - // Else 'idType' is pid - String cid; - String pid = id; - List deleteList = new ArrayList<>(); - - // Storing and deleting objects are synchronized together - // Duplicate store object requests for a pid are rejected, but deleting an object - // will wait for a pid to be released if it's found to be in use before proceeding. - synchronized (objectLockedIds) { - while (objectLockedIds.contains(pid)) { - try { - objectLockedIds.wait(TIME_OUT_MILLISEC); + // Storing and deleting objects are synchronized together + // Duplicate store object requests for a pid are rejected, but deleting an object + // will wait for a pid to be released if it's found to be in use before proceeding. + synchronized (objectLockedIds) { + while (objectLockedIds.contains(pid)) { + try { + objectLockedIds.wait(TIME_OUT_MILLISEC); - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - Delete request for pid: " + pid - + " has been interrupted."; - logFileHashStore.warn(errMsg); - throw new InterruptedException(errMsg); - } + } catch (InterruptedException ie) { + String errMsg = "FileHashStore.deleteObject - Delete request for pid: " + pid + + " has been interrupted."; + logFileHashStore.warn(errMsg); + throw new InterruptedException(errMsg); } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing objectLockedIds for pid: " + pid); - objectLockedIds.add(pid); } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing objectLockedIds for pid: " + pid); + objectLockedIds.add(pid); + } + try { + // Before we begin deletion process, we look for the `cid` by calling + // `findObject` which will throw custom exceptions if there is an issue with + // the reference files, which help us determine the path to proceed with. try { - // Before we begin deletion process, we look for the `cid` by calling - // `findObject` which will throw custom exceptions if there is an issue with - // the reference files, which help us determine the path to proceed with. - try { - Map objInfoMap = findObject(id); - cid = objInfoMap.get("cid"); - - // If no exceptions are thrown, we proceed to synchronization based on the `cid` - // Multiple threads may access the cid reference file (which contains a list of - // `pid`s that reference a `cid`) and this needs to be coordinated. Otherwise, - // we will run into a `OverlappingFileLockException` - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " - + pid + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " - + "pid: " - + pid + " with cid: " + cid); - referenceLockedCids.add(cid); - } - - try { - // Proceed with comprehensive deletion - cid exists, nothing out of place - // Get all the required paths to streamline deletion process - // Permanent address of the object - Path objRealPath = getHashStoreDataObjectPath(pid); - // Cid refs file - Path absCidRefsPath = - getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - // Pid refs file - Path absPidRefsPath = - getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - - // Rename pid refs file to prepare for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove pid from cid refs file - updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete obj and cid refs file **only** if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Rename empty cid refs file to prepare for deletion - deleteList.add( - FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - // Rename actual object to prepare for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); - } else { - String warnMsg = - "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object " - + "deletion."; - logFileHashStore.warn(warnMsg); - } - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - // Remove metadata files - deleteMetadata(pid); - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath); - - } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for " - + "pid: " - + pid + " with cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); + Map objInfoMap = findObject(pid); + cid = objInfoMap.get("cid"); + + // If no exceptions are thrown, we proceed to synchronization based on the `cid` + // Multiple threads may access the cid reference file (which contains a list of + // `pid`s that reference a `cid`) and this needs to be coordinated. Otherwise, + // we will run into a `OverlappingFileLockException` + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } } + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cid); + referenceLockedCids.add(cid); + } - } catch (OrphanPidRefsFileException oprfe) { - // `findObject` throws this exception when the cid refs file doesn't exist, - // so we only need to delete the pid refs file and related metadata documents - - // Begin by renaming pid refs file for deletion - Path absPidRefsPath = - getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + try { + // Proceed with comprehensive deletion - cid exists, nothing out of place + // Get all the required paths to streamline deletion process + // Permanent address of the object + Path objRealPath = getHashStoreDataObjectPath(pid); + // Cid refs file + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + // Pid refs file + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + + // Rename pid refs file to prepare for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items + // Remove pid from cid refs file + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete obj and cid refs file **only** if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Rename empty cid refs file to prepare for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + // Rename actual object to prepare for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); + } else { + String warnMsg = + "FileHashStore.deleteObject - cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object " + + "deletion."; + logFileHashStore.warn(warnMsg); + } + // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); // Remove metadata files deleteMetadata(pid); - String warnMsg = - "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid - + ". Deleted orphan pid refs file and metadata."; - logFileHashStore.warn(warnMsg); - - } catch (OrphanRefsFilesException orfe) { - // `findObject` throws this exception when the pid and cid refs file exists, - // but the actual object being referenced by the pid does not exist - - // Get the cid from the pid refs file before renaming it for deletion - Path absPidRefsPath = - getHashStoreRefsPath(id, HashStoreIdTypes.pid.getName()); - String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + logFileHashStore.info( + "FileHashStore.deleteObject - File and references deleted for: " + pid + + " with object address: " + objRealPath); - // Since we must access the cid reference file, the `cid` must be synchronized + } finally { + // Release lock synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cidRead)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " - + pid + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cidRead); - referenceLockedCids.add(cidRead); + "FileHashStore.deleteObject - Releasing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cid); + referenceLockedCids.remove(cid); + referenceLockedCids.notify(); } + } - try { - // Rename pid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - // Remove the pid from the cid refs file - Path absCidRefsPath = - getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); - updateRefsFile(pid, absCidRefsPath, "remove"); - // Add the cid reference file to deleteList if it's now empty - if (Files.size(absCidRefsPath) == 0) { - deleteList.add( - FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - // Remove metadata files - deleteMetadata(pid); - String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead - + " does not exist, but pid and cid reference file found for pid: " - + pid + ". Deleted pid and cid ref files and metadata."; - logFileHashStore.warn(warnMsg); + } catch (OrphanPidRefsFileException oprfe) { + // `findObject` throws this exception when the cid refs file doesn't exist, + // so we only need to delete the pid refs file and related metadata documents + + // Begin by renaming pid refs file for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + // Remove metadata files + deleteMetadata(pid); + String warnMsg = + "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file and metadata."; + logFileHashStore.warn(warnMsg); - } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cidRead); - referenceLockedCids.remove(cidRead); - referenceLockedCids.notify(); + } catch (OrphanRefsFilesException orfe) { + // `findObject` throws this exception when the pid and cid refs file exists, + // but the actual object being referenced by the pid does not exist + + // Get the cid from the pid refs file before renaming it for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + + // Since we must access the cid reference file, the `cid` must be synchronized + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cidRead)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.deleteObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); } } - } catch (PidNotFoundInCidRefsFileException pnficrfe) { - // `findObject` throws this exception when both the pid and cid refs file exists - // but the pid is not found in the cid refs file. + logFileHashStore.debug( + "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cidRead); + referenceLockedCids.add(cidRead); + } + try { // Rename pid refs file for deletion - Path absPidRefsPath = - getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + // Remove the pid from the cid refs file + Path absCidRefsPath = + getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); + updateRefsFile(pid, absCidRefsPath, "remove"); + // Add the cid reference file to deleteList if it's now empty + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } // Delete items FileHashStoreUtility.deleteListItems(deleteList); // Remove metadata files deleteMetadata(pid); - String warnMsg = - "FileHashStore.deleteObject - Pid not found in expected cid refs file for" - + " pid: " + pid + ". Deleted orphan pid refs file and metadata."; + String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted pid and cid ref files and metadata."; logFileHashStore.warn(warnMsg); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cidRead); + referenceLockedCids.remove(cidRead); + referenceLockedCids.notify(); + } } - } finally { - // Release lock on the pid - synchronized (objectLockedIds) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing objectLockedIds for pid: " + pid); - objectLockedIds.remove(pid); - objectLockedIds.notify(); - } + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // `findObject` throws this exception when both the pid and cid refs file exists + // but the pid is not found in the cid refs file. + + // Rename pid refs file for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + // Remove metadata files + deleteMetadata(pid); + String warnMsg = + "FileHashStore.deleteObject - Pid not found in expected cid refs file for" + + " pid: " + pid + ". Deleted orphan pid refs file and metadata."; + logFileHashStore.warn(warnMsg); + } + } finally { + // Release lock on the pid + synchronized (objectLockedIds) { + logFileHashStore.debug( + "FileHashStore.deleteObject - Releasing objectLockedIds for pid: " + pid); + objectLockedIds.remove(pid); + objectLockedIds.notify(); } } } - - @Override - public void deleteObject(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException { - logFileHashStore.debug( - "FileHashStore.deleteObject - Called to delete all associated docs for pid: " + pid - ); - deleteObject(HashStoreIdTypes.pid.getName(), pid); - logFileHashStore.info( - "FileHashStore.deleteObject - Object, references and metadata deleted for: " + pid - ); - } - @Override public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException { diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 168b012a..a5b984d4 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -57,7 +57,7 @@ public void run() { break; case deleteObject: try { - hashstore.deleteObject("pid", pid); + hashstore.deleteObject(pid); } catch (Exception e) { throw new HashStoreServiceException(e.getMessage()); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index e7b12e76..95362f4c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -39,7 +39,6 @@ import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; -import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -56,8 +55,6 @@ public class FileHashStoreInterfaceTest { private Properties fhsProperties; private Path rootDirectory; private static final TestDataHarness testData = new TestDataHarness(); - private String fhsDeleteTypePid; - private String fhsDeleteTypeCid; /** * Initialize FileHashStore before each test to creates tmp directories @@ -65,8 +62,6 @@ public class FileHashStoreInterfaceTest { @BeforeEach public void initializeFileHashStore() { rootDirectory = tempFolder.resolve("hashstore"); - fhsDeleteTypePid = HashStoreIdTypes.pid.getName(); - fhsDeleteTypeCid = HashStoreIdTypes.cid.getName(); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -1340,34 +1335,10 @@ public void retrieveMetadata_verifyContent() throws Exception { } /** - * Confirm that deleteObject throws exceptions when not using HashStoreIdTypes + * Confirm that deleteObject deletes objects and all metadata documents. */ @Test - public void deleteObject_invalidIdTypes() { - assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject( - "PID", "dou.2023.hashstore.1" - ) - ); - assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject( - "CID", "dou.2023.hashstore.1" - ) - ); - assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject( - "bad.value", "dou.2023.hashstore.1" - ) - ); - } - - - /** - * Confirm that deleteObject overload method with signature (String pid) deletes objects - * and all metadata documents. - */ - @Test - public void deleteObject_stringPidAll() throws Exception { + public void deleteObject_dataObjAndMetadataDocs() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1438,7 +1409,7 @@ public void deleteObject_stringPidNoMetadataDocs() throws Exception { * Confirm that deleteObject deletes object */ @Test - public void deleteObject_pidType_objectDeleted() throws Exception { + public void deleteObject_objectDeleted() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1448,7 +1419,7 @@ public void deleteObject_pidType_objectDeleted() throws Exception { dataStream.close(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - fileHashStore.deleteObject(fhsDeleteTypePid, pid); + fileHashStore.deleteObject(pid); // Check that file doesn't exist assertFalse(Files.exists(objCidAbsPath)); @@ -1467,7 +1438,7 @@ public void deleteObject_pidType_objectDeleted() throws Exception { * Confirm that deleteObject deletes reference files */ @Test - public void deleteObject_pidType_referencesDeleted() throws Exception { + public void deleteObject_referencesDeleted() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1482,7 +1453,7 @@ public void deleteObject_pidType_referencesDeleted() throws Exception { // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.deleteObject(fhsDeleteTypePid, pid); + fileHashStore.deleteObject(pid); assertFalse(Files.exists(absPathPidRefsPath)); assertFalse(Files.exists(absPathCidRefsPath)); } @@ -1494,7 +1465,7 @@ public void deleteObject_pidType_referencesDeleted() throws Exception { * has references). */ @Test - public void deleteObject_pidType_CidRefsFileNotEmptyObjectExistsStill() throws Exception { + public void deleteObject_cidRefsFileNotEmptyObjectExistsStill() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1511,7 +1482,7 @@ public void deleteObject_pidType_CidRefsFileNotEmptyObjectExistsStill() throws E Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.deleteObject(fhsDeleteTypePid, pid); + fileHashStore.deleteObject(pid); assertFalse(Files.exists(absPathPidRefsPath)); assertTrue(Files.exists(objCidAbsPath)); @@ -1524,7 +1495,7 @@ public void deleteObject_pidType_CidRefsFileNotEmptyObjectExistsStill() throws E * file does not contain the expected pid. */ @Test - public void deleteObject_pidType_pidOrphan() throws Exception { + public void deleteObject_pidOrphan() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1544,7 +1515,7 @@ public void deleteObject_pidType_pidOrphan() throws Exception { // Create an orphaned pid refs file fileHashStore.updateRefsFile(pid, absPathCidRefsPath, "remove"); - fileHashStore.deleteObject(fhsDeleteTypePid, pid); + fileHashStore.deleteObject(pid); // Confirm cid refs file still exists assertTrue(Files.exists(absPathCidRefsPath)); @@ -1562,11 +1533,9 @@ public void deleteObject_pidType_pidOrphan() throws Exception { * Confirm that deleteObject throws exception when associated pid obj not found */ @Test - public void deleteObject_pidType_NotFoundPid() { + public void deleteObject_pidNotFound() { assertThrows( - FileNotFoundException.class, () -> fileHashStore.deleteObject( - fhsDeleteTypePid, "dou.2023.hashstore.1" - ) + FileNotFoundException.class, () -> fileHashStore.deleteObject("dou.2023.hashstore.1") ); } @@ -1574,9 +1543,9 @@ public void deleteObject_pidType_NotFoundPid() { * Confirm that deleteObject throws exception when pid is null */ @Test - public void deleteObject_idNull() { + public void deleteObject_pidNull() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject(fhsDeleteTypePid, null) + IllegalArgumentException.class, () -> fileHashStore.deleteObject(null) ); } @@ -1584,9 +1553,9 @@ public void deleteObject_idNull() { * Confirm that deleteObject throws exception when pid is empty */ @Test - public void deleteObject_idEmpty() { + public void deleteObject_pidEmpty() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject(fhsDeleteTypePid, "") + IllegalArgumentException.class, () -> fileHashStore.deleteObject("") ); } @@ -1594,11 +1563,8 @@ public void deleteObject_idEmpty() { * Confirm that deleteObject throws exception when pid is empty spaces */ @Test - public void deleteObject_idEmptySpaces() { - assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject( - fhsDeleteTypePid, " " - ) + public void deleteObject_pidEmptySpaces() { + assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(" ") ); } @@ -1614,16 +1580,16 @@ public void deleteObject_orphanRefsFiles() throws Exception { Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.deleteObject("pid", pid); + fileHashStore.deleteObject(pid); assertFalse(Files.exists(absPathCidRefsPath)); assertFalse(Files.exists(absPathPidRefsPath)); } /** - * Confirm deleteObject with idType 'cid' deletes cid object + * Confirm deleteObjectByCid deletes cid object */ @Test - public void deleteObject_cidType() throws Exception { + public void deleteObjectByCid() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1633,7 +1599,7 @@ public void deleteObject_cidType() throws Exception { dataStream.close(); String cid = objInfo.getCid(); - fileHashStore.deleteObject(fhsDeleteTypeCid, cid); + fileHashStore.deleteObjectByCid(cid); // Get permanent address of the actual cid int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -1648,7 +1614,7 @@ public void deleteObject_cidType() throws Exception { } /** - * Confirm deleteObject with idType 'cid' does not delete an object because a cid refs file + * Confirm deleteObjectByCid does not delete an object because a cid refs file * exists (there are still pids referencing the object) */ @Test @@ -1664,7 +1630,7 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { dataStream.close(); String cid = objInfo.getCid(); - fileHashStore.deleteObject(fhsDeleteTypeCid, cid); + fileHashStore.deleteObjectByCid(cid); // Get permanent address of the actual cid Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); From 52a7568bd113d0405059a67ff11a19386277c039 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 14:24:18 -0700 Subject: [PATCH 336/553] Update 'HashStore' interface javadocs for 'verifyObject' --- src/main/java/org/dataone/hashstore/HashStore.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 5a21f552..851636d3 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -117,6 +117,10 @@ public void tagObject(String pid, String cid) throws IOException, * @throws NonMatchingObjSizeException Given size =/= objMeta size value * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value * @throws UnsupportedHashAlgorithmException Given algo is not found or supported + * @throws NoSuchAlgorithmException When 'deleteInvalidObject' is true and an algo + * used to get a cid refs file is not supported + * @throws InterruptedException When 'deleteInvalidObject' is true and an issue + * with coordinating deleting objects occurs * @throws IOException Issue with recalculating supported algo for * checksum not found */ @@ -124,7 +128,8 @@ public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, boolean deleteInvalidObject) throws NonMatchingObjSizeException, NonMatchingChecksumException, - UnsupportedHashAlgorithmException, IOException; + UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, + IOException; /** * Checks whether an object referenced by a pid exists and returns a map containing the From 9b65a372cc8084295321f7b9bbb04d4fe408010b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 14:26:31 -0700 Subject: [PATCH 337/553] Implement updated 'verifyObject' functionality when boolean 'deleteInvalidObject' is true, and revise/add junit tests --- .../filehashstore/FileHashStore.java | 13 +- .../FileHashStoreReferencesTest.java | 132 +++++++++++++++++- 2 files changed, 138 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e139db06..bd0df87d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -686,9 +686,10 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi @Override public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, - boolean deleteInvalidObject - ) throws NonMatchingObjSizeException, NonMatchingChecksumException, - UnsupportedHashAlgorithmException, IOException { + boolean deleteInvalidObject) + throws NonMatchingObjSizeException, NonMatchingChecksumException, + UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, + IOException { logFileHashStore.debug( "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() ); @@ -733,6 +734,9 @@ public void verifyObject( } // Validate checksum if (!digestFromHexDigests.equals(checksum)) { + if (deleteInvalidObject) { + deleteObjectByCid(objCid); + } String errMsg = "FileHashStore.verifyObject - Object content invalid for cid: " + objCid + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")"; @@ -741,6 +745,9 @@ public void verifyObject( } // Validate size if (objInfoRetrievedSize != objSize) { + if (deleteInvalidObject) { + deleteObjectByCid(objCid); + } String errMsg = "FileHashStore.verifyObject - Object size invalid for cid: " + objCid + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize; logFileHashStore.error(errMsg); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 648c4af5..68259e7b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -419,8 +419,18 @@ public void verifyObject_correctValues() throws Exception { long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true ); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); } } @@ -443,8 +453,18 @@ public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); fileHashStore.verifyObject( - objInfo, expectedChecksum, "MD2", expectedSize, false + objInfo, expectedChecksum, "MD2", expectedSize, true + ); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); } } @@ -467,6 +487,16 @@ public void verifyObject_unsupportedAlgo() throws Exception { objInfo, "ValueNotRelevant", "BLAKE2S", 1000, false ); }); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); } } @@ -474,7 +504,7 @@ public void verifyObject_unsupportedAlgo() throws Exception { * Check that verifyObject throws exception when non-matching size value provided */ @Test - public void verifyObject_mismatchedValuesNonMatchingSize() throws Exception { + public void verifyObject_mismatchedSize() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -494,6 +524,16 @@ public void verifyObject_mismatchedValuesNonMatchingSize() throws Exception { objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false ); }); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); } } @@ -501,7 +541,7 @@ public void verifyObject_mismatchedValuesNonMatchingSize() throws Exception { * Check that verifyObject throws exception with non-matching checksum value */ @Test - public void verifyObject_mismatchedValuesNonMatchingChecksum() throws Exception { + public void verifyObject_mismatchedChecksum() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -521,6 +561,90 @@ public void verifyObject_mismatchedValuesNonMatchingChecksum() throws Exception objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false ); }); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject throws exception when non-matching size value provided + */ + @Test + public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = 123456789; + + assertThrows(NonMatchingObjSizeException.class, () -> { + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true + ); + }); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject throws exception with non-matching checksum value + */ + @Test + public void verifyObject_mismatchedChecksum_deleteInvalidObject_true() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = "intentionallyWrongValue"; + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + assertThrows(NonMatchingChecksumException.class, () -> { + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true + ); + }); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); } } } From f17c967ab852a80a62cbdf91b6327d7b7b467f5f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 14:44:22 -0700 Subject: [PATCH 338/553] Cleanup 'validateTmpObject' javadoc --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index bd0df87d..bf1d57f9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1579,7 +1579,6 @@ protected ObjectMetadata putObject( * @param objSize Expected size of object * @param storedObjFileSize Actual size of object stored * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent - * @throws IOException Issue with deleting tmpFile */ private void validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, From 2f7b790f6272f384128fecdce8c0a9f1efb47200 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 14:51:57 -0700 Subject: [PATCH 339/553] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c0ba9c5a..6036e7d4 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ To create or interact with a HashStore, instantiate a HashStore object with the - storeAlgorithm - storeMetadataNamespace -```java +``` String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; Path rootDirectory = tempFolder.resolve("metacat"); @@ -70,7 +70,7 @@ hashStore.storeObject(stream, pid) In HashStore, objects are first saved as temporary files while their content identifiers are calculated. Once the default hash algorithm list and their hashes are generated, objects are stored in their permanent location using the store's algorithm's corresponding hash value, the store depth and the store width. Lastly, reference files are created for the object so that they can be found and retrieved given an identifier (ex. persistent identifier (pid)). Note: Objects are also stored once and only once. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identifier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: -```java +``` // All-in-one process which stores, validates and tags an object objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) @@ -78,7 +78,7 @@ objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, chec // Store object objectMetadata objInfo = storeObject(InputStream) // Validate object, returns False if there is a mismatch and deletes the associated file -verifyObject(objInfo, checksum, checksumAlgorithn, objSize) +verifyObject(objInfo, checksum, checksumAlgorithn, objSize, true) // Tag object, makes the object discoverable (find, retrieve, delete) tagObject(pid, cid) ``` From 9b4d57cd3cd2bb82ca2ae1ada65ba0fb134c4341 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 15:04:29 -0700 Subject: [PATCH 340/553] Revise 'junit' test for 'RunnableTest' to check runtime for number of CPUs available to configure threads --- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 95362f4c..25cf5f4b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1655,7 +1655,9 @@ public void deleteObject_50Pids_1Obj_viaRunnable() throws Exception { pidModifiedList.add(pid + ".dou.test." + i); } - ExecutorService executorService = Executors.newFixedThreadPool(5); + Runtime runtime = Runtime.getRuntime(); + int numCores = runtime.availableProcessors(); + ExecutorService executorService = Executors.newFixedThreadPool(numCores); // Store 50 for (String pidAdjusted : pidModifiedList) { From 25a970b704c41ff5fa88cff34b40efab5d58300d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 4 Jul 2024 15:11:44 -0700 Subject: [PATCH 341/553] Update 'deleteObject' Runnable test to check for 1000 pids instead of 50 --- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 25cf5f4b..36188c65 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1645,13 +1645,13 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { * Test deleteObject synchronization using a Runnable class */ @Test - public void deleteObject_50Pids_1Obj_viaRunnable() throws Exception { + public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { // Get single test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); List pidModifiedList = new ArrayList<>(); - for (int i = 1; i <= 50; i++) { + for (int i = 1; i <= 1000; i++) { pidModifiedList.add(pid + ".dou.test." + i); } From 2734f03e4d85247bf999ac120ad93c61494d1821 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 5 Jul 2024 11:43:18 -0700 Subject: [PATCH 342/553] Update 'HashStoreRunnable' test class exception messaging to 'e.getCause()' instead of 'e.getMessage()' --- .../java/org/dataone/hashstore/HashStoreRunnable.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index a5b984d4..8b62c77e 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -51,7 +51,9 @@ public void run() { try { hashstore.storeObject(objStream, pid, null, null, null, -1); } catch (Exception e) { - throw new HashStoreServiceException(e.getMessage()); + String errMsg = + "HashStoreRunnableUnexpectedError - storeObject: " + e.getCause(); + throw new HashStoreServiceException(errMsg); } objStream.close(); break; @@ -59,7 +61,9 @@ public void run() { try { hashstore.deleteObject(pid); } catch (Exception e) { - throw new HashStoreServiceException(e.getMessage()); + String errMsg = + "HashStoreRunnableUnexpectedError - deleteObject: " + e.getCause(); + throw new HashStoreServiceException(errMsg); } break; } From 9a99113333461a69dd8483a54f2ee4b7ef2730e0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 5 Jul 2024 11:54:28 -0700 Subject: [PATCH 343/553] Clean up 'HashStoreRunnable' and add/revise logging statements --- .../dataone/hashstore/HashStoreRunnable.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 8b62c77e..9fe924ea 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -3,6 +3,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.exceptions.HashStoreServiceException; +import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import java.io.IOException; @@ -14,6 +15,7 @@ * provided by the Executor service. */ public class HashStoreRunnable implements Runnable { + private static final Log logHashStoreRunnable = LogFactory.getLog(HashStoreRunnable.class); public static final int storeObject = 1; public static final int deleteObject = 2; private HashStore hashstore; @@ -21,8 +23,6 @@ public class HashStoreRunnable implements Runnable { private String pid; private InputStream objStream; - private static final Log logHssr = LogFactory.getLog(HashStoreRunnable.class); - public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", @@ -44,7 +44,7 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { } public void run() { - logHssr.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); + logHashStoreRunnable.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); try { switch (publicAPIMethod) { case storeObject: @@ -52,7 +52,8 @@ public void run() { hashstore.storeObject(objStream, pid, null, null, null, -1); } catch (Exception e) { String errMsg = - "HashStoreRunnableUnexpectedError - storeObject: " + e.getCause(); + "HashStoreRunnable: UnexpectedError - storeObject: " + e.getCause(); + logHashStoreRunnable.error(errMsg); throw new HashStoreServiceException(errMsg); } objStream.close(); @@ -62,15 +63,14 @@ public void run() { hashstore.deleteObject(pid); } catch (Exception e) { String errMsg = - "HashStoreRunnableUnexpectedError - deleteObject: " + e.getCause(); + "HashStoreRunnable: UnexpectedError - deleteObject: " + e.getCause(); + logHashStoreRunnable.error(errMsg); throw new HashStoreServiceException(errMsg); } break; } - } catch (HashStoreServiceException hse) { - logHssr.error("HashStoreServiceRequest - Error: " + hse.getMessage()); - } catch (IOException ioe) { - logHssr.error("HashStoreServiceRequest - Error: " + ioe.getMessage()); + } catch (HashStoreServiceException | IOException hse) { + logHashStoreRunnable.error("HashStoreServiceRequest - Error: " + hse.getMessage()); } } } From 9e1d9e226ddfc0112c5b0ac10d56d9ee7fb191f2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 8 Jul 2024 17:09:41 -0700 Subject: [PATCH 344/553] Extract new protected method 'storeHashStoreRefsFiles' from 'tagObject' --- .../filehashstore/FileHashStore.java | 170 ++++++++++-------- 1 file changed, 91 insertions(+), 79 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index bf1d57f9..eff5aaa1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -590,97 +590,109 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } try { - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + storeHashStoreRefsFiles(pid, cid); - // Both files found, confirm that reference files are where they are expected to be - if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - // We throw an exception so the client is aware that everything is in place - String errMsg = "FileHashStore.tagObject - Object with cid: " + cid - + " already exists and is tagged with pid: " + pid; - logFileHashStore.error(errMsg); - throw new HashStoreRefsAlreadyExistException(errMsg); - - } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { - // If pid refs exists, it can only contain and reference one cid - // First, compare the cid retrieved from the pid refs file from the supplied cid - String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); - if (retrievedCid.equalsIgnoreCase(cid)) { - // The pid correctly references the cid, but the cid refs file is missing - // Create the file and verify tagging process - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "FileHashStore.tagObject - Pid refs file exists for pid: " + pid - + ", but cid refs file for: " + cid - + " is missing. Missing cid refs file created and tagging completed." - ); - return; - } else { - // Check if the retrieved cid refs file exists and pid is referenced - Path retrievedAbsCidRefsPath = getHashStoreRefsPath( - retrievedCid, HashStoreIdTypes.cid.getName() - ); - if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( - pid, retrievedAbsCidRefsPath - )) { - // This pid is accounted for and tagged as expected. - String errMsg = - "FileHashStore.tagObject - pid refs file already exists for pid: " + pid - + ", and the associated cid refs file contains the pid." - + " A pid can only reference one cid."; - logFileHashStore.error(errMsg); - throw new PidRefsFileExistsException(errMsg); - } - // Orphaned pid refs file found, the retrieved cid refs file exists - // but doesn't contain the cid. Proceed to overwrite the pid refs file. - // There is no return statement, so we move out of this if block. - } - } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Only update cid refs file if pid is not in the file - if (!isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, "add"); - } - // Get the pid refs file and verify tagging process - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); - File absPathPidRefsFile = absPidRefsPath.toFile(); - move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid + + " with cid: " + cid); + referenceLockedCids.remove(cid); + referenceLockedCids.notify(); + } + } + } + + /** + * Create the pid and cid references files in HashStore to establish the relationship + * between a 'pid' and a 'cid' + * + * @param pid Persistent or authority-based identifier + * @param cid Content identifier + * @throws NoSuchAlgorithmException If there is an issue related to calculating hashes + * @throws IOException If there is an issue reading/writing a refs file + */ + protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgorithmException, + IOException { + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + + // Both files found, confirm that reference files are where they are expected to be + if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + // We throw an exception so the client is aware that everything is in place + String errMsg = "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid + + " already exists and is tagged with pid: " + pid; + logFileHashStore.error(errMsg); + throw new HashStoreRefsAlreadyExistException(errMsg); + + } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { + // If pid refs exists, it can only contain and reference one cid + // First, compare the cid retrieved from the pid refs file from the supplied cid + String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); + if (retrievedCid.equalsIgnoreCase(cid)) { + // The pid correctly references the cid, but the cid refs file is missing + // Create the file and verify tagging process + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( - "FileHashStore.tagObject - Object with cid: " + cid - + " has been updated and tagged successfully with pid: " + pid - ); + "FileHashStore.storeHashStoreRefsFiles - Pid refs file exists for pid: " + + pid + ", but cid refs file for: " + cid + + " is missing. Missing cid refs file created and tagging completed."); return; + } else { + // Check if the retrieved cid refs file exists and pid is referenced + Path retrievedAbsCidRefsPath = getHashStoreRefsPath( + retrievedCid, HashStoreIdTypes.cid.getName() + ); + if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath + )) { + // This pid is accounted for and tagged as expected. + String errMsg = + "FileHashStore.storeHashStoreRefsFiles - pid refs file already exists for" + + " pid: " + pid + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); + } + // Orphaned pid refs file found, the retrieved cid refs file exists + // but doesn't contain the cid. Proceed to overwrite the pid refs file. + // There is no return statement, so we move out of this if block. } - - // Get pid and cid refs files + } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Only update cid refs file if pid is not in the file + if (!isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, "add"); + } + // Get the pid refs file and verify tagging process File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); - // Move refs files to permanent location File absPathPidRefsFile = absPidRefsPath.toFile(); - File absPathCidRefsFile = absCidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // Verify tagging process, this throws an exception if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( - "FileHashStore.tagObject - Object with cid: " + cid - + " has been tagged successfully with pid: " + pid + "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid + + " has been updated and tagged successfully with pid: " + pid ); - - } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid - + " with cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); - } + return; } + + // Get pid and cid refs files + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + // Move refs files to permanent location + File absPathPidRefsFile = absPidRefsPath.toFile(); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + // Verify tagging process, this throws an exception if there's an issue + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid + + " has been tagged successfully with pid: " + pid + ); } @Override From d890db6952b40ef5df3e86a80c9985873b070610 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 8 Jul 2024 17:12:27 -0700 Subject: [PATCH 345/553] Organize new 'storeHashStoreRefsFile' method in 'FileHashStore' class and update javadoc --- .../filehashstore/FileHashStore.java | 182 +++++++++--------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index eff5aaa1..23369411 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -604,97 +604,6 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } } - /** - * Create the pid and cid references files in HashStore to establish the relationship - * between a 'pid' and a 'cid' - * - * @param pid Persistent or authority-based identifier - * @param cid Content identifier - * @throws NoSuchAlgorithmException If there is an issue related to calculating hashes - * @throws IOException If there is an issue reading/writing a refs file - */ - protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgorithmException, - IOException { - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - - // Both files found, confirm that reference files are where they are expected to be - if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - // We throw an exception so the client is aware that everything is in place - String errMsg = "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid - + " already exists and is tagged with pid: " + pid; - logFileHashStore.error(errMsg); - throw new HashStoreRefsAlreadyExistException(errMsg); - - } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { - // If pid refs exists, it can only contain and reference one cid - // First, compare the cid retrieved from the pid refs file from the supplied cid - String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); - if (retrievedCid.equalsIgnoreCase(cid)) { - // The pid correctly references the cid, but the cid refs file is missing - // Create the file and verify tagging process - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "FileHashStore.storeHashStoreRefsFiles - Pid refs file exists for pid: " - + pid + ", but cid refs file for: " + cid - + " is missing. Missing cid refs file created and tagging completed."); - return; - } else { - // Check if the retrieved cid refs file exists and pid is referenced - Path retrievedAbsCidRefsPath = getHashStoreRefsPath( - retrievedCid, HashStoreIdTypes.cid.getName() - ); - if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath - )) { - // This pid is accounted for and tagged as expected. - String errMsg = - "FileHashStore.storeHashStoreRefsFiles - pid refs file already exists for" - + " pid: " + pid + ", and the associated cid refs file contains the " - + "pid. A pid can only reference one cid."; - logFileHashStore.error(errMsg); - throw new PidRefsFileExistsException(errMsg); - } - // Orphaned pid refs file found, the retrieved cid refs file exists - // but doesn't contain the cid. Proceed to overwrite the pid refs file. - // There is no return statement, so we move out of this if block. - } - } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Only update cid refs file if pid is not in the file - if (!isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, "add"); - } - // Get the pid refs file and verify tagging process - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); - File absPathPidRefsFile = absPidRefsPath.toFile(); - move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid - + " has been updated and tagged successfully with pid: " + pid - ); - return; - } - - // Get pid and cid refs files - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); - // Move refs files to permanent location - File absPathPidRefsFile = absPidRefsPath.toFile(); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // Verify tagging process, this throws an exception if there's an issue - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid - + " has been tagged successfully with pid: " + pid - ); - } - @Override public void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, @@ -2023,6 +1932,97 @@ protected void deleteObjectByCid(String cid) } } + /** + * Create the pid refs file and create/update cid refs files in HashStore to establish + * the relationship between a 'pid' and a 'cid' + * + * @param pid Persistent or authority-based identifier + * @param cid Content identifier + * @throws NoSuchAlgorithmException If there is an issue related to calculating hashes + * @throws IOException If there is an issue reading/writing a refs file + */ + protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgorithmException, + IOException { + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + + // Both files found, confirm that reference files are where they are expected to be + if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + // We throw an exception so the client is aware that everything is in place + String errMsg = "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid + + " already exists and is tagged with pid: " + pid; + logFileHashStore.error(errMsg); + throw new HashStoreRefsAlreadyExistException(errMsg); + + } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { + // If pid refs exists, it can only contain and reference one cid + // First, compare the cid retrieved from the pid refs file from the supplied cid + String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); + if (retrievedCid.equalsIgnoreCase(cid)) { + // The pid correctly references the cid, but the cid refs file is missing + // Create the file and verify tagging process + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "FileHashStore.storeHashStoreRefsFiles - Pid refs file exists for pid: " + + pid + ", but cid refs file for: " + cid + + " is missing. Missing cid refs file created and tagging completed."); + return; + } else { + // Check if the retrieved cid refs file exists and pid is referenced + Path retrievedAbsCidRefsPath = getHashStoreRefsPath( + retrievedCid, HashStoreIdTypes.cid.getName() + ); + if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath + )) { + // This pid is accounted for and tagged as expected. + String errMsg = + "FileHashStore.storeHashStoreRefsFiles - pid refs file already exists for" + + " pid: " + pid + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); + } + // Orphaned pid refs file found, the retrieved cid refs file exists + // but doesn't contain the cid. Proceed to overwrite the pid refs file. + // There is no return statement, so we move out of this if block. + } + } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Only update cid refs file if pid is not in the file + if (!isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, "add"); + } + // Get the pid refs file and verify tagging process + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); + File absPathPidRefsFile = absPidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid + + " has been updated and tagged successfully with pid: " + pid + ); + return; + } + + // Get pid and cid refs files + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + // Move refs files to permanent location + File absPathPidRefsFile = absPidRefsPath.toFile(); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + // Verify tagging process, this throws an exception if there's an issue + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid + + " has been tagged successfully with pid: " + pid + ); + } + /** * Verifies that the reference files for the given pid and cid exist and contain the expected * values. From 90207751befa055db91a3d4762733e0a50c09477 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 8 Jul 2024 17:44:58 -0700 Subject: [PATCH 346/553] Revise junit tests for 'tagObject', add new junit tests for 'storeHashStoreRefsFiles' and add todo items --- .../FileHashStoreReferencesTest.java | 138 ++++++++++-------- 1 file changed, 78 insertions(+), 60 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 68259e7b..a203c59d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -77,13 +77,63 @@ public void initializeFileHashStore() { /** - * Check that tagObject creates reference files + * Check that tagObject does not throw exception when creating a fresh set + * of reference files */ @Test public void tagObject() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); + } + + /** + * Check that tagObject throws HashStoreRefsAlreadyExistException exception when pid and cid + * refs file already exists + */ + @Test + public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // This exception only needs to be re-raised + assertThrows(HashStoreRefsAlreadyExistException.class, () -> { + fileHashStore.tagObject(pid, cid); + }); + } + + // TODO: Add tagObject test to confirm 'PidRefsFileExistsException' is handled correctly + + // TODO: Add tagObject test to confirm that pid and cid refs file was deleted when tagObject + // encounters an exception + + // TODO: Add tagObject test to confirm that only the pid refs file is deleted and that the cid + // refs file is updated when a cid refs file is already being referenced + // (the pid is removed from the cid refs file) + + // TODO: Add tagObject tests for the handling of exceptions thrown by verifyHashStoreRefsFiles + + /** + * Check that the cid supplied is written into the file given + */ + @Test + public void writeRefsFile_content() throws Exception { + String cidToWrite = "test_cid_123"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + + String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); + assertEquals(cidRead, cidToWrite); + } + + /** + * Check that storeHashStoreRefsFiles creates reference files + */ + @Test + public void storeHashStoreRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); @@ -93,13 +143,14 @@ public void tagObject() throws Exception { } /** - * Check that tagObject writes expected pid refs files + * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content + * is correct */ @Test - public void tagObject_pidRefsFileContent() throws Exception { + public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); assertTrue(Files.exists(pidRefsFilePath)); @@ -109,13 +160,14 @@ public void tagObject_pidRefsFileContent() throws Exception { } /** - * Check that tagObject writes expected cid refs files + * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content + * is correct */ @Test - public void tagObject_cidRefsFileContent() throws Exception { + public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertTrue(Files.exists(cidRefsFilePath)); @@ -125,17 +177,17 @@ public void tagObject_cidRefsFileContent() throws Exception { } /** - * Check that tagObject does not throw exception when pid and cid refs - * file already exists + * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException + * when refs files already exist */ @Test - public void tagObject_refsFileAlreadyExists() throws Exception { + public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); assertThrows(HashStoreRefsAlreadyExistException.class, () -> { - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); }); // Confirm that there is only 1 of each ref file @@ -147,30 +199,30 @@ public void tagObject_refsFileAlreadyExists() throws Exception { } /** - * Check tagObject throws exception when the supplied cid is different from what is - * found in the pid refs file, and the associated cid refs file from the pid refs file + * Check storeHashStoreRefsFiles throws exception when the supplied cid is different from what + * is found in the pid refs file, and the associated cid refs file from the pid refs file * is correctly tagged (everything is where it's expected to be) */ @Test - public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileFound() + public void storeHashStoreRefsFiles_PidRefsFileExistsException() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; String existingCid = "987654321fedcba"; - fileHashStore.tagObject(pid, existingCid); + fileHashStore.storeHashStoreRefsFiles(pid, existingCid); // This will throw an exception because the pid and cid refs file are in sync assertThrows(PidRefsFileExistsException.class, () -> { - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); }); } - /** - * Check tagObject overwrites an orphaned pid refs file. + * Check storeHashStoreRefsFiles overwrites an orphaned pid refs file - the 'cid' that it + * references does not exist (does not have a cid refs file) */ @Test - public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound() + public void storeHashStoreRefsFiles_pidRefsOrphanedFile() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; @@ -185,7 +237,7 @@ public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound File absPathPidRefsFile = absPidRefsPath.toFile(); fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); // There should only be 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); @@ -195,38 +247,17 @@ public void tagObject_pidRefsFileFound_differentCidRetrieved_cidRefsFileNotFound } /** - * Check that tagObject creates a missing cid refs file + * Check that storeHashStoreRefsFiles creates a pid refs file and updates an existing cid refs + * file */ @Test - public void tagObject_pidRefsFileFound_cidRefsFileNotFound() throws Exception { + public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - // Manually delete the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - Files.delete(cidRefsFilePath); - - fileHashStore.tagObject(pid, cid); - // Confirm that there is only 1 of each ref file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); - assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); - assertEquals(1, cidRefsFiles.length); - } - - - /** - * Check that tagObject creates a pid refs file and updates an existing cid refs file - */ - @Test - public void tagObject_pidRefsFileNotFound_cidRefsFileFound() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); String pidAdditional = "another.pid.2"; - fileHashStore.tagObject(pidAdditional, cid); + fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); // Confirm missing pid refs file has been created Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, "pid"); @@ -247,19 +278,6 @@ public void tagObject_pidRefsFileNotFound_cidRefsFileFound() throws Exception { assertEquals(1, cidRefsFiles.length); } - - /** - * Check that the cid supplied is written into the file given - */ - @Test - public void writeRefsFile_content() throws Exception { - String cidToWrite = "test_cid_123"; - File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); - - String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); - assertEquals(cidRead, cidToWrite); - } - /** * Check that exception is thrown when incorrect cid in a pid refs file. */ From 6c727fafce037e258b5e3e794fa1d648e303976b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 15:50:36 -0700 Subject: [PATCH 347/553] Add new method 'unTagObject' --- .../filehashstore/FileHashStore.java | 204 +++++++++++++++++- 1 file changed, 203 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 23369411..c8ebf929 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1003,7 +1003,7 @@ public void deleteObject(String pid) String cid; List deleteList = new ArrayList<>(); - // Storing and deleting objects are synchronized together + // Storing, deleting and untagging objects are synchronized together // Duplicate store object requests for a pid are rejected, but deleting an object // will wait for a pid to be released if it's found to be in use before proceeding. synchronized (objectLockedIds) { @@ -2023,6 +2023,208 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo ); } + /** + * Untags a data object in HashStore by deleting the 'pid reference file' and removing the 'pid' + * from the 'cid reference file'. + * + * @param pid Persistent or authority-based identifier + * @param cid Content identifier of data object + */ + protected void unTagObject(String pid, String cid) throws Exception { + // Validate input parameters + FileHashStoreUtility.ensureNotNull(pid, "pid", "unTagObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "unTagObject"); + FileHashStoreUtility.ensureNotNull(cid, "cid", "unTagObject"); + FileHashStoreUtility.checkForEmptyString(cid, "cid", "unTagObject"); + + List deleteList = new ArrayList<>(); + + // Storing, deleting and untagging objects are synchronized together + // Duplicate store object requests for a pid are rejected, but deleting an object + // will wait for a pid to be released if it's found to be in use before proceeding. + synchronized (objectLockedIds) { + while (objectLockedIds.contains(pid)) { + try { + objectLockedIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = "FileHashStore.unTagObject - Untag request for pid: " + pid + + " has been interrupted."; + logFileHashStore.warn(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.unTagObject - Synchronizing objectLockedIds for pid: " + pid); + objectLockedIds.add(pid); + } + + try { + // Before we begin untagging process, we look for the `cid` by calling + // `findObject` which will throw custom exceptions if there is an issue with + // the reference files, which help us determine the path to proceed with. + try { + Map objInfoMap = findObject(pid); + cid = objInfoMap.get("cid"); + + // If no exceptions are thrown, we proceed to synchronization based on the `cid` + // Multiple threads may access the cid reference file (which contains a list of + // `pid`s that reference a `cid`) and this needs to be coordinated. Otherwise, + // we will run into a `OverlappingFileLockException` + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.unTagObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.unTagObject - Synchronizing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cid); + referenceLockedCids.add(cid); + } + + try { + // Cid refs file + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + // Pid refs file + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + + // Rename pid refs file to prepare for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Remove pid from cid refs file + updateRefsFile(pid, absCidRefsPath, "remove"); + // Delete the cid refs file **only** if the cid refs file is empty + if (Files.size(absCidRefsPath) == 0) { + // Rename empty cid refs file to prepare for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } else { + String warnMsg = + "FileHashStore.unTagObject - cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object " + + "deletion."; + logFileHashStore.warn(warnMsg); + } + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info( + "FileHashStore.unTagObject - Untagged pid: " + pid + " with cid: " + cid); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.unTagObject - Releasing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cid); + referenceLockedCids.remove(cid); + referenceLockedCids.notify(); + } + } + + } catch (OrphanPidRefsFileException oprfe) { + // `findObject` throws this exception when the cid refs file doesn't exist, + // so we only need to delete the pid refs file + + // Begin by renaming pid refs file for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = + "FileHashStore.unTagObject - Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + + } catch (OrphanRefsFilesException orfe) { + // `findObject` throws this exception when the pid and cid refs file exists, + // but the actual object being referenced by the pid does not exist + + // Get the cid from the pid refs file before renaming it for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + + // Since we must access the cid reference file, the `cid` must be synchronized + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cidRead)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.unTagObject - referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.unTagObject - Synchronizing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cidRead); + referenceLockedCids.add(cidRead); + } + + try { + // Rename pid refs file for deletion + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + // Remove the pid from the cid refs file + Path absCidRefsPath = + getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); + updateRefsFile(pid, absCidRefsPath, "remove"); + // Add the cid reference file to deleteList if it's now empty + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "FileHashStore.unTagObject - Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted pid and cid ref files and metadata."; + logFileHashStore.warn(warnMsg); + + } finally { + // Release lock + synchronized (referenceLockedCids) { + logFileHashStore.debug( + "FileHashStore.unTagObject - Releasing referenceLockedCids for " + + "pid: " + pid + " with cid: " + cidRead); + referenceLockedCids.remove(cidRead); + referenceLockedCids.notify(); + } + } + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // `findObject` throws this exception when both the pid and cid refs file exists + // but the pid is not found in the cid refs file. + + // Rename pid refs file for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = + "FileHashStore.unTagObject - Pid not found in expected cid refs file for" + + " pid: " + pid + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + } + } finally { + // Release lock on the pid + synchronized (objectLockedIds) { + logFileHashStore.debug( + "FileHashStore.unTagObject - Releasing objectLockedIds for pid: " + pid); + objectLockedIds.remove(pid); + objectLockedIds.notify(); + } + } + } + /** * Verifies that the reference files for the given pid and cid exist and contain the expected * values. From bfe2f27dbfbe5ea979702058442a3609c91a127d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 16:11:43 -0700 Subject: [PATCH 348/553] Revise comment for accuracy --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c8ebf929..8592f589 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2187,7 +2187,7 @@ protected void unTagObject(String pid, String cid) throws Exception { FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.unTagObject - Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid - + ". Deleted pid and cid ref files and metadata."; + + ". Deleted pid and cid ref files."; logFileHashStore.warn(warnMsg); } finally { From 5a69daf02654ddb01e63ce37a6d32708246a174b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 16:12:06 -0700 Subject: [PATCH 349/553] Add new junit test for 'unTagObject' and fix bug in existing 'tagObject' tests when checking for number of files --- .../FileHashStoreReferencesTest.java | 75 +++++++++++++++---- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index a203c59d..cc81f3ba 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -114,6 +114,27 @@ public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { // TODO: Add tagObject tests for the handling of exceptions thrown by verifyHashStoreRefsFiles + /** + * Check that unTagObject deletes reference files + */ + @Test + public void unTagObject() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + fileHashStore.unTagObject(pid, cid); + + // Confirm refs files do not exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + + assertFalse(Files.exists(absCidRefsPath)); + assertFalse(Files.exists(absPidRefsPath)); + } + /** * Check that the cid supplied is written into the file given */ @@ -135,11 +156,24 @@ public void storeHashStoreRefsFiles() throws Exception { String cid = "abcdef123456789"; fileHashStore.storeHashStoreRefsFiles(pid, cid); + // Confirm refs files exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + + assertTrue(Files.exists(absCidRefsPath)); + assertTrue(Files.exists(absPidRefsPath)); + + // Confirm no additional files were created Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); - assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); - assertEquals(1, cidRefsFiles.length); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); } /** @@ -192,10 +226,13 @@ public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws // Confirm that there is only 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); - assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); - assertEquals(1, cidRefsFiles.length); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); } /** @@ -240,10 +277,13 @@ public void storeHashStoreRefsFiles_pidRefsOrphanedFile() fileHashStore.storeHashStoreRefsFiles(pid, cid); // There should only be 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); - assertEquals(1, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); - assertEquals(1, cidRefsFiles.length); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); } /** @@ -272,10 +312,13 @@ public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { // There should be 2 pid refs file, and 1 cid refs file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - File[] pidRefsFiles = storePath.resolve("refs/pids").toFile().listFiles(); - assertEquals(2, pidRefsFiles.length); - File[] cidRefsFiles = storePath.resolve("refs/cids").toFile().listFiles(); - assertEquals(1, cidRefsFiles.length); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(2, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); } /** From 259bf549829c0d6bc1c31c8880b82b82ab26c946 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 16:25:33 -0700 Subject: [PATCH 350/553] Add new junit tests for 'unTagObject' --- .../FileHashStoreReferencesTest.java | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index cc81f3ba..73df08b5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -135,6 +135,77 @@ public void unTagObject() throws Exception { assertFalse(Files.exists(absPidRefsPath)); } + /** + * Check that unTagObject deletes pid refs file for a cid that is referenced by + * multiple pids, and that the cid refs file is not deleted. + */ + @Test + public void unTagObject_cidWithMultiplePidReferences() throws Exception { + String pid = "dou.test.1"; + String pidTwo = "dou.test.2"; + String pidThree = "dou.test.3"; + String pidFour = "dou.test.4"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + fileHashStore.tagObject(pidTwo, cid); + fileHashStore.tagObject(pidThree, cid); + fileHashStore.tagObject(pidFour, cid); + + fileHashStore.unTagObject(pid, cid); + + // Confirm refs files do not exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + + assertFalse(Files.exists(absPidRefsPath)); + assertTrue(Files.exists(absCidRefsPath)); + + // Confirm number of reference files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(3, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that unTagObject deletes an orphaned pid refs file + */ + @Test + public void unTagObject_orphanPidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Delete cid refs file to create orphaned pid refs file + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Files.delete(absCidRefsPath); + assertFalse(Files.exists(absCidRefsPath)); + + fileHashStore.unTagObject(pid, cid); + + // Confirm pid refs is deleted + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + assertFalse(Files.exists(absPidRefsPath)); + + // Confirm number of reference files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(0, pidRefsFiles.size()); + assertEquals(0, cidRefsFiles.size()); + } + /** * Check that the cid supplied is written into the file given */ From b06d8c62b49a31b79fae8d7dea66540fc141f1bd Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 16:28:54 -0700 Subject: [PATCH 351/553] Revise 'findObject' to throw custom exception 'PidRefsFileNotFoundException' when it cannot find a pid refs file instead of generic 'FileNotFoundException' --- .../exceptions/PidRefsFileNotFoundException.java | 10 ++++++++++ .../dataone/hashstore/filehashstore/FileHashStore.java | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/PidRefsFileNotFoundException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileNotFoundException.java b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileNotFoundException.java new file mode 100644 index 00000000..0c116bfe --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileNotFoundException.java @@ -0,0 +1,10 @@ +package org.dataone.hashstore.exceptions; + +import java.io.FileNotFoundException; + +public class PidRefsFileNotFoundException extends FileNotFoundException { + public PidRefsFileNotFoundException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8592f589..8fd2eed6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -43,6 +43,7 @@ import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; /** @@ -748,7 +749,7 @@ public Map findObject(String pid) throws NoSuchAlgorithmExceptio String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid + ". Pid refs file does not exist at: " + absPidRefsPath; logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); + throw new PidRefsFileNotFoundException(errMsg); } } From 18d2e258e51d54484283ed3e17265836117a3cd3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 16:31:38 -0700 Subject: [PATCH 352/553] Add new junit test for 'unTagObject' and revise 'findObject' junit test to assert newly added custom exception --- .../filehashstore/FileHashStoreInterfaceTest.java | 3 ++- .../filehashstore/FileHashStoreReferencesTest.java | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 36188c65..4bef58d8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -39,6 +39,7 @@ import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -2124,7 +2125,7 @@ public void findObject_cidRefsFileMissingPid() throws Exception { @Test public void findObject_pidNotFound() { String pid = "dou.test.1"; - assertThrows(FileNotFoundException.class, () -> { + assertThrows(PidRefsFileNotFoundException.class, () -> { fileHashStore.findObject(pid); }); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 73df08b5..62b7d2e7 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -24,6 +24,7 @@ import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; @@ -206,6 +207,19 @@ public void unTagObject_orphanPidRefsFile() throws Exception { assertEquals(0, cidRefsFiles.size()); } + /** + * Check that unTagObject throws exception when a pid refs file does not exist + */ + @Test + public void unTagObject_missingPidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + assertThrows(PidRefsFileNotFoundException.class, () -> { + fileHashStore.unTagObject(pid, cid); + }); + } + /** * Check that the cid supplied is written into the file given */ From cbd494da89814e291b14678f2f6671ba2957f870 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 16:44:53 -0700 Subject: [PATCH 353/553] Add clean-up code to 'unTagObject' when a pid refs file does not exist, but pid is tagged in the cid refs file, and add new junit test --- .../filehashstore/FileHashStore.java | 11 ++++++ .../FileHashStoreReferencesTest.java | 36 ++++++++++++++++--- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8fd2eed6..126379ca 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2214,6 +2214,17 @@ protected void unTagObject(String pid, String cid) throws Exception { "FileHashStore.unTagObject - Pid not found in expected cid refs file for" + " pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); + } catch (PidRefsFileNotFoundException prfnfe) { + // If pid refs file is not found, check to see if it's in the `cid refs file` + // and attempt to remove it + Path absCidRefsPath = + getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, "remove"); + String errMsg = "FileHashStore.unTagObject - Pid refs file not found, " + + "removed pid found in cid refs file: " + absCidRefsPath; + logFileHashStore.warn(errMsg); + } } } finally { // Release lock on the pid diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 62b7d2e7..a8f6fba4 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -154,7 +154,7 @@ public void unTagObject_cidWithMultiplePidReferences() throws Exception { fileHashStore.unTagObject(pid, cid); - // Confirm refs files do not exist + // Confirm refs files state Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); Path absPidRefsPath = @@ -208,16 +208,42 @@ public void unTagObject_orphanPidRefsFile() throws Exception { } /** - * Check that unTagObject throws exception when a pid refs file does not exist + * Check that unTagObject does not throw exception when a pid refs file and cid refs file + * does not exist + */ + @Test + public void unTagObject_missingRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + fileHashStore.unTagObject(pid, cid); + } + + /** + * Check that unTagObject does not throw exception when a pid refs file and cid refs file + * does not exist */ @Test public void unTagObject_missingPidRefsFile() throws Exception { String pid = "dou.test.1"; + String pidTwo = "dou.test.2"; + String pidThree = "dou.test.3"; String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + fileHashStore.tagObject(pidTwo, cid); + fileHashStore.tagObject(pidThree, cid); - assertThrows(PidRefsFileNotFoundException.class, () -> { - fileHashStore.unTagObject(pid, cid); - }); + // Delete pid refs to create scenario + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Files.delete(absPidRefsPath); + assertFalse(Files.exists(absPidRefsPath)); + + fileHashStore.unTagObject(pid, cid); + + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + assertFalse(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); } /** From 30e1e09a3c3eb8285090352a0ab1e72f74341c5c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 16:52:36 -0700 Subject: [PATCH 354/553] Revise 'unTagObject' javadoc and comments --- .../hashstore/filehashstore/FileHashStore.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 126379ca..10b6c4db 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2026,12 +2026,16 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo /** * Untags a data object in HashStore by deleting the 'pid reference file' and removing the 'pid' - * from the 'cid reference file'. + * from the 'cid reference file'. This method will never delete a data object. * * @param pid Persistent or authority-based identifier * @param cid Content identifier of data object + * @throws InterruptedException When there is a synchronization issue + * @throws NoSuchAlgorithmException When there is an algorithm used that is not supported + * @throws IOException When there is an issue deleting refs files */ - protected void unTagObject(String pid, String cid) throws Exception { + protected void unTagObject(String pid, String cid) throws InterruptedException, + NoSuchAlgorithmException, IOException { // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "unTagObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "unTagObject"); @@ -2215,8 +2219,8 @@ protected void unTagObject(String pid, String cid) throws Exception { + " pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); } catch (PidRefsFileNotFoundException prfnfe) { - // If pid refs file is not found, check to see if it's in the `cid refs file` - // and attempt to remove it + // `findObject` throws this exception if the pid refs file is not found + // Check to see if pid is in the `cid refs file`and attempt to remove it Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { From 5e2e7f1817e017df6ab0805998587806c4a75cca Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 9 Jul 2024 17:35:35 -0700 Subject: [PATCH 355/553] Add print statements to 'HashStoreRunnable' to assist with debugging inconsistent test failures --- .../java/org/dataone/hashstore/HashStoreRunnable.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 9fe924ea..971b404a 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -52,7 +52,8 @@ public void run() { hashstore.storeObject(objStream, pid, null, null, null, -1); } catch (Exception e) { String errMsg = - "HashStoreRunnable: UnexpectedError - storeObject: " + e.getCause(); + "HashStoreRunnable ~ UnexpectedError - storeObject: " + e.getCause(); + System.out.println(errMsg); logHashStoreRunnable.error(errMsg); throw new HashStoreServiceException(errMsg); } @@ -63,14 +64,16 @@ public void run() { hashstore.deleteObject(pid); } catch (Exception e) { String errMsg = - "HashStoreRunnable: UnexpectedError - deleteObject: " + e.getCause(); + "HashStoreRunnable ~ UnexpectedError - deleteObject: " + e.getCause(); + System.out.println(errMsg); logHashStoreRunnable.error(errMsg); throw new HashStoreServiceException(errMsg); } break; } } catch (HashStoreServiceException | IOException hse) { - logHashStoreRunnable.error("HashStoreServiceRequest - Error: " + hse.getMessage()); + logHashStoreRunnable.error( + "HashStoreServiceRequest ~ Unexpected Error: " + hse.getMessage()); } } } From 742eeacd940b3285dae86b5363cdc87dcf5f4bd7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 09:40:10 -0700 Subject: [PATCH 356/553] Add 'unTagObject' to 'tagObject' process --- .../hashstore/filehashstore/FileHashStore.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 10b6c4db..d3763e27 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -494,6 +494,7 @@ private ObjectMetadata syncPutObject( // Tag object String cid = objInfo.getCid(); tagObject(pid, cid); + logFileHashStore.info( "FileHashStore.syncPutObject - Object stored for pid: " + pid + ". Permanent address: " + getHashStoreDataObjectPath(pid) @@ -593,6 +594,18 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi try { storeHashStoreRefsFiles(pid, cid); + } catch (HashStoreRefsAlreadyExistException hsrfae) { + // This exception is thrown when the pid and cid are already tagged appropriately + String errMsg = + "FileHashStore.tagObject - HashStore refs files already exist for pid " + pid + + " and cid: " + cid; + throw new HashStoreRefsAlreadyExistException(errMsg); + + } catch (Exception e) { + // Revert the process for all other exceptions + unTagObject(pid, cid); + throw e; + } finally { // Release lock synchronized (referenceLockedCids) { From e96a51730dce6156a2c4785425c8428c10d9bd9e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 10:05:01 -0700 Subject: [PATCH 357/553] Add new junit tests for 'updateRefsFile' --- .../FileHashStoreReferencesTest.java | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index a8f6fba4..893e51f0 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -489,7 +489,7 @@ public void updateRefsFile_content() throws Exception { Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); String pidAdditional = "dou.test.2"; - fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath, "add"); + fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "add"); List lines = Files.readAllLines(cidRefsFilePath); boolean pidOriginal_foundInCidRefFiles = false; @@ -506,6 +506,64 @@ public void updateRefsFile_content() throws Exception { assertTrue(pidAdditional_foundInCidRefFiles); } + /** + * Confirm that updateRefsFile does not throw any exception if called to remove a value + * that is not found in a cid refs file. + */ + @Test + public void updateRefsFile_cidRefsPidNotFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath, "remove"); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + int pidsFound = 0; + for (String line : lines) { + pidsFound++; + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertTrue(pidOriginal_foundInCidRefFiles); + assertEquals(1, pidsFound); + } + + /** + * Confirm that updateRefsFile does not throw any exception if called to remove a value + * from a cid refs file that is empty + */ + @Test + public void updateRefsFile_cidRefsEmpty() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + int pidsFound = 0; + for (String line : lines) { + pidsFound++; + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertFalse(pidOriginal_foundInCidRefFiles); + assertEquals(0, pidsFound); + + // Confirm that no exception is thrown and that the cid refs still exists + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + assertTrue(Files.exists(cidRefsFilePath)); + } + /** * Check that deleteRefsFile deletes file */ From b6d56cc4806e26102bb3a1ecd62ab8817611ad3d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 10:44:23 -0700 Subject: [PATCH 358/553] Add new junit tests for 'tagObject' and fix bug where a pid refs is deleted when 'tagObject' is called to tag the pid that is already referencing another cid --- .../filehashstore/FileHashStore.java | 6 ++ .../FileHashStoreReferencesTest.java | 61 ++++++++++++++++--- 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d3763e27..a8a61236 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -601,6 +601,12 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi " and cid: " + cid; throw new HashStoreRefsAlreadyExistException(errMsg); + } catch (PidRefsFileExistsException prfe) { + String errMsg = + "FileHashStore.tagObject - pid: " + pid + " already references another cid." + + " A pid can only reference one cid."; + throw new PidRefsFileExistsException(errMsg); + } catch (Exception e) { // Revert the process for all other exceptions unTagObject(pid, cid); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 893e51f0..61effb00 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -104,16 +104,63 @@ public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { }); } - // TODO: Add tagObject test to confirm 'PidRefsFileExistsException' is handled correctly + /** + * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' + * that is already referencing another 'cid' + */ + @Test + public void tagObject_PidRefsFileExistsException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); - // TODO: Add tagObject test to confirm that pid and cid refs file was deleted when tagObject - // encounters an exception + // This exception only needs to be re-raised + assertThrows(PidRefsFileExistsException.class, () -> { + fileHashStore.tagObject(pid, "another.cid"); + }); - // TODO: Add tagObject test to confirm that only the pid refs file is deleted and that the cid - // refs file is updated when a cid refs file is already being referenced - // (the pid is removed from the cid refs file) + // Confirm there are only 1 of each refs files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - // TODO: Add tagObject tests for the handling of exceptions thrown by verifyHashStoreRefsFiles + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that tagObject untags an object/does not create any orphan refs files + * when the process is interrupted + */ + @Test + public void tagObject_interruptAndUnTagObject() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + Thread toInterrupt = new Thread(() -> { + try { + fileHashStore.tagObject(pid, cid); + } catch (IOException | NoSuchAlgorithmException | InterruptedException ioe) { + ioe.printStackTrace(); + } + }); + + toInterrupt.start(); + toInterrupt.interrupt(); + toInterrupt.join(); + + // Confirm there are no files refs files created + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(0, pidRefsFiles.size()); + assertEquals(0, cidRefsFiles.size()); + } /** * Check that unTagObject deletes reference files From 9eef991b5ee2d592ee53bbd3adc066694009ea79 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 12:03:40 -0700 Subject: [PATCH 359/553] Revise comments and add additional print statement to assist with debugging for 'deleteObject_1000Pids_1Obj_viaRunnable' junit test --- .../filehashstore/FileHashStoreInterfaceTest.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 4bef58d8..9c1ccc2c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1660,14 +1660,14 @@ public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { int numCores = runtime.availableProcessors(); ExecutorService executorService = Executors.newFixedThreadPool(numCores); - // Store 50 + // Store 1000 for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); HashStoreRunnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); executorService.execute(request); } - // Delete 50 + // Delete 1000 for (String pidAdjusted : pidModifiedList) { HashStoreRunnable request = new HashStoreRunnable(fileHashStore, 2, pidAdjusted); @@ -1679,8 +1679,12 @@ public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { Path storePath = Paths.get(fhsProperties.getProperty("storePath")); // Check that no objects exist - List objects = FileHashStoreUtility.getFilesFromDir(storePath.resolve("objects")); - assertEquals(0, objects.size()); + List objectPaths = FileHashStoreUtility.getFilesFromDir(storePath.resolve("objects")); + // To assist with debugging + for (Path path : objectPaths) { + System.out.println("HashStoreRunnableTest ~ Path found in Objects Directory: " + path); + } + assertEquals(0, objectPaths.size()); // Check that no refs files exist List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); From 355dd1b754458c7e3fd733519e249eda7a8dea73 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 13:37:00 -0700 Subject: [PATCH 360/553] Fix inaccurate argument string in 'verifyChecksumParameters' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a8a61236..4b484242 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1648,12 +1648,12 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor // First ensure algorithm is compatible and values are valid if they aren't null if (checksumAlgorithm != null) { FileHashStoreUtility.checkForEmptyString( - checksumAlgorithm, "checksumAlgorithm", "putObject" - ); + checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters"); validateAlgorithm(checksumAlgorithm); } if (checksum != null) { - FileHashStoreUtility.checkForEmptyString(checksum, "checksum", "putObject"); + FileHashStoreUtility.checkForEmptyString( + checksum, "checksum", "verifyChecksumParameters"); } // If checksum is supplied, checksumAlgorithm cannot be empty if (checksum != null && !checksum.trim().isEmpty()) { From 81cb5ed78a0f0adbdf5f8534ddf6eb20611f2084 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 14:40:03 -0700 Subject: [PATCH 361/553] Cleanup 'FileHashStoreReferencesTest' class --- .../FileHashStoreReferencesTest.java | 143 +++++++++--------- 1 file changed, 68 insertions(+), 75 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 61effb00..697951fb 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -24,7 +24,6 @@ import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; -import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; import org.dataone.hashstore.testdata.TestDataHarness; @@ -37,7 +36,6 @@ */ public class FileHashStoreReferencesTest { private FileHashStore fileHashStore; - private Path rootDirectory; private Properties fhsProperties; private static final TestDataHarness testData = new TestDataHarness(); @@ -46,7 +44,7 @@ public class FileHashStoreReferencesTest { */ @BeforeEach public void initializeFileHashStore() { - rootDirectory = tempFolder.resolve("hashstore"); + Path rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -78,7 +76,7 @@ public void initializeFileHashStore() { /** - * Check that tagObject does not throw exception when creating a fresh set + * Check tagObject does not throw exception when creating a fresh set * of reference files */ @Test @@ -89,37 +87,43 @@ public void tagObject() throws Exception { } /** - * Check that tagObject throws HashStoreRefsAlreadyExistException exception when pid and cid - * refs file already exists + * Check that tagObject successfully tags a cid refs file that already exists */ @Test - public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { + public void tagObject_cidRefsAlreadyExists() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - // This exception only needs to be re-raised - assertThrows(HashStoreRefsAlreadyExistException.class, () -> { - fileHashStore.tagObject(pid, cid); - }); + String pidTwo = "dou.test.2"; + fileHashStore.tagObject(pidTwo, cid); + + // Confirm number of ref files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(2, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); } /** - * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' - * that is already referencing another 'cid' + * Check that tagObject throws HashStoreRefsAlreadyExistException exception when pid and cid + * refs file already exists (duplicate tag request) */ @Test - public void tagObject_PidRefsFileExistsException() throws Exception { + public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); // This exception only needs to be re-raised - assertThrows(PidRefsFileExistsException.class, () -> { - fileHashStore.tagObject(pid, "another.cid"); - }); + assertThrows( + HashStoreRefsAlreadyExistException.class, () -> fileHashStore.tagObject(pid, cid)); - // Confirm there are only 1 of each refs files + // Confirm there are only 1 of each ref files Path storePath = Paths.get(fhsProperties.getProperty("storePath")); List pidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); @@ -131,35 +135,28 @@ public void tagObject_PidRefsFileExistsException() throws Exception { } /** - * Check that tagObject untags an object/does not create any orphan refs files - * when the process is interrupted + * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' + * that is already referencing another 'cid' */ @Test - public void tagObject_interruptAndUnTagObject() throws Exception { + public void tagObject_PidRefsFileExistsException() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); - Thread toInterrupt = new Thread(() -> { - try { - fileHashStore.tagObject(pid, cid); - } catch (IOException | NoSuchAlgorithmException | InterruptedException ioe) { - ioe.printStackTrace(); - } - }); - - toInterrupt.start(); - toInterrupt.interrupt(); - toInterrupt.join(); + // This exception only needs to be re-raised + assertThrows( + PidRefsFileExistsException.class, () -> fileHashStore.tagObject(pid, "another.cid")); - // Confirm there are no files refs files created + // Confirm there are only 1 of each ref files Path storePath = Paths.get(fhsProperties.getProperty("storePath")); List pidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); List cidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - assertEquals(0, pidRefsFiles.size()); - assertEquals(0, cidRefsFiles.size()); + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); } /** @@ -178,7 +175,6 @@ public void unTagObject() throws Exception { fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); Path absPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - assertFalse(Files.exists(absCidRefsPath)); assertFalse(Files.exists(absPidRefsPath)); } @@ -222,7 +218,7 @@ public void unTagObject_cidWithMultiplePidReferences() throws Exception { } /** - * Check that unTagObject deletes an orphaned pid refs file + * Check that unTagObject deletes an orphaned pid refs file (there is no cid refs file) */ @Test public void unTagObject_orphanPidRefsFile() throws Exception { @@ -259,7 +255,7 @@ public void unTagObject_orphanPidRefsFile() throws Exception { * does not exist */ @Test - public void unTagObject_missingRefsFile() throws Exception { + public void unTagObject_missingRefsFiles() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; @@ -378,9 +374,9 @@ public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws String cid = "abcdef123456789"; fileHashStore.storeHashStoreRefsFiles(pid, cid); - assertThrows(HashStoreRefsAlreadyExistException.class, () -> { - fileHashStore.storeHashStoreRefsFiles(pid, cid); - }); + assertThrows( + HashStoreRefsAlreadyExistException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); // Confirm that there is only 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -407,9 +403,9 @@ public void storeHashStoreRefsFiles_PidRefsFileExistsException() fileHashStore.storeHashStoreRefsFiles(pid, existingCid); // This will throw an exception because the pid and cid refs file are in sync - assertThrows(PidRefsFileExistsException.class, () -> { - fileHashStore.storeHashStoreRefsFiles(pid, cid); - }); + assertThrows( + PidRefsFileExistsException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); } /** @@ -496,9 +492,10 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { // Get path of the cid refs file Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertThrows(CidNotFoundInPidRefsFileException.class, () -> { - fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, cidRefsFilePath); - }); + assertThrows( + CidNotFoundInPidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, + cidRefsFilePath)); } /** @@ -518,9 +515,10 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception // Get path of the pid refs file Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - assertThrows(PidNotFoundInCidRefsFileException.class, () -> { - fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsTmpFilePath); - }); + assertThrows( + PidNotFoundInCidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, + cidRefsTmpFilePath)); } /** @@ -759,11 +757,10 @@ public void verifyObject_unsupportedAlgo() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); dataStream.close(); - assertThrows(UnsupportedHashAlgorithmException.class, () -> { - fileHashStore.verifyObject( - objInfo, "ValueNotRelevant", "BLAKE2S", 1000, false - ); - }); + assertThrows( + UnsupportedHashAlgorithmException.class, + () -> fileHashStore.verifyObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000, + false)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -796,11 +793,10 @@ public void verifyObject_mismatchedSize() throws Exception { String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; - assertThrows(NonMatchingObjSizeException.class, () -> { - fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false - ); - }); + assertThrows( + NonMatchingObjSizeException.class, + () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize, false)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -833,11 +829,10 @@ public void verifyObject_mismatchedChecksum() throws Exception { String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertThrows(NonMatchingChecksumException.class, () -> { - fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, false - ); - }); + assertThrows( + NonMatchingChecksumException.class, + () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize, false)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -870,11 +865,11 @@ public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Except String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; - assertThrows(NonMatchingObjSizeException.class, () -> { - fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true - ); - }); + assertThrows( + NonMatchingObjSizeException.class, + () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize, true)); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -907,11 +902,9 @@ public void verifyObject_mismatchedChecksum_deleteInvalidObject_true() throws Ex String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertThrows(NonMatchingChecksumException.class, () -> { - fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true - ); - }); + assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true + )); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); From 60127a4af0c77803039d5c34a95b5df88e47940d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 15:06:00 -0700 Subject: [PATCH 362/553] Remove 'findObject' from 'HashStore' interface and move javadoc to 'FileHashStore' --- .../java/org/dataone/hashstore/HashStore.java | 21 ------------------ .../filehashstore/FileHashStore.java | 22 +++++++++++++++++-- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 851636d3..1276e366 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -131,27 +131,6 @@ public void verifyObject( UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, IOException; - /** - * Checks whether an object referenced by a pid exists and returns a map containing the - * absolute path to the object, pid refs file, cid refs file and sysmeta document. - * - * @param pid Authority-based identifier - * @return Map containing the following keys: cid, cid_object_path, cid_refs_path, - * pid_refs_path, sysmeta_path - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs - * file's absolute address is not valid - * @throws IOException Unable to read from a pid refs file or pid refs - * file does not exist - * @throws OrphanRefsFilesException pid and cid refs file found, but object does - * not exist - * @throws OrphanPidRefsFileException When pid refs file exists and the cid found - * inside does not exist. - * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the - * expected pid is not found in the cid refs file. - */ - public Map findObject(String pid) throws NoSuchAlgorithmException, IOException, - OrphanPidRefsFileException, PidNotFoundInCidRefsFileException; - /** * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a * persistent identifier (`pid`) and metadata format (`formatId`). All metadata documents diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4b484242..b93e50ba 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -701,8 +701,26 @@ public void verifyObject( logFileHashStore.info(infoMsg); } - @Override - public Map findObject(String pid) throws NoSuchAlgorithmException, IOException, + /** + * Checks whether an object referenced by a pid exists and returns a map containing the + * absolute path to the object, pid refs file, cid refs file and sysmeta document. + * + * @param pid Authority-based identifier + * @return Map containing the following keys: cid, cid_object_path, cid_refs_path, + * pid_refs_path, sysmeta_path + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs + * file's absolute address is not valid + * @throws IOException Unable to read from a pid refs file or pid refs + * file does not exist + * @throws OrphanRefsFilesException pid and cid refs file found, but object does + * not exist + * @throws OrphanPidRefsFileException When pid refs file exists and the cid found + * inside does not exist. + * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the + * expected pid is not found in the cid refs file. + */ + protected Map findObject(String pid) throws NoSuchAlgorithmException, + IOException, OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); From 08e7c7e09cf7dbd2ae2d76e176762d45238941b0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 15:20:32 -0700 Subject: [PATCH 363/553] Remove 'findObject' from 'HashStoreClient' and update README.md --- README.md | 148 ++++++++++++------ .../dataone/hashstore/HashStoreClient.java | 25 --- .../hashstore/HashStoreClientTest.java | 64 -------- 3 files changed, 103 insertions(+), 134 deletions(-) diff --git a/README.md b/README.md index 6036e7d4..a29c418d 100644 --- a/README.md +++ b/README.md @@ -7,23 +7,41 @@ - Contact us: support@dataone.org - [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) -HashStore is a server-side java library that implements an object storage file system for storing and accessing data and metadata for DataONE services. The package is used in DataONE system components that need direct, filesystem-based access to data objects, their system metadata, and extended metadata about the objects. This package is a core component of the [DataONE federation](https://dataone.org), and supports large-scale object storage for a variety of repositories, including the [KNB Data Repository](http://knb.ecoinformatics.org), the [NSF Arctic Data Center](https://arcticdata.io/catalog/), the [DataONE search service](https://search.dataone.org), and other repositories. - -DataONE in general, and HashStore in particular, are open source, community projects. We [welcome contributions](https://github.com/DataONEorg/hashstore-java/blob/main/CONTRIBUTING.md) in many forms, including code, graphics, documentation, bug reports, testing, etc. Use the [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) to discuss these contributions with us. +HashStore is a server-side java library that implements an object storage file system for storing +and accessing data and metadata for DataONE services. The package is used in DataONE system +components that need direct, filesystem-based access to data objects, their system metadata, and +extended metadata about the objects. This package is a core component of +the [DataONE federation](https://dataone.org), and supports large-scale object storage for a variety +of repositories, including the [KNB Data Repository](http://knb.ecoinformatics.org), +the [NSF Arctic Data Center](https://arcticdata.io/catalog/), +the [DataONE search service](https://search.dataone.org), and other repositories. + +DataONE in general, and HashStore in particular, are open source, community projects. +We [welcome contributions](https://github.com/DataONEorg/hashstore-java/blob/main/CONTRIBUTING.md) +in many forms, including code, graphics, documentation, bug reports, testing, etc. Use +the [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) to discuss these +contributions with us. ## Documentation -Documentation is a work in progress, and can be found on the [Metacat repository](https://github.com/NCEAS/metacat/blob/feature-1436-storage-and-indexing/docs/user/metacat/source/storage-subsystem.rst#physical-file-layout) as part of the storage redesign planning. Future updates will include documentation here as the package matures. +Documentation is a work in progress, and can be found on +the [Metacat repository](https://github.com/NCEAS/metacat/blob/feature-1436-storage-and-indexing/docs/user/metacat/source/storage-subsystem.rst#physical-file-layout) +as part of the storage redesign planning. Future updates will include documentation here as the +package matures. ## HashStore Overview -HashStore is an object storage system that provides persistent file-based storage using content hashes to de-duplicate data. The system stores both objects, references (refs) and metadata in its respective directories and utilizes an identifier-based API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of HashStore. +HashStore is an object storage system that provides persistent file-based storage using content +hashes to de-duplicate data. The system stores both objects, references (refs) and metadata in its +respective directories and utilizes an identifier-based API for interacting with the store. +HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure +the expected usage of HashStore. ###### Public API Methods + - storeObject - verifyObject - tagObject -- findObject - storeMetadata - retrieveObject - retrieveMetadata @@ -33,10 +51,11 @@ HashStore is an object storage system that provides persistent file-based storag For details, please see the HashStore interface (HashStore.java) - ###### How do I create a HashStore? -To create or interact with a HashStore, instantiate a HashStore object with the following set of properties: +To create or interact with a HashStore, instantiate a HashStore object with the following set of +properties: + - storePath - storeDepth - storeWidth @@ -64,12 +83,23 @@ hashStore.storeObject(stream, pid) // ... ``` - ###### Working with objects (store, retrieve, delete) -In HashStore, objects are first saved as temporary files while their content identifiers are calculated. Once the default hash algorithm list and their hashes are generated, objects are stored in their permanent location using the store's algorithm's corresponding hash value, the store depth and the store width. Lastly, reference files are created for the object so that they can be found and retrieved given an identifier (ex. persistent identifier (pid)). Note: Objects are also stored once and only once. +In HashStore, objects are first saved as temporary files while their content identifiers are +calculated. Once the default hash algorithm list and their hashes are generated, objects are stored +in their permanent location using the store's algorithm's corresponding hash value, the store depth +and the store width. Lastly, reference files are created for the object so that they can be found +and retrieved given an identifier (ex. persistent identifier (pid)). Note: Objects are also stored +once and only once. + +By calling the various interface methods for `storeObject`, the calling app/client can validate, +store and tag an object simultaneously if the relevant data is available. In the absence of an +identifier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. +The client is then expected to call `verifyObject` when the relevant metadata is available to +confirm that the object has been stored as expected. And to finalize the process (to make the object +discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an +object: -By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identifier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: ``` // All-in-one process which stores, validates and tags an object objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) @@ -84,51 +114,79 @@ tagObject(pid, cid) ``` **How do I retrieve an object if I have the pid?** -- To retrieve an object, call the Public API method `retrieveObject` which opens a stream to the object if it exists. -**How do I find an object or check that it exists if I have the pid?** -- To check if an object exists, call the Public API method `findObject` which will return the content identifier (cid) of the object if it exists. -- If desired, this cid can then be used to locate the object on disk by following HashStore's store configuration. +- To retrieve an object, call the Public API method `retrieveObject` which opens a stream to the + object if it exists. **How do I delete an object if I have the pid?** -- To delete an object, all its associated reference files and its metadata, call the Public API method `deleteObject()` with `idType` 'pid'. If an `idType` is not given (ex. calling `deleteObject(String pid)`), the `idType` will be assumed to be a 'pid' -- To delete only an object, call `deleteObject()` with `idType` 'cid' which will remove the object if it is not referenced by any pids. -- Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. +- To delete an object, all its associated reference files and its metadata, call the Public API + method `deleteObject()` with `idType` 'pid'. If an `idType` is not given (ex. + calling `deleteObject(String pid)`), the `idType` will be assumed to be a 'pid' +- To delete only an object, call `deleteObject()` with `idType` 'cid' which will remove the object + if it is not referenced by any pids. +- Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so + that the shared reference files are not unintentionally modified concurrently. An object that is + in the process of being deleted should not be tagged, and vice versa. These calls have been + implemented to occur sequentially to improve clarity in the event of an unexpected conflict or + issue. ###### Working with metadata (store, retrieve, delete) -HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. All metadata documents related to a 'pid' are stored in a directory determined by calculating the hash of the pid (based on the store's algorithm). Each specific metadata document is then stored by calculating the hash of its associated `pid+formatId`. By default, calling `storeMetadata` will use HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). +HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. All metadata +documents related to a 'pid' are stored in a directory determined by calculating the hash of the +pid (based on the store's algorithm). Each specific metadata document is then stored by calculating +the hash of its associated `pid+formatId`. By default, calling `storeMetadata` will use HashStore's +default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to +store multiple metadata files about an object, the client app is expected to provide a 'formatId' +that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). **How do I retrieve a metadata file?** -- To find a metadata object, call the Public API method `retrieveMetadata` which returns a stream to the metadata file that's been stored with the default metadata namespace if it exists. -- If there are multiple metadata objects, a 'formatId' must be specified when calling `retrieveMetadata` (ex. `retrieveMetadata(pid, formatId)`) + +- To find a metadata object, call the Public API method `retrieveMetadata` which returns a stream to + the metadata file that's been stored with the default metadata namespace if it exists. +- If there are multiple metadata objects, a 'formatId' must be specified when + calling `retrieveMetadata` (ex. `retrieveMetadata(pid, formatId)`) **How do I delete a metadata file?** -- Like `retrieveMetadata`, call the Public API method `deleteMetadata(String pid, String formatId)` which will delete the metadata object associated with the given pid. -- To delete all metadata objects related to a given 'pid', call `deleteMetadata(String pid)` +- Like `retrieveMetadata`, call the Public API method `deleteMetadata(String pid, String formatId)` + which will delete the metadata object associated with the given pid. +- To delete all metadata objects related to a given 'pid', call `deleteMetadata(String pid)` ###### What are HashStore reference files? -HashStore assumes that every object to store has a respective identifier. This identifier is then used when storing, retrieving and deleting an object. In order to facilitate this process, we create two types of reference files: -- pid (persistent identifier) reference files +HashStore assumes that every object to store has a respective identifier. This identifier is then +used when storing, retrieving and deleting an object. In order to facilitate this process, we create +two types of reference files: + +- pid (persistent identifier) reference files - cid (content identifier) reference files -These reference files are implemented in HashStore underneath the hood with no expectation for modification from the calling app/client. The one and only exception to this process is when the calling client/app does not have an identifier, and solely stores an objects raw bytes in HashStore (calling `storeObject(InputStream)`). +These reference files are implemented in HashStore underneath the hood with no expectation for +modification from the calling app/client. The one and only exception to this process is when the +calling client/app does not have an identifier, and solely stores an objects raw bytes in +HashStore (calling `storeObject(InputStream)`). **'pid' Reference Files** + - Pid (persistent identifier) reference files are created when storing an object with an identifier. - Pid reference files are located in HashStores '/refs/pid' directory -- If an identifier is not available at the time of storing an object, the calling app/client must create this association between a pid and the object it represents by calling `tagObject` separately. -- Each pid reference file contains a string that represents the content identifier of the object it references -- Like how objects are stored once and only once, there is also only one pid reference file for each object. +- If an identifier is not available at the time of storing an object, the calling app/client must + create this association between a pid and the object it represents by calling `tagObject` + separately. +- Each pid reference file contains a string that represents the content identifier of the object it + references +- Like how objects are stored once and only once, there is also only one pid reference file for each + object. **'cid' Reference Files** -- Cid (content identifier) reference files are created at the same time as pid reference files when storing an object with an identifier. -- Cid reference files are located in HashStore's '/refs/cid' directory -- A cid reference file is a list of all the pids that reference a cid, delimited by a new line ("\n") character +- Cid (content identifier) reference files are created at the same time as pid reference files when + storing an object with an identifier. +- Cid reference files are located in HashStore's '/refs/cid' directory +- A cid reference file is a list of all the pids that reference a cid, delimited by a new line (" + \n") character ###### What does HashStore look like? @@ -168,15 +226,15 @@ These reference files are implemented in HashStore underneath the hood with no e └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e ``` - -## Development build +## Development Build HashStore is a Java package, and built using the [Maven](https://maven.apache.org/) build tool. To install `HashStore-java` locally, install Java and Maven on your local machine, and then install or build the package with `mvn install` or `mvn package`, respectively. -We also maintain a parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). +We also maintain a +parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). ## HashStore HashStoreClient Usage @@ -190,34 +248,31 @@ $ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.Hash # Step 2: Determine where your hashstore should live (ex. `/var/hashstore`) ## Create a HashStore (long option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=https://ns.dataone.org/service/types/v2.0#SystemMetadata ## Create a HashStore (short option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp https://ns.dataone.org/service/types/v2.0#SystemMetadata # Get the checksum of a data object $ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 -# Find an object in HashStore (returns the content identifier, path to the obj, path to refs file and sysmeta path) -$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -findobject -pid testpid1 - # Store a data object $ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 # Store a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id https://ns.dataone.org/service/types/v2.0#SystemMetadata # Retrieve a data object $ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrieveobject -pid testpid1 # Retrieve a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrievemetadata -pid testpid1 -format_id https://ns.dataone.org/service/types/v2.0#SystemMetadata # Delete a data object $ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deleteobject -pid testpid1 # Delete a metadata file -$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deletemetadata -pid testpid1 -format_id https://ns.dataone.org/service/types/v2.0#SystemMetadata ``` ## License @@ -243,9 +298,12 @@ limitations under the License. Work on this package was supported by: - DataONE Network -- Arctic Data Center: NSF-PLR grant #2042102 to M. B. Jones, A. Budden, M. Schildhauer, and J. Dozier +- Arctic Data Center: NSF-PLR grant #2042102 to M. B. Jones, A. Budden, M. Schildhauer, and J. + Dozier -Additional support was provided for collaboration by the National Center for Ecological Analysis and Synthesis, a Center funded by the University of California, Santa Barbara, and the State of California. +Additional support was provided for collaboration by the National Center for Ecological Analysis and +Synthesis, a Center funded by the University of California, Santa Barbara, and the State of +California. [![DataONE_footer](https://user-images.githubusercontent.com/6643222/162324180-b5cf0f5f-ae7a-4ca6-87c3-9733a2590634.png)](https://dataone.org) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index a89f60fc..d4b15c5a 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -141,27 +141,6 @@ public static void main(String[] args) throws Exception { String hexDigest = hashStore.getHexDigest(pid, algo); System.out.println(hexDigest); - } else if (cmd.hasOption("findobject")) { - String pid = cmd.getOptionValue("pid"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - - Map objInfoMap = hashStore.findObject(pid); - String cid = objInfoMap.get("cid"); - String cidPath = objInfoMap.get("cid_object_path"); - String cidRefsPath = objInfoMap.get("cid_refs_path"); - String pidRefsPath = objInfoMap.get("pid_refs_path"); - String sysmetaPath = objInfoMap.get("sysmeta_path"); - System.out.println("Content Identifier:"); - System.out.println(cid); - System.out.println("Object Path:"); - System.out.println(cidPath); - System.out.println("Cid Reference File Path:"); - System.out.println(cidRefsPath); - System.out.println("Pid Reference File Path:"); - System.out.println(pidRefsPath); - System.out.println("Sysmeta Path:"); - System.out.println(sysmetaPath); - } else if (cmd.hasOption("storeobject")) { System.out.println("Storing object"); String pid = cmd.getOptionValue("pid"); @@ -296,10 +275,6 @@ private static Options addHashStoreClientOptions() { "getchecksum", "client_getchecksum", false, "Flag to get the hex digest of a data object in a HashStore." ); - options.addOption( - "findobject", "client_findobject", false, - "Flag to get the hex digest of a data object in a HashStore." - ); options.addOption( "storeobject", "client_storeobject", false, "Flag to store objs to a HashStore." ); diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index f272932b..014739d0 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -467,68 +467,4 @@ public void client_getHexDigest() throws Exception { assertEquals(testDataChecksum, pidStdOut.trim()); } } - - /** - * Test hashStore client returns the content identifier (cid) of an object - */ - @Test - public void client_findObject() throws Exception { - for (String pid : testData.pidList) { - // Redirect stdout to capture output - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream); - PrintStream old = System.out; - System.setOut(ps); - - // Store object - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); - // Store metadata - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - hashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); - - // Call client - String optFindObject = "-findobject"; - String optStore = "-store"; - String optStorePath = hsProperties.getProperty("storePath"); - String optPid = "-pid"; - String optPidValue = pid; - String[] args = {optFindObject, optStore, optStorePath, optPid, optPidValue}; - HashStoreClient.main(args); - - String contentIdentifier = testData.pidData.get(pid).get("sha256"); - Path absObjPath = getObjectAbsPath(testData.pidData.get(pid).get("sha256"), "object"); - Path sysMetaPath = getObjectAbsPath(pid, "metadata"); - String storeAlgo = hsProperties.getProperty("storeAlgorithm").toLowerCase().replace( - "-", ""); - Path cidRefsPath = getObjectAbsPath( - testData.pidData.get(pid).get(storeAlgo), "cid" - ); - Path pidRefsPath = getObjectAbsPath( - pid, "pid" - ); - - String expectedOutPutPt1 = "Content Identifier:\n" + contentIdentifier + "\n"; - String expectedOutPutPt2 = "Object Path:\n" + absObjPath.toString() + "\n"; - String expectedOutPutPt3 = "Cid Reference File Path:\n" + cidRefsPath + "\n"; - String expectedOutPutPt4 = "Pid Reference File Path:\n" + pidRefsPath + "\n"; - String expectedOutPutPt5 = "Sysmeta Path:\n" + sysMetaPath; - String expectedOutPutFull = - expectedOutPutPt1 + expectedOutPutPt2 + expectedOutPutPt3 + expectedOutPutPt4 + expectedOutPutPt5; - - - // Put things back - System.out.flush(); - System.setOut(old); - - // Confirm correct content identifier has been saved - String pidStdOut = outputStream.toString(); - assertEquals(expectedOutPutFull, pidStdOut.trim()); - } - } } From bb47568b8b64759f67e4c1a5533e797f48a5d435 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 15:49:07 -0700 Subject: [PATCH 364/553] Cleanup test classes for ide warnings and re-organize junit tests --- .../hashstore/HashStoreClientTest.java | 21 +- .../dataone/hashstore/HashStoreRunnable.java | 7 +- .../org/dataone/hashstore/HashStoreTest.java | 15 +- .../FileHashStoreInterfaceTest.java | 571 +++++++++++------- .../FileHashStoreProtectedTest.java | 208 ++++++- .../FileHashStoreReferencesTest.java | 561 ++++------------- 6 files changed, 690 insertions(+), 693 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 014739d0..a4bee7aa 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -184,9 +184,8 @@ public void client_storeObjects() throws Exception { String optPath = "-path"; String optObjectPath = testDataFile.toString(); String optPid = "-pid"; - String optPidValue = pid; String[] args = {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, - optPidValue}; + pid}; HashStoreClient.main(args); // Confirm object was stored @@ -225,11 +224,10 @@ public void client_storeMetadata() throws Exception { String optPath = "-path"; String optObjectPath = testDataFile.toString(); String optPid = "-pid"; - String optPidValue = pid; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); String[] args = {optStoreMetadata, optStore, optStorePath, optPath, optObjectPath, - optPid, optPidValue, optFormatId, optFormatIdValue}; + optPid, pid, optFormatId, optFormatIdValue}; HashStoreClient.main(args); // Confirm metadata was stored @@ -286,8 +284,7 @@ public void client_retrieveObjects() throws Exception { String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; - String[] args = {optRetrieveObject, optStore, optStorePath, optPid, optPidValue}; + String[] args = {optRetrieveObject, optStore, optStorePath, optPid, pid}; HashStoreClient.main(args); // Put things back @@ -323,10 +320,9 @@ public void client_retrieveMetadata() throws Exception { String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, optPidValue, + String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, pid, optFormatId, optFormatIdValue}; HashStoreClient.main(args); @@ -363,8 +359,7 @@ public void client_deleteObjects() throws Exception { String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; - String[] args = {optDeleteObject, optStore, optStorePath, optPid, optPidValue}; + String[] args = {optDeleteObject, optStore, optStorePath, optPid, pid}; HashStoreClient.main(args); // Confirm object was deleted @@ -404,10 +399,9 @@ public void client_deleteMetadata() throws Exception { String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, optPidValue, + String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, pid, optFormatId, optFormatIdValue}; HashStoreClient.main(args); @@ -448,10 +442,9 @@ public void client_getHexDigest() throws Exception { String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; String optAlgo = "-algo"; String optAlgoValue = "SHA-256"; - String[] args = {optGetChecksum, optStore, optStorePath, optPid, optPidValue, optAlgo, + String[] args = {optGetChecksum, optStore, optStorePath, optPid, pid, optAlgo, optAlgoValue}; HashStoreClient.main(args); diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 971b404a..20f04ac4 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -3,7 +3,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.exceptions.HashStoreServiceException; -import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import java.io.IOException; @@ -18,9 +17,9 @@ public class HashStoreRunnable implements Runnable { private static final Log logHashStoreRunnable = LogFactory.getLog(HashStoreRunnable.class); public static final int storeObject = 1; public static final int deleteObject = 2; - private HashStore hashstore; - private int publicAPIMethod; - private String pid; + private final HashStore hashstore; + private final int publicAPIMethod; + private final String pid; private InputStream objStream; public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream objStream, diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 58cdb68a..92607ab0 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -157,9 +157,8 @@ public void getHashStore_objFolderExists() throws Exception { "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); - assertThrows(HashStoreFactoryException.class, () -> { - hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); - }); + assertThrows(HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); } /** @@ -183,9 +182,8 @@ public void getHashStore_metadataFolderExists() throws Exception { "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); - assertThrows(HashStoreFactoryException.class, () -> { - hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); - }); + assertThrows(HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); } /** @@ -209,8 +207,7 @@ public void getHashStore_refsFolderExists() throws Exception { "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); - assertThrows(HashStoreFactoryException.class, () -> { - hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); - }); + assertThrows(HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 9c1ccc2c..730ed88f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -36,10 +36,10 @@ import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; -import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; -import org.dataone.hashstore.exceptions.OrphanRefsFilesException; -import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; -import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -48,8 +48,6 @@ /** * Test class for FileHashStore HashStore Interface methods. - * - * Note: `tagObject` & `verifyObject` tests can be found in the `FileHashStoreReferences` class */ public class FileHashStoreInterfaceTest { private FileHashStore fileHashStore; @@ -215,7 +213,7 @@ public void storeObject_emptyPid() { * Check that store object throws exception when pid contains new line character */ @Test - public void storeObject_pidWithNewLine() throws Exception { + public void storeObject_pidWithNewLine() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -232,7 +230,7 @@ public void storeObject_pidWithNewLine() throws Exception { * Check that store object throws exception when pid contains tab character */ @Test - public void storeObject_pidWithTab() throws Exception { + public void storeObject_pidWithTab() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); @@ -282,9 +280,7 @@ public void storeObject_overloadInputStreamOnly() throws Exception { assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.findObject(pid); - }); + assertThrows(FileNotFoundException.class, () -> fileHashStore.findObject(pid)); Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); assertFalse(Files.exists(cidRefsFilePath)); @@ -472,13 +468,12 @@ public void storeObject_duplicate() throws Exception { /** * Test that storeObject successfully stores a 1GB file - * * Note 1: a 4GB successfully stored in approximately 1m30s * Note 2: Successfully stores 250GB file confirmed from knbvm */ @Test public void storeObject_largeSparseFile() throws Exception { - long fileSize = 1L * 1024L * 1024L * 1024L; // 1GB + long fileSize = 1024L * 1024L * 1024L; // 1GB // Get tmp directory to initially store test file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); Path testFilePath = storePath.resolve("random_file.bin"); @@ -511,7 +506,7 @@ public void storeObject_largeSparseFile() throws Exception { */ @Test public void storeObject_interruptProcess() throws Exception { - long fileSize = 1L * 1024L * 1024L * 1024L; // 1GB + long fileSize = 1024L * 1024L * 1024L; // 1GB // Get tmp directory to initially store test file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); Path testFilePath = storePath.resolve("random_file.bin"); @@ -737,6 +732,333 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { assertEquals(50, pidRefFiles.size()); } + /** + * Check tagObject does not throw exception when creating a fresh set of reference files + */ + @Test + public void tagObject() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + } + + /** + * Check that tagObject successfully tags a cid refs file that already exists + */ + @Test + public void tagObject_cidRefsAlreadyExists() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + String pidTwo = "dou.test.2"; + fileHashStore.tagObject(pidTwo, cid); + + // Confirm number of ref files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(2, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that tagObject throws HashStoreRefsAlreadyExistException exception when pid and cid + * refs file already exists (duplicate tag request) + */ + @Test + public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // This exception only needs to be re-raised + assertThrows( + HashStoreRefsAlreadyExistException.class, () -> fileHashStore.tagObject(pid, cid)); + + // Confirm there are only 1 of each ref files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' + * that is already referencing another 'cid' + */ + @Test + public void tagObject_PidRefsFileExistsException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // This exception only needs to be re-raised + assertThrows( + PidRefsFileExistsException.class, () -> fileHashStore.tagObject(pid, "another.cid")); + + // Confirm there are only 1 of each ref files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that verifyObject does not throw exception with matching values + */ + @Test + public void verifyObject_correctValues() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true + ); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject calculates and verifies a checksum with a supported algorithm that is + * not included in the default list + */ + @Test + public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("md2"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + fileHashStore.verifyObject( + objInfo, expectedChecksum, "MD2", expectedSize, true + ); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject calculates throws exception when given a checksumAlgorithm that is + * not supported + */ + @Test + public void verifyObject_unsupportedAlgo() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + assertThrows( + UnsupportedHashAlgorithmException.class, + () -> fileHashStore.verifyObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000, + false)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject throws exception when non-matching size value provided + */ + @Test + public void verifyObject_mismatchedSize() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = 123456789; + + assertThrows( + NonMatchingObjSizeException.class, + () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize, false)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject throws exception with non-matching checksum value + */ + @Test + public void verifyObject_mismatchedChecksum() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = "intentionallyWrongValue"; + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + assertThrows( + NonMatchingChecksumException.class, + () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize, false)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject throws exception when non-matching size value provided + */ + @Test + public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = 123456789; + + assertThrows( + NonMatchingObjSizeException.class, + () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize, true)); + + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + + /** + * Check that verifyObject throws exception with non-matching checksum value + */ + @Test + public void verifyObject_mismatchedChecksum_deleteInvalidObject_true() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get verifyObject args + String expectedChecksum = "intentionallyWrongValue"; + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.verifyObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true + )); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } + } + /** * Test storeMetadata stores metadata as expected */ @@ -1060,9 +1382,9 @@ public void retrieveObject() throws Exception { */ @Test public void retrieveObject_pidDoesNotExist() { - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.retrieveObject("pid.whose.object.does.not.exist"); - }); + assertThrows( + FileNotFoundException.class, + () -> fileHashStore.retrieveObject("pid.whose.object.does.not.exist")); } /** @@ -1070,9 +1392,7 @@ public void retrieveObject_pidDoesNotExist() { */ @Test public void retrieveObject_pidNull() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.retrieveObject(null); - }); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.retrieveObject(null)); } /** @@ -1080,9 +1400,7 @@ public void retrieveObject_pidNull() { */ @Test public void retrieveObject_pidEmpty() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.retrieveObject(""); - }); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.retrieveObject("")); } /** @@ -1090,9 +1408,7 @@ public void retrieveObject_pidEmpty() { */ @Test public void retrieveObject_pidEmptySpaces() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.retrieveObject(" "); - }); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.retrieveObject(" ")); } /** @@ -1100,9 +1416,8 @@ public void retrieveObject_pidEmptySpaces() { */ @Test public void retrieveObject_pidNotFound() { - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.retrieveObject("dou.2023.hs.1"); - }); + assertThrows( + FileNotFoundException.class, () -> fileHashStore.retrieveObject("dou.2023.hs.1")); } /** @@ -1936,202 +2251,4 @@ public void getHexDigest_badAlgo() { }); } } - - /** - * Check that findObject returns cid as expected. - */ - @Test - public void findObject_cid() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - Map objInfoMap = fileHashStore.findObject(pid); - assertEquals(objInfoMap.get("cid"), objInfo.getCid()); - } - } - - /** - * Check that findObject returns the path to the object as expected. - */ - @Test - public void findObject_cidPath() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - Map objInfoMap = fileHashStore.findObject(pid); - String objectPath = objInfoMap.get("cid_object_path"); - - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); - - assertEquals(objectPath, realPath.toString()); - } - } - - /** - * Check that findObject returns the absolute path to the pid and cid refs file - */ - @Test - public void findObject_refsPaths() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - Map objInfoMap = fileHashStore.findObject(pid); - String cidRefsPath = objInfoMap.get("cid_refs_path"); - String pidRefsPath = objInfoMap.get("pid_refs_path"); - - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), "cid"); - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - - assertEquals(cidRefsPath, cidRefsFilePath.toString()); - assertEquals(pidRefsPath, pidRefsFilePath.toString()); - } - } - - /** - * Check that findObject returns the absolute path to sysmeta document if it exists - */ - @Test - public void findObject_sysmetaPath_exists() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - // Store Object - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - // Store Metadata - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); - System.out.println(metadataPath); - - - Map objInfoMap = fileHashStore.findObject(pid); - String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); - - String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path sysmetaPath = fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); - System.out.println(sysmetaPath); - - assertEquals(objInfoSysmetaPath, sysmetaPath.toString()); - } - } - - /** - * Check that findObject returns "Does not exist." when there is no sysmeta for the pid. - */ - @Test - public void findObject_sysmetaPath_doesNotExist() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - - Map objInfoMap = fileHashStore.findObject(pid); - String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); - - assertEquals(objInfoSysmetaPath, "Does not exist"); - } - } - - /** - * Confirm findObject throws exception when cid object does not exist but reference - * files exist. - */ - @Test - public void findObject_refsFileExistButObjectDoesNot() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - assertThrows(OrphanRefsFilesException.class, () -> { - fileHashStore.findObject(pid); - }); - } - - /** - * Confirm that findObject throws OrphanPidRefsFileException exception when - * pid refs file found but cid refs file is missing. - */ - @Test - public void findObject_cidRefsFileNotFound() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - Files.delete(cidRefsPath); - - assertThrows(OrphanPidRefsFileException.class, () -> { - fileHashStore.findObject(pid); - }); - } - - - /** - * Confirm that findObject throws PidNotFoundInCidRefsFileException exception when - * pid refs file found but cid refs file is missing. - */ - @Test - public void findObject_cidRefsFileMissingPid() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pid, cidRefsPath, "remove"); - - assertThrows(PidNotFoundInCidRefsFileException.class, () -> { - fileHashStore.findObject(pid); - }); - } - - /** - * Check that exception is thrown when pid refs file doesn't exist - */ - @Test - public void findObject_pidNotFound() { - String pid = "dou.test.1"; - assertThrows(PidRefsFileNotFoundException.class, () -> { - fileHashStore.findObject(pid); - }); - } - } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index a59c05ec..f98b3b99 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -16,11 +16,16 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Map; +import java.util.Objects; import java.util.Properties; import javax.xml.bind.DatatypeConverter; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; +import org.dataone.hashstore.exceptions.OrphanRefsFilesException; +import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -32,6 +37,7 @@ public class FileHashStoreProtectedTest { private FileHashStore fileHashStore; private Properties fhsProperties; + private Path rootDirectory; private static final TestDataHarness testData = new TestDataHarness(); /** @@ -39,7 +45,7 @@ public class FileHashStoreProtectedTest { */ @BeforeEach public void initializeFileHashStore() { - Path rootDirectory = tempFolder.resolve("hashstore"); + rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -169,6 +175,195 @@ public void getHierarchicalPathString() { assertEquals(shardedPath, shardedPathExpected); } + /** + * Check that findObject returns cid as expected. + */ + @Test + public void findObject_cid() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + Map objInfoMap = fileHashStore.findObject(pid); + assertEquals(objInfoMap.get("cid"), objInfo.getCid()); + } + } + + /** + * Check that findObject returns the path to the object as expected. + */ + @Test + public void findObject_cidPath() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + Map objInfoMap = fileHashStore.findObject(pid); + String objectPath = objInfoMap.get("cid_object_path"); + + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); + + assertEquals(objectPath, realPath.toString()); + } + } + + /** + * Check that findObject returns the absolute path to the pid and cid refs file + */ + @Test + public void findObject_refsPaths() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + Map objInfoMap = fileHashStore.findObject(pid); + String cidRefsPath = objInfoMap.get("cid_refs_path"); + String pidRefsPath = objInfoMap.get("pid_refs_path"); + + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), "cid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + + assertEquals(cidRefsPath, cidRefsFilePath.toString()); + assertEquals(pidRefsPath, pidRefsFilePath.toString()); + } + } + + /** + * Check that findObject returns the absolute path to sysmeta document if it exists + */ + @Test + public void findObject_sysmetaPath_exists() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + // Store Object + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + // Store Metadata + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); + metadataStream.close(); + System.out.println(metadataPath); + + + Map objInfoMap = fileHashStore.findObject(pid); + String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path sysmetaPath = fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + System.out.println(sysmetaPath); + + assertEquals(objInfoSysmetaPath, sysmetaPath.toString()); + } + } + + /** + * Check that findObject returns "Does not exist." when there is no sysmeta for the pid. + */ + @Test + public void findObject_sysmetaPath_doesNotExist() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + dataStream.close(); + + + Map objInfoMap = fileHashStore.findObject(pid); + String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + + assertEquals(objInfoSysmetaPath, "Does not exist"); + } + } + + /** + * Confirm findObject throws exception when cid object does not exist but reference + * files exist. + */ + @Test + public void findObject_refsFileExistButObjectDoesNot() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + assertThrows(OrphanRefsFilesException.class, () -> fileHashStore.findObject(pid)); + } + + /** + * Confirm that findObject throws OrphanPidRefsFileException exception when + * pid refs file found but cid refs file is missing. + */ + @Test + public void findObject_cidRefsFileNotFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Files.delete(cidRefsPath); + + assertThrows(OrphanPidRefsFileException.class, () -> fileHashStore.findObject(pid)); + } + + + /** + * Confirm that findObject throws PidNotFoundInCidRefsFileException exception when + * pid refs file found but cid refs file is missing. + */ + @Test + public void findObject_cidRefsFileMissingPid() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.updateRefsFile(pid, cidRefsPath, "remove"); + + assertThrows(PidNotFoundInCidRefsFileException.class, () -> fileHashStore.findObject(pid)); + } + + /** + * Check that exception is thrown when pid refs file doesn't exist + */ + @Test + public void findObject_pidNotFound() { + String pid = "dou.test.1"; + assertThrows(PidRefsFileNotFoundException.class, () -> fileHashStore.findObject(pid)); + } + /** * Verify that putObject returns correct id */ @@ -425,7 +620,7 @@ public void putObject_duplicateObject() throws Exception { // Confirm there are no files in 'objects/tmp' directory Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] files = storePath.resolve("objects/tmp").toFile().listFiles(); - assertEquals(0, files.length); + assertEquals(0, Objects.requireNonNull(files).length); } /** @@ -1054,9 +1249,8 @@ public void getHashStoreRefsPath_pid() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); dataStream.close(); - String cid = objInfo.getCid(); // Manually form the permanent address of the actual cid Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -1137,8 +1331,8 @@ public void getHashStoreDataObjectPath_fileNotFound() { */ @Test public void fileHashStoreUtility_checkForEmptyString() { - assertThrows(IllegalArgumentException.class, () -> { - FileHashStoreUtility.checkForEmptyString("dou.test.1\n", "pid", "storeObject"); - }); + assertThrows( + IllegalArgumentException.class, + () -> FileHashStoreUtility.checkForEmptyString("dou.test.1\n", "pid", "storeObject")); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index 697951fb..eff7c831 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -9,7 +9,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -17,16 +16,11 @@ import java.util.List; import java.util.Properties; -import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; -import org.dataone.hashstore.exceptions.NonMatchingChecksumException; -import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; -import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; -import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -37,7 +31,6 @@ public class FileHashStoreReferencesTest { private FileHashStore fileHashStore; private Properties fhsProperties; - private static final TestDataHarness testData = new TestDataHarness(); /** * Initialize FileHashStore before each test to creates tmp directories @@ -74,56 +67,135 @@ public void initializeFileHashStore() { @TempDir public Path tempFolder; + /** + * Check that storeHashStoreRefsFiles creates reference files + */ + @Test + public void storeHashStoreRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + // Confirm refs files exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + + assertTrue(Files.exists(absCidRefsPath)); + assertTrue(Files.exists(absPidRefsPath)); + + // Confirm no additional files were created + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } /** - * Check tagObject does not throw exception when creating a fresh set - * of reference files + * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content + * is correct */ @Test - public void tagObject() throws Exception { + public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + assertTrue(Files.exists(pidRefsFilePath)); + + String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); + assertEquals(cid, retrievedCid); } /** - * Check that tagObject successfully tags a cid refs file that already exists + * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content + * is correct */ @Test - public void tagObject_cidRefsAlreadyExists() throws Exception { + public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); - String pidTwo = "dou.test.2"; - fileHashStore.tagObject(pidTwo, cid); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + assertTrue(Files.exists(cidRefsFilePath)); + + String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); + assertEquals(pid, retrievedPid); + } + + /** + * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException + * when refs files already exist + */ + @Test + public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + assertThrows( + HashStoreRefsAlreadyExistException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); - // Confirm number of ref files + // Confirm that there is only 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); List pidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); List cidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - assertEquals(2, pidRefsFiles.size()); + assertEquals(1, pidRefsFiles.size()); assertEquals(1, cidRefsFiles.size()); } /** - * Check that tagObject throws HashStoreRefsAlreadyExistException exception when pid and cid - * refs file already exists (duplicate tag request) + * Check storeHashStoreRefsFiles throws exception when the supplied cid is different from what + * is found in the pid refs file, and the associated cid refs file from the pid refs file + * is correctly tagged (everything is where it's expected to be) */ @Test - public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { + public void storeHashStoreRefsFiles_PidRefsFileExistsException() + throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + String existingCid = "987654321fedcba"; + fileHashStore.storeHashStoreRefsFiles(pid, existingCid); - // This exception only needs to be re-raised + // This will throw an exception because the pid and cid refs file are in sync assertThrows( - HashStoreRefsAlreadyExistException.class, () -> fileHashStore.tagObject(pid, cid)); + PidRefsFileExistsException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); + } - // Confirm there are only 1 of each ref files + /** + * Check storeHashStoreRefsFiles overwrites an orphaned pid refs file - the 'cid' that it + * references does not exist (does not have a cid refs file) + */ + @Test + public void storeHashStoreRefsFiles_pidRefsOrphanedFile() + throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + String cidForOrphanPidRef = "987654321fedcba"; + + // Create orphaned pid refs file + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + File pidRefsTmpFile = fileHashStore.writeRefsFile( + cidForOrphanPidRef, HashStoreIdTypes.pid.getName() + ); + File absPathPidRefsFile = absPidRefsPath.toFile(); + fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + + fileHashStore.storeHashStoreRefsFiles(pid, cid); + // There should only be 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); List pidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); @@ -135,27 +207,37 @@ public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { } /** - * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' - * that is already referencing another 'cid' + * Check that storeHashStoreRefsFiles creates a pid refs file and updates an existing cid refs + * file */ @Test - public void tagObject_PidRefsFileExistsException() throws Exception { + public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + fileHashStore.storeHashStoreRefsFiles(pid, cid); - // This exception only needs to be re-raised - assertThrows( - PidRefsFileExistsException.class, () -> fileHashStore.tagObject(pid, "another.cid")); + String pidAdditional = "another.pid.2"; + fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); + + // Confirm missing pid refs file has been created + Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, "pid"); + assertTrue(Files.exists(pidAdditionalRefsFilePath)); + + // Check cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( + pidAdditional, cidRefsFilePath + ); + assertTrue(pidFoundInCidRefFiles); - // Confirm there are only 1 of each ref files + // There should be 2 pid refs file, and 1 cid refs file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); List pidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); List cidRefsFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - assertEquals(1, pidRefsFiles.size()); + assertEquals(2, pidRefsFiles.size()); assertEquals(1, cidRefsFiles.size()); } @@ -290,7 +372,7 @@ public void unTagObject_missingPidRefsFile() throws Exception { } /** - * Check that the cid supplied is written into the file given + * Check that the value supplied is written */ @Test public void writeRefsFile_content() throws Exception { @@ -302,177 +384,36 @@ public void writeRefsFile_content() throws Exception { } /** - * Check that storeHashStoreRefsFiles creates reference files - */ - @Test - public void storeHashStoreRefsFiles() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - - // Confirm refs files exist - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - - assertTrue(Files.exists(absCidRefsPath)); - assertTrue(Files.exists(absPidRefsPath)); - - // Confirm no additional files were created - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(1, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); - } - - /** - * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content - * is correct + * Check that no exception is thrown when pid and cid are tagged correctly */ @Test - public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { + public void verifyHashStoreRefFiles() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); + fileHashStore.tagObject(pid, cid); + // Create a pid refs file with the incorrect cid Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - assertTrue(Files.exists(pidRefsFilePath)); - - String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); - assertEquals(cid, retrievedCid); - } - - /** - * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content - * is correct - */ - @Test - public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertTrue(Files.exists(cidRefsFilePath)); - - String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); - assertEquals(pid, retrievedPid); - } - - /** - * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException - * when refs files already exist - */ - @Test - public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - - assertThrows( - HashStoreRefsAlreadyExistException.class, - () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); - - // Confirm that there is only 1 of each ref file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(1, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); - } - - /** - * Check storeHashStoreRefsFiles throws exception when the supplied cid is different from what - * is found in the pid refs file, and the associated cid refs file from the pid refs file - * is correctly tagged (everything is where it's expected to be) - */ - @Test - public void storeHashStoreRefsFiles_PidRefsFileExistsException() - throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - String existingCid = "987654321fedcba"; - fileHashStore.storeHashStoreRefsFiles(pid, existingCid); - - // This will throw an exception because the pid and cid refs file are in sync - assertThrows( - PidRefsFileExistsException.class, - () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); - } - - /** - * Check storeHashStoreRefsFiles overwrites an orphaned pid refs file - the 'cid' that it - * references does not exist (does not have a cid refs file) - */ - @Test - public void storeHashStoreRefsFiles_pidRefsOrphanedFile() - throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - String cidForOrphanPidRef = "987654321fedcba"; - - // Create orphaned pid refs file - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - File pidRefsTmpFile = fileHashStore.writeRefsFile( - cidForOrphanPidRef, HashStoreIdTypes.pid.getName() - ); - File absPathPidRefsFile = absPidRefsPath.toFile(); - fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - fileHashStore.storeHashStoreRefsFiles(pid, cid); - // There should only be 1 of each ref file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(1, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); + fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsFilePath); } /** - * Check that storeHashStoreRefsFiles creates a pid refs file and updates an existing cid refs - * file + * Check that an exception is thrown when a file is not found */ @Test - public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { + public void verifyHashStoreRefFiles_fileNotFound() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - String pidAdditional = "another.pid.2"; - fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); - - // Confirm missing pid refs file has been created - Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, "pid"); - assertTrue(Files.exists(pidAdditionalRefsFilePath)); - - // Check cid refs file + // Create a pid refs file with the incorrect cid + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( - pidAdditional, cidRefsFilePath - ); - assertTrue(pidFoundInCidRefFiles); - - // There should be 2 pid refs file, and 1 cid refs file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - assertEquals(2, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); + assertThrows(FileNotFoundException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, + cidRefsFilePath)); } /** @@ -673,248 +614,4 @@ public void deleteCidRefsPid_allPidsRemoved() throws Exception { assertTrue(Files.exists(cidRefsFilePath)); assertEquals(0, Files.size(cidRefsFilePath)); } - - /** - * Check that verifyObject does not throw exception with matching values - */ - @Test - public void verifyObject_correctValues() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true - ); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that verifyObject calculates and verifies a checksum with a supported algorithm that is - * not included in the default list - */ - @Test - public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - // Get verifyObject args - String expectedChecksum = testData.pidData.get(pid).get("md2"); - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - fileHashStore.verifyObject( - objInfo, expectedChecksum, "MD2", expectedSize, true - ); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that verifyObject calculates throws exception when given a checksumAlgorithm that is - * not supported - */ - @Test - public void verifyObject_unsupportedAlgo() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - assertThrows( - UnsupportedHashAlgorithmException.class, - () -> fileHashStore.verifyObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000, - false)); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that verifyObject throws exception when non-matching size value provided - */ - @Test - public void verifyObject_mismatchedSize() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = 123456789; - - assertThrows( - NonMatchingObjSizeException.class, - () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize, false)); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that verifyObject throws exception with non-matching checksum value - */ - @Test - public void verifyObject_mismatchedChecksum() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = "intentionallyWrongValue"; - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - assertThrows( - NonMatchingChecksumException.class, - () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize, false)); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that verifyObject throws exception when non-matching size value provided - */ - @Test - public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = 123456789; - - assertThrows( - NonMatchingObjSizeException.class, - () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize, true)); - - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that verifyObject throws exception with non-matching checksum value - */ - @Test - public void verifyObject_mismatchedChecksum_deleteInvalidObject_true() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = "intentionallyWrongValue"; - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true - )); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } } From e0c3bb7bfede68adb6cd52ac6bbb07af511476d8 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 10 Jul 2024 15:53:36 -0700 Subject: [PATCH 365/553] Move 'findObject' code in 'FileHashStore' to core and supporting methods section --- .../filehashstore/FileHashStore.java | 178 +++++++++--------- 1 file changed, 89 insertions(+), 89 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b93e50ba..1cc552e7 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -701,95 +701,6 @@ public void verifyObject( logFileHashStore.info(infoMsg); } - /** - * Checks whether an object referenced by a pid exists and returns a map containing the - * absolute path to the object, pid refs file, cid refs file and sysmeta document. - * - * @param pid Authority-based identifier - * @return Map containing the following keys: cid, cid_object_path, cid_refs_path, - * pid_refs_path, sysmeta_path - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs - * file's absolute address is not valid - * @throws IOException Unable to read from a pid refs file or pid refs - * file does not exist - * @throws OrphanRefsFilesException pid and cid refs file found, but object does - * not exist - * @throws OrphanPidRefsFileException When pid refs file exists and the cid found - * inside does not exist. - * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the - * expected pid is not found in the cid refs file. - */ - protected Map findObject(String pid) throws NoSuchAlgorithmException, - IOException, - OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { - logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); - FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); - - // Get path of the pid references file - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - - if (Files.exists(absPidRefsPath)) { - String cid = new String(Files.readAllBytes(absPidRefsPath)); - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - - // Throw exception if the cid refs file doesn't exist - if (!Files.exists(absCidRefsPath)) { - String errMsg = - "FileHashStore.findObject - Cid refs file does not exist for cid: " + cid - + " with address: " + absCidRefsPath + ", but pid refs file exists."; - logFileHashStore.error(errMsg); - throw new OrphanPidRefsFileException(errMsg); - } - // If the pid is found in the expected cid refs file, and the object exists, return it - if (isStringInRefsFile(pid, absCidRefsPath)) { - logFileHashStore.info( - "FileHashStore.findObject - Cid (" + cid + ") found for pid: " + pid - ); - - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid - ); - Path realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - if (Files.exists(realPath)) { - Map objInfoMap = new HashMap<>(); - objInfoMap.put("cid", cid); - objInfoMap.put("cid_object_path", realPath.toString()); - objInfoMap.put("cid_refs_path", absCidRefsPath.toString()); - objInfoMap.put("pid_refs_path", absPidRefsPath.toString()); - // If the default system metadata exists, include it - Path metadataPidExpectedPath = - getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); - if (Files.exists(metadataPidExpectedPath)) { - objInfoMap.put("sysmeta_path", metadataPidExpectedPath.toString()); - } else { - objInfoMap.put("sysmeta_path", "Does not exist"); - } - return objInfoMap; - - } else { - String errMsg = "FileHashStore.findObject - Object with cid: " + cid - + " does not exist, but pid and cid reference file found for pid: " + pid; - logFileHashStore.error(errMsg); - throw new OrphanRefsFilesException(errMsg); - } - - } else { - String errMsg = "FileHashStore.findObject - Pid refs file exists, but pid (" + pid - + ") not found in cid refs file for cid: " + cid + " with address: " - + absCidRefsPath; - logFileHashStore.error(errMsg); - throw new PidNotFoundInCidRefsFileException(errMsg); - } - - } else { - String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid - + ". Pid refs file does not exist at: " + absPidRefsPath; - logFileHashStore.error(errMsg); - throw new PidRefsFileNotFoundException(errMsg); - } - } - @Override public String storeMetadata(InputStream metadata, String pid, String formatId) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, @@ -1416,6 +1327,95 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE // FileHashStore Core & Supporting Methods + /** + * Checks whether an object referenced by a pid exists and returns a map containing the + * absolute path to the object, pid refs file, cid refs file and sysmeta document. + * + * @param pid Authority-based identifier + * @return Map containing the following keys: cid, cid_object_path, cid_refs_path, + * pid_refs_path, sysmeta_path + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs + * file's absolute address is not valid + * @throws IOException Unable to read from a pid refs file or pid refs + * file does not exist + * @throws OrphanRefsFilesException pid and cid refs file found, but object does + * not exist + * @throws OrphanPidRefsFileException When pid refs file exists and the cid found + * inside does not exist. + * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the + * expected pid is not found in the cid refs file. + */ + protected Map findObject(String pid) throws NoSuchAlgorithmException, + IOException, + OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { + logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); + FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); + FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); + + // Get path of the pid references file + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + + if (Files.exists(absPidRefsPath)) { + String cid = new String(Files.readAllBytes(absPidRefsPath)); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + + // Throw exception if the cid refs file doesn't exist + if (!Files.exists(absCidRefsPath)) { + String errMsg = + "FileHashStore.findObject - Cid refs file does not exist for cid: " + cid + + " with address: " + absCidRefsPath + ", but pid refs file exists."; + logFileHashStore.error(errMsg); + throw new OrphanPidRefsFileException(errMsg); + } + // If the pid is found in the expected cid refs file, and the object exists, return it + if (isStringInRefsFile(pid, absCidRefsPath)) { + logFileHashStore.info( + "FileHashStore.findObject - Cid (" + cid + ") found for pid: " + pid + ); + + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid + ); + Path realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + if (Files.exists(realPath)) { + Map objInfoMap = new HashMap<>(); + objInfoMap.put("cid", cid); + objInfoMap.put("cid_object_path", realPath.toString()); + objInfoMap.put("cid_refs_path", absCidRefsPath.toString()); + objInfoMap.put("pid_refs_path", absPidRefsPath.toString()); + // If the default system metadata exists, include it + Path metadataPidExpectedPath = + getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); + if (Files.exists(metadataPidExpectedPath)) { + objInfoMap.put("sysmeta_path", metadataPidExpectedPath.toString()); + } else { + objInfoMap.put("sysmeta_path", "Does not exist"); + } + return objInfoMap; + + } else { + String errMsg = "FileHashStore.findObject - Object with cid: " + cid + + " does not exist, but pid and cid reference file found for pid: " + pid; + logFileHashStore.error(errMsg); + throw new OrphanRefsFilesException(errMsg); + } + + } else { + String errMsg = "FileHashStore.findObject - Pid refs file exists, but pid (" + pid + + ") not found in cid refs file for cid: " + cid + " with address: " + + absCidRefsPath; + logFileHashStore.error(errMsg); + throw new PidNotFoundInCidRefsFileException(errMsg); + } + + } else { + String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid + + ". Pid refs file does not exist at: " + absPidRefsPath; + logFileHashStore.error(errMsg); + throw new PidRefsFileNotFoundException(errMsg); + } + } + /** * Takes a given InputStream and writes it to its permanent address on disk based on the SHA-256 * hex digest value of an authority based identifier, usually provided as a persistent From a60586ccdc1a187a1a3fb87efb46c9bb0d5afcd6 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 09:39:16 -0700 Subject: [PATCH 366/553] Code clean-up: Review/revise comments, refactor 'retrieveMetadata' methods to be DRY by extracting method 'getMetadataDocInputStream' --- .../filehashstore/FileHashStore.java | 162 ++++++++---------- 1 file changed, 67 insertions(+), 95 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1cc552e7..b9ac2f3f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -110,11 +110,11 @@ enum HashStoreProperties { } /** - * Constructor to initialize HashStore, properties are required. - * - * Note: HashStore is not responsible for ensuring that the given store path is accurate. It - * will only check for an existing configuration, directories or objects at the supplied store - * path before initializing. + * Constructor to initialize FileHashStore, properties are required. FileHashStore is not + * responsible for ensuring that the given store path is accurate. Upon initialization, if + * an existing config file (hashstore.yaml) is present, it will confirm that it is accurate + * against the supplied properties. If not, FileHashSTore will check for 'hashstore' specific + * directories at the supplied store path before initializing. * * @param hashstoreProperties Properties object with the following keys: storePath, storeDepth, * storeWidth, storeAlgorithm, storeMetadataNamespace @@ -130,7 +130,6 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep ); // Get properties - // Note - Paths.get() throws NullPointerException if arg is null Path storePath = Paths.get( hashstoreProperties.getProperty(HashStoreProperties.storePath.name()) ); @@ -147,7 +146,6 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep HashStoreProperties.storeMetadataNamespace.name() ); - // Check given properties and/with existing HashStore verifyHashStoreProperties( storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace ); @@ -158,12 +156,9 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; DEFAULT_METADATA_NAMESPACE = storeMetadataNamespace; - // Resolve object/metadata/refs directories OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); METADATA_STORE_DIRECTORY = storePath.resolve("metadata"); REFS_STORE_DIRECTORY = storePath.resolve("refs"); - // Resolve tmp object/metadata directory paths, this is where objects are - // created before they are moved to their permanent address OBJECT_TMP_FILE_DIRECTORY = OBJECT_STORE_DIRECTORY.resolve("tmp"); METADATA_TMP_FILE_DIRECTORY = METADATA_STORE_DIRECTORY.resolve("tmp"); REFS_TMP_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("tmp"); @@ -171,7 +166,6 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("cids"); try { - // Physically create object & metadata store and tmp directories Files.createDirectories(OBJECT_STORE_DIRECTORY); Files.createDirectories(METADATA_STORE_DIRECTORY); Files.createDirectories(REFS_STORE_DIRECTORY); @@ -218,14 +212,11 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep /** * Determines whether FileHashStore can instantiate by validating a set of arguments and - * throwing exceptions. HashStore will not instantiate if an existing configuration file's - * properties (`hashstore.yaml`) are different from what is supplied - or if an object store - * exists at the given path, but it is missing the `hashstore.yaml` config file. - * - * If `hashstore.yaml` exists, it will retrieve its properties and compare them with the given - * values; and if there is a mismatch, an exception will be thrown. If not, it will look to see - * if any relevant HashStore directories exist (i.e. '/objects', '/metadata', '/refs') in the - * given store path and throw an exception if any of those directories exist. + * throwing exceptions. If HashStore configuration file (`hashstore.yaml`) exists, it will + * retrieve its properties and compare them with the given values; and if there is a + * mismatch, an exception will be thrown. If not, it will look to see if any relevant + * HashStore directories exist (i.e. '/objects', '/metadata', '/refs') in the given store + * path and throw an exception if any of those directories exist. * * @param storePath Path where HashStore will store objects * @param storeDepth Depth of directories @@ -247,9 +238,7 @@ protected void verifyHashStoreProperties( logFileHashStore.fatal(errMsg); throw new IllegalArgumentException(errMsg); } - // Ensure algorithm supplied is not empty, not null and supported validateAlgorithm(storeAlgorithm); - // Review metadata format (formatId) FileHashStoreUtility.ensureNotNull( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); @@ -272,7 +261,6 @@ protected void verifyHashStoreProperties( HashStoreProperties.storeMetadataNamespace.name() ); - // Verify properties when 'hashstore.yaml' found FileHashStoreUtility.checkObjectEquality("store depth", storeDepth, existingStoreDepth); FileHashStoreUtility.checkObjectEquality("store width", storeWidth, existingStoreWidth); FileHashStoreUtility.checkObjectEquality("store algorithm", storeAlgorithm, @@ -310,7 +298,7 @@ protected void verifyHashStoreProperties( } /** - * Get the properties of HashStore from 'hashstore.yaml' + * Get the properties of HashStore from an existing 'hashstore.yaml' * * @param storePath Path to root of store * @return HashMap of the properties @@ -430,8 +418,7 @@ public ObjectMetadata storeObject( logFileHashStore.debug( "FileHashStore.storeObject - Called to store object for pid: " + pid ); - - // Begin input validation + // Validate input parameters FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); @@ -552,7 +539,7 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce // algorithm, etc.) is unavailable. // // Note: This method does not tag the object to make it discoverable, so the client can - // call 'verifyObject' (optional) to check that the object is valid, and 'tagObject' + // call 'verifyObject' (optional) to check that the object is valid, and then 'tagObject' // (required) to create the reference files needed to associate the respective pids/cids. return putObject(object, "HashStoreNoPid", null, null, null, -1); } @@ -634,6 +621,7 @@ public void verifyObject( logFileHashStore.debug( "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() ); + // Validate input parameters FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); @@ -714,7 +702,6 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeMetadata"); - // Determine metadata namespace // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { @@ -732,7 +719,7 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) */ private String syncPutMetadata(InputStream metadata, String pid, String checkedFormatId) throws InterruptedException, IOException, NoSuchAlgorithmException { - // Lock pid for thread safety, transaction control and atomic writing + // Get the metadata document id, which is the synchronization value // Metadata storage requests for the same pid must be written serially // However, the same pid could be used with different formatIds, so // synchronize ids with pid + formatId; @@ -773,7 +760,6 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF return pathToStoredMetadata; } catch (IOException ioe) { - // Covers FileNotFoundException String errMsg = "FileHashStore.storeMetadata - Unable to store metadata, IOException" + " encountered: " + ioe.getMessage(); logFileHashStore.error(errMsg); @@ -819,10 +805,8 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveObject"); - // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getHashStoreDataObjectPath(pid); - // Check to see if object exists + Path objRealPath = getHashStoreDataObjectPath(pid); if (!Files.exists(objRealPath)) { String errMsg = "FileHashStore.retrieveObject - File does not exist for pid: " + pid + " with object address: " + objRealPath; @@ -830,7 +814,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, throw new FileNotFoundException(errMsg); } - // If so, return an input stream for the object + // Return an InputStream to read from the data object try { InputStream objectCidInputStream = Files.newInputStream(objRealPath); logFileHashStore.info( @@ -862,34 +846,7 @@ public InputStream retrieveMetadata(String pid, String formatId) FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "retrieveMetadata"); - // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getHashStoreMetadataPath(pid, formatId); - - // Check to see if metadata exists - if (!Files.exists(metadataCidPath)) { - String errMsg = "FileHashStore.retrieveMetadata - Metadata does not exist for pid: " - + pid + " with formatId: " + formatId + ". Metadata address: " + metadataCidPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } - - // If so, return an input stream for the metadata - try { - InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); - logFileHashStore.info( - "FileHashStore.retrieveMetadata - Retrieved metadata for pid: " + pid - + " with formatId: " + formatId - ); - return metadataCidInputStream; - - } catch (IOException ioe) { - String errMsg = - "FileHashStore.retrieveMetadata - Unexpected error when creating InputStream" - + " for pid: " + pid + " with formatId: " + formatId + ". IOException: " + ioe - .getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } + return getMetadataDocInputStream(pid, formatId); } /** @@ -906,36 +863,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); - // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); - - // Check to see if metadata exists - if (!Files.exists(metadataCidPath)) { - String errMsg = "FileHashStore.retrieveMetadata - Metadata does not exist for pid: " - + pid + " with formatId: " + DEFAULT_METADATA_NAMESPACE + ". Metadata address: " - + metadataCidPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } - - // If so, return an input stream for the metadata - InputStream metadataCidInputStream; - try { - metadataCidInputStream = Files.newInputStream(metadataCidPath); - logFileHashStore.info( - "FileHashStore.retrieveMetadata - Retrieved metadata for pid: " + pid - + " with formatId: " + DEFAULT_METADATA_NAMESPACE - ); - } catch (IOException ioe) { - String errMsg = - "FileHashStore.retrieveMetadata - Unexpected error when creating InputStream" - + " for pid: " + pid + " with formatId: " + DEFAULT_METADATA_NAMESPACE - + ". IOException: " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - - return metadataCidInputStream; + return getMetadataDocInputStream(pid, DEFAULT_METADATA_NAMESPACE); } @Override @@ -1638,7 +1566,7 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio /** * Checks whether the algorithm supplied is included in the DefaultHashAlgorithms - * + * * @param algorithm Algorithm to check * @return True if it's included */ @@ -2366,7 +2294,7 @@ protected File writeRefsFile(String ref, String refType) throws IOException { /** * Checks a given refs file for a ref. This is case-sensitive. - * + * * @param ref Authority-based or persistent identifier to search * @param absRefsPath Path to the refs file to check * @return True if cid is found, false otherwise @@ -2386,7 +2314,7 @@ protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOExce /** * Adds or removes a ref value from a refs file given an 'updateType' - * + * * @param ref Authority-based or persistent identifier * @param absRefsPath Path to the refs file to update * @param updateType "add" or "remove" @@ -2436,7 +2364,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) /** * Deletes a references file at the given path - * + * * @param absRefsPath Path to the refs file to delete * @throws IOException Unable to delete object or open pid refs file */ @@ -2552,6 +2480,50 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea } } + + /** + * Get an InputStream to a metadata document if it exists in FileHashStore + * + * @param pid Persistent or authority-based identifier + * @param formatId Metadata namespace + * @return InputStream to metadata doc + * @throws NoSuchAlgorithmException An algorithm used in the calculation is not supported + * @throws FileNotFoundException If the metadata document is not found + * @throws IOException If there is an issue returning an input stream + */ + protected InputStream getMetadataDocInputStream(String pid, String formatId) + throws NoSuchAlgorithmException, IOException, FileNotFoundException { + // Get permanent address of the pid by calculating its sha-256 hex digest + Path metadataCidPath = getHashStoreMetadataPath(pid, formatId); + + // Check to see if metadata exists + if (!Files.exists(metadataCidPath)) { + String errMsg = "FileHashStore.retrieveMetadata - Metadata does not exist for pid: " + + pid + " with formatId: " + formatId + ". Metadata address: " + metadataCidPath; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } + + // Return an InputStream to read from the metadata document + try { + InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); + logFileHashStore.info( + "FileHashStore.retrieveMetadata - Retrieved metadata for pid: " + pid + + " with formatId: " + formatId + ); + return metadataCidInputStream; + + } catch (IOException ioe) { + String errMsg = + "FileHashStore.retrieveMetadata - Unexpected error when creating InputStream" + + " for pid: " + pid + " with formatId: " + formatId + ". IOException: " + ioe + .getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + } + + /** * Get the absolute path to a HashStore data object * From fd290ea5553421f186f03967ce818090979d1082 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 10:09:12 -0700 Subject: [PATCH 367/553] Code clean-up: Review/revise comments, refactor 'deleteMetadata' methods to be DRY by extracting method 'syncRenameMetadataDocForDeletion' --- .../filehashstore/FileHashStore.java | 204 +++++++----------- 1 file changed, 80 insertions(+), 124 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b9ac2f3f..b47566d4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -875,9 +875,6 @@ public void deleteObject(String pid) // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); FileHashStoreUtility.checkForEmptyString(pid, "id", "deleteObject"); - - // Else 'idType' is pid - String cid; List deleteList = new ArrayList<>(); // Storing, deleting and untagging objects are synchronized together @@ -899,14 +896,13 @@ public void deleteObject(String pid) "FileHashStore.deleteObject - Synchronizing objectLockedIds for pid: " + pid); objectLockedIds.add(pid); } - try { // Before we begin deletion process, we look for the `cid` by calling // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { Map objInfoMap = findObject(pid); - cid = objInfoMap.get("cid"); + String cid = objInfoMap.get("cid"); // If no exceptions are thrown, we proceed to synchronization based on the `cid` // Multiple threads may access the cid reference file (which contains a list of @@ -935,22 +931,15 @@ public void deleteObject(String pid) try { // Proceed with comprehensive deletion - cid exists, nothing out of place // Get all the required paths to streamline deletion process - // Permanent address of the object Path objRealPath = getHashStoreDataObjectPath(pid); - // Cid refs file Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - // Pid refs file Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - // Rename pid refs file to prepare for deletion + // Begin deletion process deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove pid from cid refs file updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete obj and cid refs file **only** if the cid refs file is empty if (Files.size(absCidRefsPath) == 0) { - // Rename empty cid refs file to prepare for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - // Rename actual object to prepare for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); } else { String warnMsg = @@ -961,7 +950,6 @@ public void deleteObject(String pid) } // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); - // Remove metadata files deleteMetadata(pid); logFileHashStore.info( "FileHashStore.deleteObject - File and references deleted for: " + pid @@ -981,13 +969,10 @@ public void deleteObject(String pid) } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file and related metadata documents - - // Begin by renaming pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); - // Remove metadata files deleteMetadata(pid); String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid @@ -997,11 +982,8 @@ public void deleteObject(String pid) } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when the pid and cid refs file exists, // but the actual object being referenced by the pid does not exist - - // Get the cid from the pid refs file before renaming it for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - // Since we must access the cid reference file, the `cid` must be synchronized synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cidRead)) { @@ -1024,20 +1006,15 @@ public void deleteObject(String pid) } try { - // Rename pid refs file for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - // Remove the pid from the cid refs file Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); updateRefsFile(pid, absCidRefsPath, "remove"); - // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } // Delete items FileHashStoreUtility.deleteListItems(deleteList); - // Remove metadata files deleteMetadata(pid); String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid @@ -1057,13 +1034,9 @@ public void deleteObject(String pid) } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists // but the pid is not found in the cid refs file. - - // Rename pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items FileHashStoreUtility.deleteListItems(deleteList); - // Remove metadata files deleteMetadata(pid); String warnMsg = "FileHashStore.deleteObject - Pid not found in expected cid refs file for" @@ -1071,7 +1044,7 @@ public void deleteObject(String pid) logFileHashStore.warn(warnMsg); } } finally { - // Release lock on the pid + // Release lock synchronized (objectLockedIds) { logFileHashStore.debug( "FileHashStore.deleteObject - Releasing objectLockedIds for pid: " + pid); @@ -1094,56 +1067,23 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); + List deleteList = new ArrayList<>(); + // Get the path to the metadata document and metadata document name/id + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest + ); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); String metadataDocId = FileHashStoreUtility.getPidHexDigest(pid + formatId, OBJECT_STORE_ALGORITHM); - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(metadataDocId)) { - try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteMetadata - Metadata lock was interrupted while" - + " deleting metadata for: " + pid + " and formatId: " + formatId - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.deleteMetadata - Synchronizing metadataLockedIds for pid: " + pid - ); - metadataLockedIds.add(metadataDocId); - } - - try { - // Get permanent address of the metadata document - Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); - - if (!Files.exists(metadataDocPath)) { - String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid - + " with metadata address: " + metadataDocPath; - logFileHashStore.warn(errMsg); - - } else { - // Proceed to delete - Files.delete(metadataDocPath); - logFileHashStore.info( - "FileHashStore.deleteMetadata - File deleted for: " + pid - + " with metadata address: " + metadataDocPath - ); - } - } finally { - // Release lock - synchronized (metadataLockedIds) { - logFileHashStore.debug( - "FileHashStore.deleteMetadata - Releasing metadataLockedIds for pid: " + pid - + " and formatId " + formatId - ); - metadataLockedIds.remove(metadataDocId); - metadataLockedIds.notify(); - } - } + Path metadataDocPath = expectedPidMetadataDirectory.resolve(metadataDocId); + syncRenameMetadataDocForDeletion(pid, deleteList, metadataDocPath, metadataDocId); + // Delete all items in the list + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info( + "FileHashStore.deleteMetadata - Metadata document deleted for: " + pid + + " with metadata address: " + metadataDocId + ); } /** @@ -1159,52 +1099,18 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); List deleteList = new ArrayList<>(); - // Metadata directory + // Get the path to the metadata document and metadata document name/id String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest ); Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); // Add all metadata doc paths to a List to iterate over below - List metadataDocPaths = FileHashStoreUtility.getFilesFromDir( - expectedPidMetadataDirectory - ); + List metadataDocPaths = + FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); for (Path metadataDoc : metadataDocPaths) { String metadataDocId = metadataDoc.getFileName().toString(); - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(metadataDocId)) { - try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteMetadata - Metadata lock was interrupted while" - + " deleting metadata doc: " + metadataDocId + " for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.deleteMetadata - Synchronizing metadataLockedIds for pid: " + pid - ); - metadataLockedIds.add(metadataDocId); - } - - try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDoc)); - } finally { - // Release lock - synchronized (metadataLockedIds) { - logFileHashStore.debug( - "FileHashStore.deleteMetadata - Releasing metadataLockedIds for pid: " + pid - + " and doc " + metadataDocId - ); - metadataLockedIds.remove(metadataDocId); - metadataLockedIds.notify(); - } - } - + syncRenameMetadataDocForDeletion(pid, deleteList, metadataDoc, metadataDocId); } // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); @@ -1213,6 +1119,56 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept ); } + /** + * Synchronize deleting a metadata doc for deletion by renaming it and adding it to the supplied + * List. + * + * @param pid Persistent or authority-based identifier + * @param deleteList List to add the renamed metadata document + * @param metadataDocAbsPath Absolute path to the metadata document + * @param metadataDocId Metadata document name + * @throws InterruptedException When an issue with synchronization occurs + * @throws IOException If there is an issue renaming a document + */ + protected static void syncRenameMetadataDocForDeletion( + String pid, List deleteList, Path metadataDocAbsPath, String metadataDocId) + throws InterruptedException, IOException { + synchronized (metadataLockedIds) { + while (metadataLockedIds.contains(metadataDocId)) { + try { + metadataLockedIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "FileHashStore.syncRenameMetadataDocForDeletion - Metadata lock was " + + "interrupted while deleting metadata doc: " + metadataDocId + + " for pid: " + pid + ". InterruptedException: " + ie.getMessage(); + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "FileHashStore.syncRenameMetadataDocForDeletion - Synchronizing metadataLockedIds" + + " for pid: " + pid); + metadataLockedIds.add(metadataDocId); + } + + try { + if (Files.exists(metadataDocAbsPath)) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDocAbsPath)); + } + } finally { + // Release lock + synchronized (metadataLockedIds) { + logFileHashStore.debug( + "FileHashStore.syncRenameMetadataDocForDeletion - Releasing metadataLockedIds" + + " for pid: " + pid + " and doc " + metadataDocId); + metadataLockedIds.remove(metadataDocId); + metadataLockedIds.notify(); + } + } + } + @Override public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { @@ -2498,8 +2454,9 @@ protected InputStream getMetadataDocInputStream(String pid, String formatId) // Check to see if metadata exists if (!Files.exists(metadataCidPath)) { - String errMsg = "FileHashStore.retrieveMetadata - Metadata does not exist for pid: " - + pid + " with formatId: " + formatId + ". Metadata address: " + metadataCidPath; + String errMsg = + "FileHashStore.getMetadataDocInputStream - Metadata does not exist for pid: " + pid + + " with formatId: " + formatId + ". Metadata address: " + metadataCidPath; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } @@ -2508,16 +2465,15 @@ protected InputStream getMetadataDocInputStream(String pid, String formatId) try { InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); logFileHashStore.info( - "FileHashStore.retrieveMetadata - Retrieved metadata for pid: " + pid - + " with formatId: " + formatId - ); + "FileHashStore.getMetadataDocInputStream - Retrieved metadata for pid: " + pid + + " with formatId: " + formatId); return metadataCidInputStream; } catch (IOException ioe) { String errMsg = - "FileHashStore.retrieveMetadata - Unexpected error when creating InputStream" - + " for pid: " + pid + " with formatId: " + formatId + ". IOException: " + ioe - .getMessage(); + "FileHashStore.getMetadataDocInputStream - Unexpected error when creating " + + "InputStream for pid: " + pid + " with formatId: " + formatId + + ". IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } From ccda4c5f909a2a549a1d09ef4ef1512cce1b8f05 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 11:07:10 -0700 Subject: [PATCH 368/553] Code clean-up: Revise deleteMetadata to call existing method to get path to metadata document, refactor 'isDefaultAlgorithm' into 'shouldCalculateAlgorithm' to improve clarity --- .../filehashstore/FileHashStore.java | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b47566d4..66040fca 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1069,14 +1069,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx List deleteList = new ArrayList<>(); // Get the path to the metadata document and metadata document name/id - String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest - ); - Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); String metadataDocId = FileHashStoreUtility.getPidHexDigest(pid + formatId, OBJECT_STORE_ALGORITHM); - Path metadataDocPath = expectedPidMetadataDirectory.resolve(metadataDocId); + Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); syncRenameMetadataDocForDeletion(pid, deleteList, metadataDocPath, metadataDocId); // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); @@ -1099,13 +1094,13 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); List deleteList = new ArrayList<>(); - // Get the path to the metadata document and metadata document name/id + // Get the path to the pid metadata document directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest ); Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); - // Add all metadata doc paths to a List to iterate over below + // Add all metadata docs found in the metadata doc directory to a list to iterate over List metadataDocPaths = FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); for (Path metadataDoc : metadataDocPaths) { @@ -1175,7 +1170,6 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE logFileHashStore.debug( "FileHashStore.getHexDigest - Called to calculate hex digest for pid: " + pid ); - FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "getHexDigest"); validateAlgorithm(algorithm); @@ -1188,8 +1182,6 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE } else { // Get permanent address of the pid object Path objRealPath = getHashStoreDataObjectPath(pid); - - // Check to see if object exists if (!Files.exists(objRealPath)) { String errMsg = "FileHashStore.getHexDigest - File does not exist for pid: " + pid + " with object address: " + objRealPath; @@ -1405,7 +1397,6 @@ protected ObjectMetadata putObject( ); } - // Create ObjectMetadata to return with pertinent data return new ObjectMetadata(pid, objectCid, storedObjFileSize, hexDigests); } @@ -1521,20 +1512,21 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio } /** - * Checks whether the algorithm supplied is included in the DefaultHashAlgorithms + * Determines if an algorithm should be generated by checking whether the algorithm supplied is + * included in the DefaultHashAlgorithms * * @param algorithm Algorithm to check - * @return True if it's included + * @return Boolean */ - private boolean isDefaultAlgorithm(String algorithm) { - boolean isDefaultAlgorithm = false; + private boolean shouldCalculateAlgorithm(String algorithm) { + boolean shouldCalculateAlgorithm = true; for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { if (algorithm.equals(defAlgo.getName())) { - isDefaultAlgorithm = true; + shouldCalculateAlgorithm = false; break; } } - return isDefaultAlgorithm; + return shouldCalculateAlgorithm; } /** @@ -1611,7 +1603,7 @@ protected Map writeToTmpFileAndGenerateChecksums( additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(additionalAlgorithm); - generateAddAlgo = !isDefaultAlgorithm(additionalAlgorithm); + generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); } boolean generateCsAlgo = false; if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { @@ -1619,7 +1611,7 @@ protected Map writeToTmpFileAndGenerateChecksums( checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(checksumAlgorithm); - generateCsAlgo = !isDefaultAlgorithm(checksumAlgorithm); + generateCsAlgo = shouldCalculateAlgorithm(checksumAlgorithm); } FileOutputStream os = new FileOutputStream(tmpFile); From 947459a9fec926fa0b66f316d21da06d0b7dea4e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 11:23:51 -0700 Subject: [PATCH 369/553] Code clean-up: Remove unused 'deleteRefsFile' method & update junit tests, revise comments, resolve linting warnings --- .../filehashstore/FileHashStore.java | 68 +++++-------------- .../FileHashStoreReferencesTest.java | 28 -------- 2 files changed, 17 insertions(+), 79 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 66040fca..92f46c87 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1295,12 +1295,10 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep /** * Takes a given InputStream and writes it to its permanent address on disk based on the SHA-256 * hex digest value of an authority based identifier, usually provided as a persistent - * identifier (pid). - * - * If an additional algorithm is provided and supported, its respective hex digest value will be - * included in hexDigests map. If a checksum and checksumAlgorithm is provided, FileHashStore - * will validate the given checksum against the hex digest produced of the supplied - * checksumAlgorithm. + * identifier (pid). If an additional algorithm is provided and supported, its respective hex + * digest value will be included in hexDigests map. If a checksum and checksumAlgorithm is + * provided, FileHashStore will validate the given checksum against the hex digest produced + * of the supplied checksumAlgorithm. * * @param object InputStream for file * @param pid Authority-based identifier @@ -1562,7 +1560,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor boolean requestValidation = false; if (checksumAlgorithm != null && !checksumAlgorithm.trim().isEmpty()) { requestValidation = validateAlgorithm(checksumAlgorithm); - // Ensure checksum is not null or empty if checksumAlgorithm is supplied in + // Ensure checksum is not null or empty if checksumAlgorithm is supplied if (requestValidation) { FileHashStoreUtility.ensureNotNull( checksum, "checksum", "verifyChecksumParameters" @@ -1578,9 +1576,8 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor /** * Write the input stream into a given file (tmpFile) and return a HashMap consisting of * algorithms and their respective hex digests. If an additional algorithm is supplied and - * supported, it and its checksum value will be included in the hex digests map. - * - * Default algorithms: MD5, SHA-1, SHA-256, SHA-384, SHA-512 + * supported, it and its checksum value will be included in the hex digests map. Default + * algorithms: MD5, SHA-1, SHA-256, SHA-384, SHA-512 * * @param tmpFile file to write input stream data into * @param dataStream input stream of data to store @@ -1722,7 +1719,6 @@ protected void move(File source, File target, String entity) throws IOException, // Validate input parameters FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); FileHashStoreUtility.checkForEmptyString(entity, "entity", "move"); - // Entity is only used when checking for an existence of an object if (entity.equals("object") && target.exists()) { String errMsg = "FileHashStore.move - File already exists for target: " + target; logFileHashStore.warn(errMsg); @@ -1733,7 +1729,6 @@ protected void move(File source, File target, String entity) throws IOException, // Create parent directory if it doesn't exist if (!destinationDirectory.exists()) { Path destinationDirectoryPath = destinationDirectory.toPath(); - try { Files.createDirectories(destinationDirectoryPath); @@ -1791,14 +1786,12 @@ protected void move(File source, File target, String entity) throws IOException, protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug("FileHashStore - deleteObjectByCid: called to delete cid: " + cid); - // Get expected path of the cid refs file + // Get expected path of the cid refs file & permanent address of the actual cid Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - // Get permanent address of the actual cid String objRelativePath = FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid); Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - // Minimize the amount of time the cid is locked synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cid)) { try { @@ -1860,8 +1853,8 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - // Both files found, confirm that reference files are where they are expected to be if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Confirm that reference files are where they are expected to be verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); // We throw an exception so the client is aware that everything is in place String errMsg = "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid @@ -1901,7 +1894,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo throw new PidRefsFileExistsException(errMsg); } // Orphaned pid refs file found, the retrieved cid refs file exists - // but doesn't contain the cid. Proceed to overwrite the pid refs file. + // but doesn't contain the pid. Proceed to overwrite the pid refs file. // There is no return statement, so we move out of this if block. } } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { @@ -1937,6 +1930,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo ); } + // TODO: Review to see if this can be more DRY or make use of existing methods /** * Untags a data object in HashStore by deleting the 'pid reference file' and removing the 'pid' * from the 'cid reference file'. This method will never delete a data object. @@ -2171,7 +2165,7 @@ protected void verifyHashStoreRefsFiles( String pid, String cid, Path absPidRefsPath, Path absCidRefsPath ) throws FileNotFoundException, CidNotFoundInPidRefsFileException, PidNotFoundInCidRefsFileException, IOException { - // First confirm that the refs files have been created + // First confirm that the refs files have been created/moved to where they need to be if (!Files.exists(absCidRefsPath)) { String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " + absCidRefsPath + " for pid: " + pid; @@ -2310,29 +2304,6 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) } } - /** - * Deletes a references file at the given path - * - * @param absRefsPath Path to the refs file to delete - * @throws IOException Unable to delete object or open pid refs file - */ - protected void deleteRefsFile(Path absRefsPath) throws IOException { - // Check to see if pid refs file exists - if (!Files.exists(absRefsPath)) { - String errMsg = "FileHashStore.deleteRefsFile - Refs file does not exist at: " - + absRefsPath; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - - } else { - // Proceed to delete - Files.delete(absRefsPath); - logFileHashStore.debug( - "FileHashStore.deleteRefsFile - Refs file deleted at: " + absRefsPath - ); - } - } - /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the @@ -2352,13 +2323,11 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) "FileHashStore.putMetadata - Called to put metadata for pid: " + pid + " , with metadata namespace: " + formatId ); - // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "putMetadata"); - // Determine metadata namespace // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { @@ -2373,7 +2342,6 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // by using the hash of the 'pid', with the file name being the hash of the 'pid+formatId' Path pathToStoredMetadata = getHashStoreMetadataPath(pid, checkedFormatId); - // Store metadata to tmpMetadataFile File tmpMetadataFile = FileHashStoreUtility.generateTmpFile( "tmp", METADATA_TMP_FILE_DIRECTORY ); @@ -2441,7 +2409,6 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea */ protected InputStream getMetadataDocInputStream(String pid, String formatId) throws NoSuchAlgorithmException, IOException, FileNotFoundException { - // Get permanent address of the pid by calculating its sha-256 hex digest Path metadataCidPath = getHashStoreMetadataPath(pid, formatId); // Check to see if metadata exists @@ -2482,14 +2449,13 @@ protected InputStream getMetadataDocInputStream(String pid, String formatId) */ protected Path getHashStoreDataObjectPath(String abpId) throws NoSuchAlgorithmException, IOException { + // Retrieve the 'cid' from the pid refs file + String objectCid; String hashedId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); - // `hashId` here is used to calculate the address of the pid refs file String pidRefsFileRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashedId ); Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRefsFileRelativePath); - // Attempt to retrieve the cid from the pid refs file - String objectCid; if (!Files.exists(pathToPidRefsFile)) { String errMsg = "FileHashStore.getHashStoreDataObjectPath - Pid Refs file does not exist for pid: " @@ -2518,10 +2484,10 @@ protected Path getHashStoreDataObjectPath(String abpId) throws NoSuchAlgorithmEx */ protected Path getHashStoreMetadataPath(String abpId, String formatId) throws NoSuchAlgorithmException { - // Get the pid metadata directory (the sharded path of the hashId) - String hashId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); + // Get the pid metadata directory + String hashedId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashId + DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashedId ); // The file name for the metadata document is the hash of the supplied 'pid + 'formatId' String metadataDocHash = diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java index eff7c831..267f09b1 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java @@ -550,34 +550,6 @@ public void updateRefsFile_cidRefsEmpty() throws Exception { assertTrue(Files.exists(cidRefsFilePath)); } - /** - * Check that deleteRefsFile deletes file - */ - @Test - public void deleteRefsFile_fileDeleted() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - fileHashStore.deleteRefsFile(pidRefsFilePath); - - assertFalse(Files.exists(pidRefsFilePath)); - } - - /** - * Check that deletePidRefsFile throws exception when there is no file to delete - */ - @Test - public void deletePidRefsFile_missingPidRefsFile() { - String pid = "dou.test.1"; - - assertThrows(FileNotFoundException.class, () -> { - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - fileHashStore.deleteRefsFile(pidRefsFilePath); - }); - } - /** * Check that deleteCidRefsPid deletes pid from its cid refs file */ From 487bbcea17beacdb57836fcadc0ee9e0ee67d7de Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 11:33:18 -0700 Subject: [PATCH 370/553] Resolve linting warning in 'putObject' by replacing .delete() call on tmpFile to using Files.delete() --- .../java/org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 92f46c87..29661753 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1385,7 +1385,7 @@ protected ObjectMetadata putObject( String errMsg = "FileHashStore.putObject - File already exists for pid: " + pid + ". Object address: " + objRealPath + ". Deleting temporary file."; logFileHashStore.warn(errMsg); - tmpFile.delete(); + Files.delete(tmpFilePath); } else { // Move object File permFile = objRealPath.toFile(); From c2b67dee8a11f2de2d7702737efb674b54d3164a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 12:51:32 -0700 Subject: [PATCH 371/553] Clean-up 'deleteObject' and 'unTagObject', refactor to begin with removing pid from cid refs file --- .../filehashstore/FileHashStore.java | 33 +++++-------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 29661753..d07f682d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -930,17 +930,15 @@ public void deleteObject(String pid) try { // Proceed with comprehensive deletion - cid exists, nothing out of place - // Get all the required paths to streamline deletion process - Path objRealPath = getHashStoreDataObjectPath(pid); Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); // Begin deletion process - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); updateRefsFile(pid, absCidRefsPath, "remove"); if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + Path objRealPath = getHashStoreDataObjectPath(pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } else { String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid @@ -948,12 +946,12 @@ public void deleteObject(String pid) + "deletion."; logFileHashStore.warn(warnMsg); } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); deleteMetadata(pid); logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath); + "FileHashStore.deleteObject - File and references deleted for: " + pid); } finally { // Release lock @@ -1006,13 +1004,13 @@ public void deleteObject(String pid) } try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); updateRefsFile(pid, absCidRefsPath, "remove"); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); deleteMetadata(pid); @@ -2004,18 +2002,13 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } try { - // Cid refs file + // Get paths to reference files to work on Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - // Pid refs file Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - // Rename pid refs file to prepare for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Remove pid from cid refs file + // Begin deletion process updateRefsFile(pid, absCidRefsPath, "remove"); - // Delete the cid refs file **only** if the cid refs file is empty if (Files.size(absCidRefsPath) == 0) { - // Rename empty cid refs file to prepare for deletion deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } else { String warnMsg = @@ -2024,6 +2017,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, + "deletion."; logFileHashStore.warn(warnMsg); } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info( @@ -2043,8 +2037,6 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file - - // Begin by renaming pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items @@ -2057,11 +2049,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when the pid and cid refs file exists, // but the actual object being referenced by the pid does not exist - - // Get the cid from the pid refs file before renaming it for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - // Since we must access the cid reference file, the `cid` must be synchronized synchronized (referenceLockedCids) { while (referenceLockedCids.contains(cidRead)) { @@ -2084,17 +2073,13 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } try { - // Rename pid refs file for deletion - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - // Remove the pid from the cid refs file Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); updateRefsFile(pid, absCidRefsPath, "remove"); - // Add the cid reference file to deleteList if it's now empty if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "FileHashStore.unTagObject - Object with cid: " + cidRead From 3b5b7d65e7c8eb1d678e042c74a0796a5ae3cd98 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 12:57:27 -0700 Subject: [PATCH 372/553] Rename method 'getMetadataDocInputStream' to 'getHashStoreMetadataInputStream' --- .../filehashstore/FileHashStore.java | 88 +++++++++---------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d07f682d..36808758 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -846,7 +846,7 @@ public InputStream retrieveMetadata(String pid, String formatId) FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "retrieveMetadata"); - return getMetadataDocInputStream(pid, formatId); + return getHashStoreMetadataInputStream(pid, formatId); } /** @@ -863,7 +863,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); - return getMetadataDocInputStream(pid, DEFAULT_METADATA_NAMESPACE); + return getHashStoreMetadataInputStream(pid, DEFAULT_METADATA_NAMESPACE); } @Override @@ -2381,49 +2381,6 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea } } - - /** - * Get an InputStream to a metadata document if it exists in FileHashStore - * - * @param pid Persistent or authority-based identifier - * @param formatId Metadata namespace - * @return InputStream to metadata doc - * @throws NoSuchAlgorithmException An algorithm used in the calculation is not supported - * @throws FileNotFoundException If the metadata document is not found - * @throws IOException If there is an issue returning an input stream - */ - protected InputStream getMetadataDocInputStream(String pid, String formatId) - throws NoSuchAlgorithmException, IOException, FileNotFoundException { - Path metadataCidPath = getHashStoreMetadataPath(pid, formatId); - - // Check to see if metadata exists - if (!Files.exists(metadataCidPath)) { - String errMsg = - "FileHashStore.getMetadataDocInputStream - Metadata does not exist for pid: " + pid - + " with formatId: " + formatId + ". Metadata address: " + metadataCidPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } - - // Return an InputStream to read from the metadata document - try { - InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); - logFileHashStore.info( - "FileHashStore.getMetadataDocInputStream - Retrieved metadata for pid: " + pid - + " with formatId: " + formatId); - return metadataCidInputStream; - - } catch (IOException ioe) { - String errMsg = - "FileHashStore.getMetadataDocInputStream - Unexpected error when creating " - + "InputStream for pid: " + pid + " with formatId: " + formatId - + ". IOException: " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - } - - /** * Get the absolute path to a HashStore data object * @@ -2483,6 +2440,47 @@ protected Path getHashStoreMetadataPath(String abpId, String formatId) ); } + /** + * Get an InputStream to a metadata document if it exists in FileHashStore + * + * @param pid Persistent or authority-based identifier + * @param formatId Metadata namespace + * @return InputStream to metadata doc + * @throws NoSuchAlgorithmException An algorithm used in the calculation is not supported + * @throws FileNotFoundException If the metadata document is not found + * @throws IOException If there is an issue returning an input stream + */ + protected InputStream getHashStoreMetadataInputStream(String pid, String formatId) + throws NoSuchAlgorithmException, IOException, FileNotFoundException { + Path metadataCidPath = getHashStoreMetadataPath(pid, formatId); + + // Check to see if metadata exists + if (!Files.exists(metadataCidPath)) { + String errMsg = + "FileHashStore.getMetadataDocInputStream - Metadata does not exist for pid: " + pid + + " with formatId: " + formatId + ". Metadata address: " + metadataCidPath; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } + + // Return an InputStream to read from the metadata document + try { + InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); + logFileHashStore.info( + "FileHashStore.getMetadataDocInputStream - Retrieved metadata for pid: " + pid + + " with formatId: " + formatId); + return metadataCidInputStream; + + } catch (IOException ioe) { + String errMsg = + "FileHashStore.getMetadataDocInputStream - Unexpected error when creating " + + "InputStream for pid: " + pid + " with formatId: " + formatId + + ". IOException: " + ioe.getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + } + /** * Get the absolute path to a HashStore pid or cid ref file * From 83382f9c292a0cba57d6065bfe3b50572c305aff Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 16:02:44 -0700 Subject: [PATCH 373/553] Code clean-up: remove redundant logging texts --- .../filehashstore/FileHashStore.java | 650 +++++++----------- 1 file changed, 248 insertions(+), 402 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 36808758..05df41a8 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -124,7 +124,7 @@ enum HashStoreProperties { */ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { - logFileHashStore.info("FileHashStore - Call received to instantiate FileHashStore"); + logFileHashStore.info("Initializing FileHashStore"); FileHashStoreUtility.ensureNotNull( hashstoreProperties, "hashstoreProperties", "FileHashStore - constructor" ); @@ -174,21 +174,17 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep Files.createDirectories(REFS_TMP_FILE_DIRECTORY); Files.createDirectories(REFS_PID_FILE_DIRECTORY); Files.createDirectories(REFS_CID_FILE_DIRECTORY); - logFileHashStore.debug("FileHashStore - Created store and store tmp directories."); + logFileHashStore.debug("Created store and store tmp directories."); } catch (IOException ioe) { - logFileHashStore.fatal( - "FileHashStore - Failed to initialize FileHashStore - unable to create" - + " directories. Exception: " + ioe.getMessage() - ); + logFileHashStore.fatal("Failed to initialize FileHashStore - unable to create" + + " directories. Exception: " + ioe.getMessage()); throw ioe; } logFileHashStore.debug( - "FileHashStore - HashStore initialized. Store Depth: " + DIRECTORY_DEPTH - + ". Store Width: " + DIRECTORY_WIDTH + ". Store Algorithm: " - + OBJECT_STORE_ALGORITHM + ". Store Metadata Namespace: " - + DEFAULT_METADATA_NAMESPACE - ); + "HashStore initialized. Store Depth: " + DIRECTORY_DEPTH + ". Store Width: " + + DIRECTORY_WIDTH + ". Store Algorithm: " + OBJECT_STORE_ALGORITHM + + ". Store Metadata Namespace: " + DEFAULT_METADATA_NAMESPACE); // Write configuration file 'hashstore.yaml' to store HashStore properties Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); @@ -198,13 +194,10 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep ); writeHashStoreYaml(hashstoreYamlContent); logFileHashStore.info( - "FileHashStore - 'hashstore.yaml' written to storePath: " + hashstoreYaml - ); + "hashstore.yaml written to storePath: " + hashstoreYaml); } else { - logFileHashStore.info( - "FileHashStore - 'hashstore.yaml' exists and has been verified." - + " Initializing FileHashStore." - ); + logFileHashStore.info("hashstore.yaml exists and has been verified." + + " Initializing FileHashStore."); } } @@ -233,8 +226,8 @@ protected void verifyHashStoreProperties( String storeMetadataNamespace ) throws NoSuchAlgorithmException, IOException, IllegalArgumentException, IllegalStateException { if (storeDepth <= 0 || storeWidth <= 0) { - String errMsg = "FileHashStore - Depth and width must be greater than 0." + " Depth: " - + storeDepth + ". Width: " + storeWidth; + String errMsg = + "Depth and width must be > than 0. Depth: " + storeDepth + ". Width: " + storeWidth; logFileHashStore.fatal(errMsg); throw new IllegalArgumentException(errMsg); } @@ -249,7 +242,7 @@ protected void verifyHashStoreProperties( // Check to see if configuration exists before initializing Path hashstoreYamlPredictedPath = Paths.get(storePath + "/hashstore.yaml"); if (Files.exists(hashstoreYamlPredictedPath)) { - logFileHashStore.debug("FileHashStore - 'hashstore.yaml' found, checking properties."); + logFileHashStore.debug("hashstore.yaml found, checking properties."); HashMap hsProperties = loadHashStoreYaml(storePath); int existingStoreDepth = (int) hsProperties.get(HashStoreProperties.storeDepth.name()); @@ -267,14 +260,12 @@ protected void verifyHashStoreProperties( existingStoreAlgorithm); FileHashStoreUtility.checkObjectEquality( "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs); - logFileHashStore.info("FileHashStore - 'hashstore.yaml' found and HashStore verified"); + logFileHashStore.info("hashstore.yaml found and HashStore verified"); } else { // Check if HashStore related folders exist at the given store path - logFileHashStore.debug( - "FileHashStore - 'hashstore.yaml' not found, checking store path for" - + " `/objects`, `/metadata` and `/refs` directories." - ); + logFileHashStore.debug("hashstore.yaml not found, checking store path for" + + " `/objects`, `/metadata` and `/refs` directories."); if (Files.isDirectory(storePath)) { Path[] conflictingDirectories = { storePath.resolve("objects"), @@ -292,8 +283,7 @@ protected void verifyHashStoreProperties( } } } - logFileHashStore.debug( - "FileHashStore - 'hashstore.yaml' not found. Supplied properties accepted."); + logFileHashStore.debug("hashstore.yaml not found. Supplied properties accepted."); } } @@ -331,9 +321,7 @@ protected HashMap loadHashStoreYaml(Path storePath) throws IOExc } catch (IOException ioe) { logFileHashStore.fatal( - "FileHashStore.getHashStoreYaml() - Unable to retrieve 'hashstore.yaml'." - + " IOException: " + ioe.getMessage() - ); + " Unable to retrieve 'hashstore.yaml'. IOException: " + ioe.getMessage()); throw ioe; } @@ -356,9 +344,7 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { } catch (IOException ioe) { logFileHashStore.fatal( - "FileHashStore.writeHashStoreYaml() - Unable to write 'hashstore.yaml'." - + " IOException: " + ioe.getMessage() - ); + "Unable to write 'hashstore.yaml'. IOException: " + ioe.getMessage()); throw ioe; } } @@ -415,9 +401,7 @@ public ObjectMetadata storeObject( String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException, PidRefsFileExistsException { - logFileHashStore.debug( - "FileHashStore.storeObject - Called to store object for pid: " + pid - ); + logFileHashStore.debug("Storing data object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); @@ -456,24 +440,20 @@ private ObjectMetadata syncPutObject( // An object is stored once and only once synchronized (objectLockedIds) { if (objectLockedIds.contains(pid)) { - String errMsg = - "FileHashStore.syncPutObject - Duplicate object request encountered for pid: " - + pid + ". Already in progress."; + String errMsg = "Duplicate object request encountered for pid: " + pid + + ". Already in progress."; logFileHashStore.warn(errMsg); throw new RuntimeException(errMsg); } - logFileHashStore.debug( - "FileHashStore.syncPutObject - Synchronizing objectLockedIds for pid: " + pid - ); + logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); objectLockedIds.add(pid); } try { logFileHashStore.debug( - "FileHashStore.syncPutObject - calling .putObject() to store pid: " + pid - + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum - + ". checksumAlgorithm: " + checksumAlgorithm - ); + "putObject() called to store pid: " + pid + ". additionalAlgorithm: " + + additionalAlgorithm + ". checksum: " + checksum + ". checksumAlgorithm: " + + checksumAlgorithm); // Store object ObjectMetadata objInfo = putObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize @@ -481,46 +461,43 @@ private ObjectMetadata syncPutObject( // Tag object String cid = objInfo.getCid(); tagObject(pid, cid); - - logFileHashStore.info( - "FileHashStore.syncPutObject - Object stored for pid: " + pid - + ". Permanent address: " + getHashStoreDataObjectPath(pid) - ); objInfo.setPid(pid); + logFileHashStore.info( + "Object stored for pid: " + pid + " at " + getHashStoreDataObjectPath(pid)); return objInfo; } catch (NoSuchAlgorithmException nsae) { - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". NoSuchAlgorithmException: " + nsae.getMessage(); + String errMsg = + "Unable to store object for pid: " + pid + ". NoSuchAlgorithmException: " + + nsae.getMessage(); logFileHashStore.error(errMsg); throw nsae; } catch (PidRefsFileExistsException prfee) { - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". PidRefsFileExistsException: " + prfee.getMessage(); + String errMsg = + "Unable to store object for pid: " + pid + ". PidRefsFileExistsException: " + + prfee.getMessage(); logFileHashStore.error(errMsg); throw prfee; } catch (IOException ioe) { // Covers AtomicMoveNotSupportedException, FileNotFoundException - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". IOException: " + ioe.getMessage(); + String errMsg = + "Unable to store object for pid: " + pid + ". IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; } catch (RuntimeException re) { // Covers SecurityException, IllegalArgumentException, NullPointerException - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". Runtime Exception: " + re.getMessage(); + String errMsg = "Unable to store object for pid: " + pid + ". Runtime Exception: " + + re.getMessage(); logFileHashStore.error(errMsg); throw re; } finally { // Release lock synchronized (objectLockedIds) { - logFileHashStore.debug( - "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid - ); + logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); objectLockedIds.remove(pid); objectLockedIds.notify(); } @@ -548,9 +525,7 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce @Override public void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, InterruptedException { - logFileHashStore.debug( - "FileHashStore.tagObject - Called to tag cid (" + cid + ") with pid: " + pid - ); + logFileHashStore.debug("Tagging cid (" + cid + ") with pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "tagObject"); FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); @@ -565,16 +540,13 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } catch (InterruptedException ie) { String errMsg = - "FileHashStore.tagObject - referenceLockedCids lock was interrupted while" - + " waiting to tag pid: " + pid + " with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); + "referenceLockedCids lock was interrupted while waiting to tag pid: " + pid + + " with cid: " + cid + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "FileHashStore.tagObject - Synchronizing referenceLockedCids for pid: " + pid - ); + logFileHashStore.debug("Synchronizing referenceLockedCids for pid: " + pid); referenceLockedCids.add(cid); } @@ -584,14 +556,12 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } catch (HashStoreRefsAlreadyExistException hsrfae) { // This exception is thrown when the pid and cid are already tagged appropriately String errMsg = - "FileHashStore.tagObject - HashStore refs files already exist for pid " + pid + - " and cid: " + cid; + "HashStore refs files already exist for pid " + pid + " and cid: " + cid; throw new HashStoreRefsAlreadyExistException(errMsg); } catch (PidRefsFileExistsException prfe) { - String errMsg = - "FileHashStore.tagObject - pid: " + pid + " already references another cid." + - " A pid can only reference one cid."; + String errMsg = "pid: " + pid + " already references another cid." + + " A pid can only reference one cid."; throw new PidRefsFileExistsException(errMsg); } catch (Exception e) { @@ -603,8 +573,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.tagObject - Releasing referenceLockedCids for pid: " + pid - + " with cid: " + cid); + "Releasing referenceLockedCids for pid: " + pid + " with cid: " + cid); referenceLockedCids.remove(cid); referenceLockedCids.notify(); } @@ -618,9 +587,7 @@ public void verifyObject( throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, IOException { - logFileHashStore.debug( - "FileHashStore.verifyObject - Called to verify object with id: " + objectInfo.getCid() - ); + logFileHashStore.debug("Verifying data object for cid: " + objectInfo.getCid()); // Validate input parameters FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); @@ -646,17 +613,15 @@ public void verifyObject( checksumAlgorithm); } catch (IOException ioe) { String errMsg = - "FileHashStore.verifyObject - Unexpected error when calculating a checksum" - + " for cid: " + objCid + " with algorithm (" + checksumAlgorithm - + ") that is not part of the default list. " + ioe.getMessage(); + "Unexpected error when calculating a checksum for cid: " + objCid + + " with algorithm (" + checksumAlgorithm + + ") that is not part of the default list. " + ioe.getMessage(); throw new IOException(errMsg); } } catch (NoSuchAlgorithmException nsae) { - String errMsg = - "FileHashStore.verifyObject - checksumAlgorithm given: " + checksumAlgorithm - + " is not supported. Supported algorithms: " + Arrays.toString( - SUPPORTED_HASH_ALGORITHMS - ); + String errMsg = "checksumAlgorithm given: " + checksumAlgorithm + + " is not supported. Supported algorithms: " + Arrays.toString( + SUPPORTED_HASH_ALGORITHMS); logFileHashStore.error(errMsg); throw new UnsupportedHashAlgorithmException(errMsg); } @@ -666,9 +631,10 @@ public void verifyObject( if (deleteInvalidObject) { deleteObjectByCid(objCid); } - String errMsg = "FileHashStore.verifyObject - Object content invalid for cid: " + objCid - + ". Expected checksum: " + checksum + ". Actual checksum calculated: " - + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")"; + String errMsg = + "Object content invalid for cid: " + objCid + ". Expected checksum: " + checksum + + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + + checksumAlgorithm + ")"; logFileHashStore.error(errMsg); throw new NonMatchingChecksumException(errMsg); } @@ -677,15 +643,16 @@ public void verifyObject( if (deleteInvalidObject) { deleteObjectByCid(objCid); } - String errMsg = "FileHashStore.verifyObject - Object size invalid for cid: " + objCid - + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize; + String errMsg = "Object size invalid for cid: " + objCid + ". Expected size: " + objSize + + ". Actual size: " + objInfoRetrievedSize; logFileHashStore.error(errMsg); throw new NonMatchingObjSizeException(errMsg); } - String infoMsg = "FileHashStore.verifyObject - Object has been validated for cid: " + objCid - + ". Expected checksum: " + checksum + ". Actual checksum calculated: " - + digestFromHexDigests + " (algorithm: " + checksumAlgorithm + ")"; + String infoMsg = + "Object has been validated for cid: " + objCid + ". Expected checksum: " + checksum + + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + + checksumAlgorithm + ")"; logFileHashStore.info(infoMsg); } @@ -693,10 +660,7 @@ public void verifyObject( public String storeMetadata(InputStream metadata, String pid, String formatId) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.storeMetadata - Called to store metadata for pid: " + pid - + ", with formatId: " + formatId - ); + logFileHashStore.debug("Storing metadata for pid: " + pid + ", with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); @@ -733,42 +697,36 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF } catch (InterruptedException ie) { String errMsg = - "FileHashStore.storeMetadata - Metadata lock was interrupted while" - + " storing metadata for: " + pid + " and formatId: " + checkedFormatId - + ". InterruptedException: " + ie.getMessage(); + "Metadata lock was interrupted while storing metadata for: " + pid + + " and formatId: " + checkedFormatId + ". InterruptedException: " + + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid - ); + logFileHashStore.debug("Synchronizing metadataLockedIds for pid: " + pid); metadataLockedIds.add(metadataDocId); } try { logFileHashStore.debug( - "FileHashStore.storeMetadata - .putMetadata() request for pid: " + pid - + ". formatId: " + checkedFormatId - ); + "putMetadata() called to store metadata for pid: " + pid + ", with formatId: " + + checkedFormatId + " for metadata document: " + metadataDocId); // Store metadata String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( - "FileHashStore.storeMetadata - Metadata stored for pid: " + pid + " at: " - + pathToStoredMetadata - ); + "Metadata stored for pid: " + pid + " at: " + pathToStoredMetadata); return pathToStoredMetadata; } catch (IOException ioe) { - String errMsg = "FileHashStore.storeMetadata - Unable to store metadata, IOException" - + " encountered: " + ioe.getMessage(); + String errMsg = + "Unable to store metadata, IOException encountered: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; } catch (NoSuchAlgorithmException nsae) { - String errMsg = - "FileHashStore.storeMetadata - Unable to store metadata, algorithm to calculate" - + " permanent address is not supported: " + nsae.getMessage(); + String errMsg = "Unable to store metadata, algorithm to calculate" + + " permanent address is not supported: " + nsae.getMessage(); logFileHashStore.error(errMsg); throw nsae; @@ -776,9 +734,8 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF // Release lock synchronized (metadataLockedIds) { logFileHashStore.debug( - "FileHashStore.storeMetadata - Releasing metadataLockedIds for pid: " + pid - + " and formatId " + checkedFormatId - ); + "Releasing metadataLockedIds for pid: " + pid + " and formatId " + + checkedFormatId); metadataLockedIds.remove(metadataDocId); metadataLockedIds.notify(); } @@ -798,9 +755,7 @@ public String storeMetadata(InputStream metadata, String pid) throws IOException @Override public InputStream retrieveObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.retrieveObject - Called to retrieve object for pid: " + pid - ); + logFileHashStore.debug("Retrieving InputStream to data object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveObject"); @@ -808,8 +763,8 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, // Check to see if object exists Path objRealPath = getHashStoreDataObjectPath(pid); if (!Files.exists(objRealPath)) { - String errMsg = "FileHashStore.retrieveObject - File does not exist for pid: " + pid - + " with object address: " + objRealPath; + String errMsg = + "File does not exist for pid: " + pid + " with object address: " + objRealPath; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } @@ -817,15 +772,13 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, // Return an InputStream to read from the data object try { InputStream objectCidInputStream = Files.newInputStream(objRealPath); - logFileHashStore.info( - "FileHashStore.retrieveObject - Retrieved object for pid: " + pid - ); + logFileHashStore.info("Retrieved object for pid: " + pid); return objectCidInputStream; } catch (IOException ioe) { String errMsg = - "FileHashStore.retrieveObject - Unexpected error when creating InputStream" - + " for pid: " + pid + ", IOException: " + ioe.getMessage(); + "Unexpected error when creating InputStream for pid: " + pid + ", IOException: " + + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -837,9 +790,7 @@ public InputStream retrieveMetadata(String pid, String formatId) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( - "FileHashStore.retrieveMetadata - Called to retrieve metadata for pid: " + pid - + " with formatId: " + formatId - ); + "Retrieving metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); @@ -856,9 +807,7 @@ public InputStream retrieveMetadata(String pid, String formatId) public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( - "FileHashStore.retrieveMetadata - Called to retrieve metadata for pid: " + pid - + " with default metadata namespace: " + DEFAULT_METADATA_NAMESPACE - ); + "Retrieving metadata for pid: " + pid + " with default metadata namespace: "); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); @@ -870,8 +819,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, public void deleteObject(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException { - logFileHashStore.debug( - "FileHashStore.deleteObject - Called to delete object for pid: " + pid); + logFileHashStore.debug("Deleting object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); FileHashStoreUtility.checkForEmptyString(pid, "id", "deleteObject"); @@ -886,16 +834,15 @@ public void deleteObject(String pid) objectLockedIds.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { - String errMsg = "FileHashStore.deleteObject - Delete request for pid: " + pid - + " has been interrupted."; + String errMsg = "Delete request for pid: " + pid + " has been interrupted."; logFileHashStore.warn(errMsg); throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing objectLockedIds for pid: " + pid); + logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); objectLockedIds.add(pid); } + try { // Before we begin deletion process, we look for the `cid` by calling // `findObject` which will throw custom exceptions if there is an issue with @@ -914,17 +861,15 @@ public void deleteObject(String pid) referenceLockedCids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); + String errMsg = "referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cid); + "Synchronizing referenceLockedCids for pid: " + pid + " with cid: " + cid); referenceLockedCids.add(cid); } @@ -940,25 +885,22 @@ public void deleteObject(String pid) deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } else { - String warnMsg = - "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object " - + "deletion."; + String warnMsg = "cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object deletion."; logFileHashStore.warn(warnMsg); } deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); deleteMetadata(pid); - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid); + logFileHashStore.info("Data file and references deleted for: " + pid); } finally { // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cid); + "Releasing referenceLockedCids for pid: " + pid + " with cid: " + + cid); referenceLockedCids.remove(cid); referenceLockedCids.notify(); } @@ -972,9 +914,8 @@ public void deleteObject(String pid) // Delete items FileHashStoreUtility.deleteListItems(deleteList); deleteMetadata(pid); - String warnMsg = - "FileHashStore.deleteObject - Cid refs file does not exist for pid: " + pid - + ". Deleted orphan pid refs file and metadata."; + String warnMsg = "Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); } catch (OrphanRefsFilesException orfe) { @@ -989,17 +930,16 @@ public void deleteObject(String pid) referenceLockedCids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); + String errMsg = "referenceLockedCids lock was " + + "interrupted while waiting to delete objects for pid: " + pid + + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cidRead); + "Synchronizing referenceLockedCids for pid: " + pid + " with cid: " + + cidRead); referenceLockedCids.add(cidRead); } @@ -1014,7 +954,7 @@ public void deleteObject(String pid) // Delete items FileHashStoreUtility.deleteListItems(deleteList); deleteMetadata(pid); - String warnMsg = "FileHashStore.deleteObject - Object with cid: " + cidRead + String warnMsg = "Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files and metadata."; logFileHashStore.warn(warnMsg); @@ -1023,8 +963,8 @@ public void deleteObject(String pid) // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cidRead); + "Releasing referenceLockedCids for pid: " + pid + " with cid: " + + cidRead); referenceLockedCids.remove(cidRead); referenceLockedCids.notify(); } @@ -1036,16 +976,14 @@ public void deleteObject(String pid) deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); FileHashStoreUtility.deleteListItems(deleteList); deleteMetadata(pid); - String warnMsg = - "FileHashStore.deleteObject - Pid not found in expected cid refs file for" - + " pid: " + pid + ". Deleted orphan pid refs file and metadata."; + String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + + ". Deleted orphan pid refs file and metadata."; logFileHashStore.warn(warnMsg); } } finally { // Release lock synchronized (objectLockedIds) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing objectLockedIds for pid: " + pid); + logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); objectLockedIds.remove(pid); objectLockedIds.notify(); } @@ -1056,9 +994,7 @@ public void deleteObject(String pid) public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug( - "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid + " with " - + "formatId: " + formatId - ); + "Deleting metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); @@ -1074,9 +1010,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info( - "FileHashStore.deleteMetadata - Metadata document deleted for: " + pid - + " with metadata address: " + metadataDocId - ); + "Metadata document deleted for: " + pid + " with metadata address: " + metadataDocId); } /** @@ -1085,9 +1019,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx @Override public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException { - logFileHashStore.debug( - "FileHashStore.deleteMetadata - Called to delete all metadata for pid: " + pid - ); + logFileHashStore.debug("Deleting all metadata documents for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); @@ -1107,9 +1039,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept } // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); - logFileHashStore.info( - "FileHashStore.deleteMetadata - All related metadata deleted for: " + pid - ); + logFileHashStore.info("All metadata documents deleted for: " + pid); } /** @@ -1132,17 +1062,14 @@ protected static void syncRenameMetadataDocForDeletion( metadataLockedIds.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.syncRenameMetadataDocForDeletion - Metadata lock was " - + "interrupted while deleting metadata doc: " + metadataDocId - + " for pid: " + pid + ". InterruptedException: " + ie.getMessage(); + String errMsg = "Metadata lock was interrupted while deleting metadata doc: " + + metadataDocId + " for pid: " + pid + ". InterruptedException: " + + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "FileHashStore.syncRenameMetadataDocForDeletion - Synchronizing metadataLockedIds" - + " for pid: " + pid); + logFileHashStore.debug("Synchronizing metadataLockedIds for pid: " + pid); metadataLockedIds.add(metadataDocId); } @@ -1154,8 +1081,7 @@ protected static void syncRenameMetadataDocForDeletion( // Release lock synchronized (metadataLockedIds) { logFileHashStore.debug( - "FileHashStore.syncRenameMetadataDocForDeletion - Releasing metadataLockedIds" - + " for pid: " + pid + " and doc " + metadataDocId); + "Releasing metadataLockedIds for pid: " + pid + " and doc " + metadataDocId); metadataLockedIds.remove(metadataDocId); metadataLockedIds.notify(); } @@ -1165,9 +1091,7 @@ protected static void syncRenameMetadataDocForDeletion( @Override public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.getHexDigest - Called to calculate hex digest for pid: " + pid - ); + logFileHashStore.debug("Calculating hex digest for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "getHexDigest"); validateAlgorithm(algorithm); @@ -1181,8 +1105,8 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE // Get permanent address of the pid object Path objRealPath = getHashStoreDataObjectPath(pid); if (!Files.exists(objRealPath)) { - String errMsg = "FileHashStore.getHexDigest - File does not exist for pid: " + pid - + " with object address: " + objRealPath; + String errMsg = + "File does not exist for pid: " + pid + " with object address: " + objRealPath; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } @@ -1192,8 +1116,8 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE dataStream, algorithm ); logFileHashStore.info( - "FileHashStore.getHexDigest - Hex digest calculated for pid: " + pid - + ", with hex digest value: " + mdObjectHexDigest + "Hex digest calculated for pid: " + pid + ", with hex digest value: " + + mdObjectHexDigest ); return mdObjectHexDigest; } @@ -1222,7 +1146,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE protected Map findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { - logFileHashStore.debug("FileHashStore.findObject - Called to find object for pid: " + pid); + logFileHashStore.debug("Finding object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); @@ -1235,17 +1159,14 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { - String errMsg = - "FileHashStore.findObject - Cid refs file does not exist for cid: " + cid - + " with address: " + absCidRefsPath + ", but pid refs file exists."; + String errMsg = "Cid refs file does not exist for cid: " + cid + " with address: " + + absCidRefsPath + ", but pid refs file exists."; logFileHashStore.error(errMsg); throw new OrphanPidRefsFileException(errMsg); } // If the pid is found in the expected cid refs file, and the object exists, return it if (isStringInRefsFile(pid, absCidRefsPath)) { - logFileHashStore.info( - "FileHashStore.findObject - Cid (" + cid + ") found for pid: " + pid - ); + logFileHashStore.info("cid (" + cid + ") found for pid: " + pid); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid @@ -1268,14 +1189,14 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep return objInfoMap; } else { - String errMsg = "FileHashStore.findObject - Object with cid: " + cid + String errMsg = "Object with cid: " + cid + " does not exist, but pid and cid reference file found for pid: " + pid; logFileHashStore.error(errMsg); throw new OrphanRefsFilesException(errMsg); } } else { - String errMsg = "FileHashStore.findObject - Pid refs file exists, but pid (" + pid + String errMsg = "Pid refs file exists, but pid (" + pid + ") not found in cid refs file for cid: " + cid + " with address: " + absCidRefsPath; logFileHashStore.error(errMsg); @@ -1283,8 +1204,9 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep } } else { - String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid - + ". Pid refs file does not exist at: " + absPidRefsPath; + String errMsg = + "Unable to find cid for pid: " + pid + ". Pid refs file does not exist at: " + + absPidRefsPath; logFileHashStore.error(errMsg); throw new PidRefsFileNotFoundException(errMsg); } @@ -1325,7 +1247,7 @@ protected ObjectMetadata putObject( ) throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, PidRefsFileExistsException, IllegalArgumentException, NullPointerException, AtomicMoveNotSupportedException { - logFileHashStore.debug("FileHashStore.putObject - Called to put object for pid: " + pid); + logFileHashStore.debug("Begin writing data object for pid: " + pid); // If validation is desired, checksumAlgorithm and checksum must both be present boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); // Validate additional algorithm if not null or empty, throws exception if not supported @@ -1340,7 +1262,7 @@ protected ObjectMetadata putObject( } // Generate tmp file and write to it - logFileHashStore.debug("FileHashStore.putObject - Generating tmpFile"); + logFileHashStore.debug("Generating tmpFile"); File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); Path tmpFilePath = tmpFile.toPath(); Map hexDigests; @@ -1352,13 +1274,12 @@ protected ObjectMetadata putObject( // If the process to write to the tmpFile is interrupted for any reason, // we will delete the tmpFile. boolean deleteStatus = tmpFile.delete(); - String errMsg = - "FileHashStore.putObject - Unexpected Exception while storing object for: " + pid; + String errMsg = "Unexpected Exception while storing object for: " + pid; if (deleteStatus) { errMsg = errMsg + ". Deleting temp file: " + tmpFile + ". Aborting request."; } else { - errMsg = errMsg + ". Failed to delete temp file: " + tmpFile - + ". Aborting request."; + errMsg = + errMsg + ". Failed to delete temp file: " + tmpFile + ". Aborting request."; } logFileHashStore.error(errMsg); throw new IOException(errMsg); @@ -1380,17 +1301,16 @@ protected ObjectMetadata putObject( // Confirm that the object does not yet exist, delete tmpFile if so if (Files.exists(objRealPath)) { - String errMsg = "FileHashStore.putObject - File already exists for pid: " + pid - + ". Object address: " + objRealPath + ". Deleting temporary file."; + String errMsg = + "File already exists for pid: " + pid + ". Object address: " + objRealPath + + ". Deleting temporary file."; logFileHashStore.warn(errMsg); Files.delete(tmpFilePath); } else { // Move object File permFile = objRealPath.toFile(); move(tmpFile, permFile, "object"); - logFileHashStore.debug( - "FileHashStore.putObject - Move object success, permanent address: " + objRealPath - ); + logFileHashStore.debug("Successfully moved data object: " + objRealPath); } return new ObjectMetadata(pid, objectCid, storedObjFileSize, hexDigests); @@ -1422,34 +1342,28 @@ private void validateTmpObject( } catch (Exception ge) { String errMsg = - "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " - + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile + ". " - + ge.getMessage(); + "objSize given is not equal to the stored object size. ObjSize: " + objSize + + ". storedObjFileSize: " + storedObjFileSize + + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); logFileHashStore.error(errMsg); throw new NonMatchingObjSizeException(errMsg); } String errMsg = - "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " - + storedObjFileSize + ". Deleting tmpFile: " + tmpFile; + "objSize given is not equal to the stored object size. ObjSize: " + objSize + + ". storedObjFileSize: " + storedObjFileSize + ". Deleting tmpFile: " + + tmpFile; logFileHashStore.error(errMsg); throw new NonMatchingObjSizeException(errMsg); } } if (requestValidation) { - logFileHashStore.info( - "FileHashStore.validateTmpObject - Validating object, checksum arguments" - + " supplied and valid." - ); + logFileHashStore.info("Validating object, checksum arguments supplied and valid."); String digestFromHexDigests = hexDigests.get(checksumAlgorithm); if (digestFromHexDigests == null) { - String errMsg = - "FileHashStore.validateTmpObject - checksum not found in hex digest map" - + " when validating object." + " checksumAlgorithm checked: " - + checksumAlgorithm; + String errMsg = "Checksum not found in hex digest map when validating object. " + + "checksumAlgorithm checked: " + checksumAlgorithm; logFileHashStore.error(errMsg); throw new NoSuchAlgorithmException(errMsg); } @@ -1460,19 +1374,17 @@ private void validateTmpObject( Files.delete(tmpFile); } catch (Exception ge) { - String errMsg = - "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" - + " is not equal to the calculated hex digest: " + digestFromHexDigests - + ". Checksum" + " provided: " + checksum - + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); + String errMsg = "Object cannot be validated. Checksum given is not equal to " + + "the calculated hex digest: " + digestFromHexDigests + + ". Checksum provided: " + checksum + ". Failed to delete tmpFile: " + + tmpFile + ". " + ge.getMessage(); logFileHashStore.error(errMsg); throw new NonMatchingChecksumException(errMsg); } - String errMsg = - "FileHashStore.validateTmpObject - Checksum given is not equal to the" - + " calculated hex digest: " + digestFromHexDigests + ". Checksum" - + " provided: " + checksum + ". tmpFile has been deleted: " + tmpFile; + String errMsg = "Checksum given is not equal to the calculated hex digest: " + + digestFromHexDigests + ". Checksum" + " provided: " + checksum + + ". tmpFile has been deleted: " + tmpFile; logFileHashStore.error(errMsg); throw new NonMatchingChecksumException(errMsg); } @@ -1496,10 +1408,8 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); if (!algorithmSupported) { - String errMsg = "FileHashStore - validateAlgorithm: Algorithm not supported: " - + algorithm + ". Supported algorithms: " + Arrays.toString( - SUPPORTED_HASH_ALGORITHMS - ); + String errMsg = "Algorithm not supported: " + algorithm + ". Supported algorithms: " + + Arrays.toString(SUPPORTED_HASH_ALGORITHMS); logFileHashStore.error(errMsg); throw new NoSuchAlgorithmException(errMsg); } @@ -1619,16 +1529,12 @@ protected Map writeToTmpFileAndGenerateChecksums( MessageDigest checksumAlgo = null; if (generateAddAlgo) { logFileHashStore.debug( - "FileHashStore.writeToTmpFileAndGenerateChecksums - Adding additional algorithm" - + " to hex digest map, algorithm: " + additionalAlgorithm - ); + "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); } if (generateCsAlgo) { logFileHashStore.debug( - "FileHashStore.writeToTmpFileAndGenerateChecksums - Adding checksum algorithm" - + " to hex digest map, algorithm: " + checksumAlgorithm - ); + "Adding checksum algorithm to hex digest map, algorithm: " + checksumAlgorithm); checksumAlgo = MessageDigest.getInstance(checksumAlgorithm); } @@ -1652,9 +1558,7 @@ protected Map writeToTmpFileAndGenerateChecksums( } } catch (IOException ioe) { - String errMsg = - "FileHashStore.writeToTmpFileAndGenerateChecksums - Unexpected Exception ~ " + ioe - .getClass().getName() + ": " + ioe.getMessage(); + String errMsg = "Unexpected Exception ~ " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; @@ -1687,9 +1591,8 @@ protected Map writeToTmpFileAndGenerateChecksums( hexDigests.put(checksumAlgorithm, extraChecksumDigest); } logFileHashStore.debug( - "FileHashStore.writeToTmpFileAndGenerateChecksums - Object has been written to" - + " tmpFile: " + tmpFile.getName() + ". To be moved to: " + sha256Digest - ); + "Object has been written to tmpFile: " + tmpFile.getName() + ". To be moved to: " + + sha256Digest); return hexDigests; } @@ -1711,14 +1614,12 @@ protected Map writeToTmpFileAndGenerateChecksums( protected void move(File source, File target, String entity) throws IOException, SecurityException, AtomicMoveNotSupportedException, FileAlreadyExistsException { logFileHashStore.debug( - "FileHashStore.move - called to move entity type: " + entity + ", from source: " - + source + ", to target: " + target - ); + "Moving " + entity + ", from source: " + source + ", to target: " + target); // Validate input parameters FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); FileHashStoreUtility.checkForEmptyString(entity, "entity", "move"); if (entity.equals("object") && target.exists()) { - String errMsg = "FileHashStore.move - File already exists for target: " + target; + String errMsg = "File already exists for target: " + target; logFileHashStore.warn(errMsg); return; } @@ -1731,10 +1632,8 @@ protected void move(File source, File target, String entity) throws IOException, Files.createDirectories(destinationDirectoryPath); } catch (FileAlreadyExistsException faee) { - logFileHashStore.warn( - "FileHashStore.move - Directory already exists at: " - + destinationDirectoryPath + " - Skipping directory creation" - ); + logFileHashStore.warn("Directory already exists at: " + destinationDirectoryPath + + " - Skipping directory creation"); } } @@ -1744,29 +1643,22 @@ protected void move(File source, File target, String entity) throws IOException, try { Files.move(sourceFilePath, targetFilePath, StandardCopyOption.ATOMIC_MOVE); logFileHashStore.debug( - "FileHashStore.move - file moved from: " + sourceFilePath + ", to: " - + targetFilePath - ); + "File moved from: " + sourceFilePath + ", to: " + targetFilePath); } catch (FileAlreadyExistsException faee) { logFileHashStore.warn( - "FileHashStore.move - File already exists, skipping request to move object." - + " Source: " + source + ". Target: " + target - ); + "File already exists, skipping request to move object. Source: " + source + + ". Target: " + target); } catch (AtomicMoveNotSupportedException amnse) { - logFileHashStore.error( - "FileHashStore.move - StandardCopyOption.ATOMIC_MOVE failed. AtomicMove is" - + " not supported across file systems. Source: " + source + ". Target: " - + target - ); + logFileHashStore.error("StandardCopyOption.ATOMIC_MOVE failed. AtomicMove is" + + " not supported across file systems. Source: " + source + + ". Target: " + target); throw amnse; } catch (IOException ioe) { logFileHashStore.error( - "FileHashStore.move - Unable to move file. Source: " + source + ". Target: " - + target - ); + "Unable to move file. Source: " + source + ". Target: " + target); throw ioe; } @@ -1783,7 +1675,7 @@ protected void move(File source, File target, String entity) throws IOException, */ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException, InterruptedException { - logFileHashStore.debug("FileHashStore - deleteObjectByCid: called to delete cid: " + cid); + logFileHashStore.debug("Called to delete data object with cid: " + cid); // Get expected path of the cid refs file & permanent address of the actual cid Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); String objRelativePath = @@ -1797,40 +1689,33 @@ protected void deleteObjectByCid(String cid) } catch (InterruptedException ie) { String errMsg = - "FileHashStore.deleteObjectByCid - referenceLockedCids lock was " - + "interrupted while waiting to delete object with cid: " + cid - + ". InterruptedException: " + ie.getMessage(); + "referenceLockedCids lock was interrupted while waiting to delete object " + + "with cid: " + cid + ". InterruptedException: " + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "FileHashStore.deleteObjectByCid - Synchronizing referenceLockedCids for cid: " - + cid); + logFileHashStore.debug("Synchronizing referenceLockedCids for cid: " + cid); referenceLockedCids.add(cid); } try { if (Files.exists(absCidRefsPath)) { // The cid refs file exists, so the cid object cannot be deleted. - String warnMsg = "FileHashStore - deleteObjectByCid: cid refs file still contains" - + " references, skipping deletion."; + String warnMsg = "cid refs file still contains references, skipping deletion."; logFileHashStore.warn(warnMsg); } else { // If file exists, delete it. if (Files.exists(expectedRealPath)) { Files.delete(expectedRealPath); } - String debugMsg = - "FileHashStore - deleteObjectByCid: object deleted at" + expectedRealPath; + String debugMsg = "Object deleted at" + expectedRealPath; logFileHashStore.debug(debugMsg); } } finally { // Release lock synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.deleteObjectByCid - Releasing referenceLockedCids for cid: " - + cid); + logFileHashStore.debug("Releasing referenceLockedCids for cid: " + cid); referenceLockedCids.remove(cid); referenceLockedCids.notify(); } @@ -1855,8 +1740,8 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo // Confirm that reference files are where they are expected to be verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); // We throw an exception so the client is aware that everything is in place - String errMsg = "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid - + " already exists and is tagged with pid: " + pid; + String errMsg = + "Object with cid: " + cid + " already exists and is tagged with pid: " + pid; logFileHashStore.error(errMsg); throw new HashStoreRefsAlreadyExistException(errMsg); @@ -1872,8 +1757,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo move(cidRefsTmpFile, absPathCidRefsFile, "refs"); verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( - "FileHashStore.storeHashStoreRefsFiles - Pid refs file exists for pid: " - + pid + ", but cid refs file for: " + cid + "Pid refs file exists for pid: " + pid + ", but cid refs file for: " + cid + " is missing. Missing cid refs file created and tagging completed."); return; } else { @@ -1884,10 +1768,9 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath )) { // This pid is accounted for and tagged as expected. - String errMsg = - "FileHashStore.storeHashStoreRefsFiles - pid refs file already exists for" - + " pid: " + pid + ", and the associated cid refs file contains the " - + "pid. A pid can only reference one cid."; + String errMsg = "Pid refs file already exists for pid: " + pid + + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; logFileHashStore.error(errMsg); throw new PidRefsFileExistsException(errMsg); } @@ -1906,9 +1789,8 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo move(pidRefsTmpFile, absPathPidRefsFile, "refs"); verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( - "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid - + " has been updated and tagged successfully with pid: " + pid - ); + "Object with cid: " + cid + " has been updated and tagged successfully with pid: " + + pid); return; } @@ -1923,9 +1805,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo // Verify tagging process, this throws an exception if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( - "FileHashStore.storeHashStoreRefsFiles - Object with cid: " + cid - + " has been tagged successfully with pid: " + pid - ); + "Object with cid: " + cid + " has been tagged successfully with pid: " + pid); } // TODO: Review to see if this can be more DRY or make use of existing methods @@ -1958,14 +1838,12 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, objectLockedIds.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { - String errMsg = "FileHashStore.unTagObject - Untag request for pid: " + pid - + " has been interrupted."; + String errMsg = "Untag request for pid: " + pid + " has been interrupted."; logFileHashStore.warn(errMsg); throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "FileHashStore.unTagObject - Synchronizing objectLockedIds for pid: " + pid); + logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); objectLockedIds.add(pid); } @@ -1988,16 +1866,15 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } catch (InterruptedException ie) { String errMsg = - "FileHashStore.unTagObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); + "referenceLockedCids lock was interrupted while waiting to delete" + + " objects for pid: " + pid + ". InterruptedException: " + + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } logFileHashStore.debug( - "FileHashStore.unTagObject - Synchronizing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cid); + "Synchronizing referenceLockedCids for pid: " + pid + " with cid: " + cid); referenceLockedCids.add(cid); } @@ -2011,24 +1888,20 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } else { - String warnMsg = - "FileHashStore.unTagObject - cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object " - + "deletion."; + String warnMsg = "Cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object " + "deletion."; logFileHashStore.warn(warnMsg); } deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); - logFileHashStore.info( - "FileHashStore.unTagObject - Untagged pid: " + pid + " with cid: " + cid); + logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); } finally { // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.unTagObject - Releasing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cid); + "Releasing referenceLockedCids for pid: " + pid + " with cid: " + cid); referenceLockedCids.remove(cid); referenceLockedCids.notify(); } @@ -2041,9 +1914,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = - "FileHashStore.unTagObject - Cid refs file does not exist for pid: " + pid - + ". Deleted orphan pid refs file."; + String warnMsg = "Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); } catch (OrphanRefsFilesException orfe) { @@ -2059,16 +1931,16 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } catch (InterruptedException ie) { String errMsg = - "FileHashStore.unTagObject - referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); + "referenceLockedCids lock was interrupted while waiting to delete" + + " objects for pid: " + pid + ". InterruptedException: " + + ie.getMessage(); logFileHashStore.error(errMsg); throw new InterruptedException(errMsg); } } logFileHashStore.debug( - "FileHashStore.unTagObject - Synchronizing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cidRead); + "Synchronizing referenceLockedCids for " + "pid: " + pid + " with cid: " + + cidRead); referenceLockedCids.add(cidRead); } @@ -2082,7 +1954,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = "FileHashStore.unTagObject - Object with cid: " + cidRead + String warnMsg = "Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files."; logFileHashStore.warn(warnMsg); @@ -2091,8 +1963,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, // Release lock synchronized (referenceLockedCids) { logFileHashStore.debug( - "FileHashStore.unTagObject - Releasing referenceLockedCids for " - + "pid: " + pid + " with cid: " + cidRead); + "Releasing referenceLockedCids for pid: " + pid + " with cid: " + + cidRead); referenceLockedCids.remove(cidRead); referenceLockedCids.notify(); } @@ -2106,9 +1978,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = - "FileHashStore.unTagObject - Pid not found in expected cid refs file for" - + " pid: " + pid + ". Deleted orphan pid refs file."; + String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); } catch (PidRefsFileNotFoundException prfnfe) { // `findObject` throws this exception if the pid refs file is not found @@ -2117,16 +1988,15 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { updateRefsFile(pid, absCidRefsPath, "remove"); - String errMsg = "FileHashStore.unTagObject - Pid refs file not found, " - + "removed pid found in cid refs file: " + absCidRefsPath; + String errMsg = "Pid refs file not found, removed pid found in cid refs file: " + + absCidRefsPath; logFileHashStore.warn(errMsg); } } } finally { // Release lock on the pid synchronized (objectLockedIds) { - logFileHashStore.debug( - "FileHashStore.unTagObject - Releasing objectLockedIds for pid: " + pid); + logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); objectLockedIds.remove(pid); objectLockedIds.notify(); } @@ -2152,14 +2022,12 @@ protected void verifyHashStoreRefsFiles( PidNotFoundInCidRefsFileException, IOException { // First confirm that the refs files have been created/moved to where they need to be if (!Files.exists(absCidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " - + absCidRefsPath + " for pid: " + pid; + String errMsg = "Cid refs file is missing: " + absCidRefsPath + " for pid: " + pid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } if (!Files.exists(absPidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - pid refs file is missing: " - + absPidRefsPath + " for cid: " + cid; + String errMsg = "Pid refs file is missing: " + absPidRefsPath + " for cid: " + cid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } @@ -2167,22 +2035,21 @@ protected void verifyHashStoreRefsFiles( try { String cidRead = new String(Files.readAllBytes(absPidRefsPath)); if (!cidRead.equals(cid)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Unexpected cid: " - + cidRead + " found in pid refs file: " + absPidRefsPath + ". Expected cid: " - + cid; + String errMsg = + "Unexpected cid: " + cidRead + " found in pid refs file: " + absPidRefsPath + + ". Expected cid: " + cid; logFileHashStore.error(errMsg); throw new CidNotFoundInPidRefsFileException(errMsg); } if (!isStringInRefsFile(pid, absCidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " - + pid + " in cid refs file: " + absCidRefsPath; + String errMsg = + "Missing expected pid: " + pid + " in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); throw new PidNotFoundInCidRefsFileException(errMsg); } } catch (IOException ioe) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); + logFileHashStore.error(ioe.getMessage()); + throw ioe; } } @@ -2206,14 +2073,12 @@ protected File writeRefsFile(String ref, String refType) throws IOException { writer.write(ref); writer.close(); - logFileHashStore.debug( - "FileHashStore.writeRefsFile - " + refType + " refs file written for: " + ref - ); + logFileHashStore.debug(refType + " refs file written for: " + ref); return cidRefsTmpFile; } catch (IOException ioe) { - String errMsg = "FileHashStore.writeRefsFile - Unable to write refs file for ref: " - + refType + " IOException: " + ioe.getMessage(); + String errMsg = "Unable to write refs file for ref: " + refType + " IOException: " + + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -2266,9 +2131,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); logFileHashStore.debug( - "FileHashStore.updateRefsFile - Ref: " + ref - + " has been added to refs file: " + absRefsPath - ); + "Ref: " + ref + " has been added to refs file: " + absRefsPath); } if (updateType.equals("remove")) { @@ -2276,16 +2139,13 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); logFileHashStore.debug( - "FileHashStore.updateRefsFile - Ref: " + ref - + " has been removed from refs file: " + absRefsPath - ); + "Ref: " + ref + " has been removed from refs file: " + absRefsPath); } } // The lock is automatically released when the try block exits } catch (IOException ioe) { - String errMsg = "FileHashStore.updateRefsFile - " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); + logFileHashStore.error(ioe.getMessage()); + throw ioe; } } @@ -2305,9 +2165,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) protected String putMetadata(InputStream metadata, String pid, String formatId) throws NoSuchAlgorithmException, IOException { logFileHashStore.debug( - "FileHashStore.putMetadata - Called to put metadata for pid: " + pid - + " , with metadata namespace: " + formatId - ); + "Writing metadata for pid: " + pid + " , with metadata namespace: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); @@ -2333,16 +2191,13 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) boolean tmpMetadataWritten = writeToTmpMetadataFile(tmpMetadataFile, metadata); if (tmpMetadataWritten) { logFileHashStore.debug( - "FileHashStore.putMetadata - tmp metadata file has been written, moving to" - + " permanent location: " + pathToStoredMetadata - ); + "Tmp metadata file has been written, moving to" + " permanent location: " + + pathToStoredMetadata); File permMetadataFile = pathToStoredMetadata.toFile(); move(tmpMetadataFile, permMetadataFile, "metadata"); } logFileHashStore.debug( - "FileHashStore.putMetadata - Move metadata success, permanent address: " - + pathToStoredMetadata - ); + "Metadata moved successfully, permanent address: " + pathToStoredMetadata); return pathToStoredMetadata.toString(); } @@ -2369,10 +2224,7 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea return true; } catch (IOException ioe) { - String errMsg = - "FileHashStore.writeToTmpMetadataFile - Unexpected IOException encountered: " + ioe - .getMessage(); - logFileHashStore.error(errMsg); + logFileHashStore.error(ioe.getMessage()); throw ioe; } finally { @@ -2400,9 +2252,8 @@ protected Path getHashStoreDataObjectPath(String abpId) throws NoSuchAlgorithmEx Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRefsFileRelativePath); if (!Files.exists(pathToPidRefsFile)) { String errMsg = - "FileHashStore.getHashStoreDataObjectPath - Pid Refs file does not exist for pid: " - + abpId + " with object address: " + pathToPidRefsFile + ". Cannot retrieve " - + "cid."; + "Pid Refs file does not exist for pid: " + abpId + " with object address: " + + pathToPidRefsFile + ". Cannot retrieve " + "cid."; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } else { @@ -2457,8 +2308,8 @@ protected InputStream getHashStoreMetadataInputStream(String pid, String formatI // Check to see if metadata exists if (!Files.exists(metadataCidPath)) { String errMsg = - "FileHashStore.getMetadataDocInputStream - Metadata does not exist for pid: " + pid - + " with formatId: " + formatId + ". Metadata address: " + metadataCidPath; + "Metadata does not exist for pid: " + pid + " with formatId: " + formatId + + ". Metadata address: " + metadataCidPath; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } @@ -2467,15 +2318,12 @@ protected InputStream getHashStoreMetadataInputStream(String pid, String formatI try { InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); logFileHashStore.info( - "FileHashStore.getMetadataDocInputStream - Retrieved metadata for pid: " + pid - + " with formatId: " + formatId); + "Retrieved metadata for pid: " + pid + " with formatId: " + formatId); return metadataCidInputStream; } catch (IOException ioe) { - String errMsg = - "FileHashStore.getMetadataDocInputStream - Unexpected error when creating " - + "InputStream for pid: " + pid + " with formatId: " + formatId - + ". IOException: " + ioe.getMessage(); + String errMsg = "Unexpected error when creating InputStream for pid: " + pid + + " with formatId: " + formatId + ". IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -2504,9 +2352,7 @@ protected Path getHashStoreRefsPath(String abpcId, String refType) ); realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } else { - String errMsg = - "FileHashStore.getHashStoreRefsPath - formatId must be 'pid' or 'cid' when entity" - + " is 'refs'"; + String errMsg = "formatId must be 'pid' or 'cid'"; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); } From cebdd2f215d349a3a8f89ca3811046670f91170c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 16:08:55 -0700 Subject: [PATCH 374/553] Accept IDE warning suggestion to weaken specific list types to a Collection --- .../hashstore/filehashstore/FileHashStore.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 05df41a8..c0b2373a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -21,6 +21,7 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -54,9 +55,9 @@ public class FileHashStore implements HashStore { private static final Log logFileHashStore = LogFactory.getLog(FileHashStore.class); private static final int TIME_OUT_MILLISEC = 1000; - private static final ArrayList objectLockedIds = new ArrayList<>(100); - private static final ArrayList metadataLockedIds = new ArrayList<>(100); - private static final ArrayList referenceLockedCids = new ArrayList<>(100); + private static final Collection objectLockedIds = new ArrayList<>(100); + private static final Collection metadataLockedIds = new ArrayList<>(100); + private static final Collection referenceLockedCids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; @@ -1054,7 +1055,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept * @throws IOException If there is an issue renaming a document */ protected static void syncRenameMetadataDocForDeletion( - String pid, List deleteList, Path metadataDocAbsPath, String metadataDocId) + String pid, Collection deleteList, Path metadataDocAbsPath, String metadataDocId) throws InterruptedException, IOException { synchronized (metadataLockedIds) { while (metadataLockedIds.contains(metadataDocId)) { @@ -2124,7 +2125,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) try (FileChannel channel = FileChannel.open( absRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE ); FileLock ignored = channel.lock()) { - List lines = new ArrayList<>(Files.readAllLines(absRefsPath)); + Collection lines = new ArrayList<>(Files.readAllLines(absRefsPath)); if (updateType.equals("add")) { lines.add(ref); From c778e2d45eedcb42e87edc2e0a141c8097a2d139 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 16:20:22 -0700 Subject: [PATCH 375/553] Code clean-up: fix ide warnings for 'FileHashStoreUtility' class --- .../hashstore/filehashstore/FileHashStoreUtility.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index f1ef7f7a..356fc120 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -11,6 +11,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Objects; import java.util.Random; @@ -165,9 +166,9 @@ public static Path renamePathForDeletion(Path pathToRename) throws IOException { * @param deleteList Directory to check * @throws IOException Unexpected I/O error when deleting files */ - public static void deleteListItems(List deleteList) throws IOException { + public static void deleteListItems(Collection deleteList) throws IOException { ensureNotNull(deleteList, "deleteList", "deleteListItems"); - if (deleteList.size() > 0) { + if (!deleteList.isEmpty()) { for (Path deleteItem : deleteList) { if (Files.exists(deleteItem)) { Files.delete(deleteItem); @@ -244,7 +245,7 @@ public static void checkNotNegativeOrZero(long longInt, String method) * @return String */ public static String getHierarchicalPathString(int depth, int width, String digest) { - List tokens = new ArrayList<>(); + Collection tokens = new ArrayList<>(); int digestLength = digest.length(); for (int i = 0; i < depth; i++) { int start = i * width; @@ -256,7 +257,7 @@ public static String getHierarchicalPathString(int depth, int width, String dige tokens.add(digest.substring(depth * width)); } - List stringArray = new ArrayList<>(); + Collection stringArray = new ArrayList<>(); for (String str : tokens) { if (!str.trim().isEmpty()) { stringArray.add(str); From 261ca0093180b29e0d7b4845893ff046a452d73f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 16:20:55 -0700 Subject: [PATCH 376/553] Code clean-up: fix ide warnings for 'ObjectMetadata' class --- src/main/java/org/dataone/hashstore/ObjectMetadata.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 763d4952..8ae7fdf2 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -10,7 +10,7 @@ * underlying file system details. */ public class ObjectMetadata { - private String pid = null; + private String pid; private final String cid; private final long size; private final Map hexDigests; From 75b56c278188c764428a5c7491a3a074293e3032 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 16:21:49 -0700 Subject: [PATCH 377/553] Code clean-up: remove redundant printstacktrace statement in 'HashStoreFactory' --- src/main/java/org/dataone/hashstore/HashStoreFactory.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreFactory.java b/src/main/java/org/dataone/hashstore/HashStoreFactory.java index a3d9b9ff..40a3344d 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreFactory.java +++ b/src/main/java/org/dataone/hashstore/HashStoreFactory.java @@ -75,7 +75,6 @@ public static HashStore getHashStore(String classPackage, Properties storeProper String errMsg = "HashStoreFactory - Error instantiating 'FileHashStore'" + "(likely related to `.newInstance()`): " + ie.getCause(); logHashStore.error(errMsg); - ie.printStackTrace(); throw new HashStoreFactoryException(errMsg); } catch (InvocationTargetException ite) { From cc1a43b16a865201d8989948e7a74aa538b87346 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 11 Jul 2024 16:24:17 -0700 Subject: [PATCH 378/553] Code clean-up: address warnings for HashStore interface --- .../java/org/dataone/hashstore/HashStore.java | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 1276e366..1efe4e19 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -4,13 +4,9 @@ import java.io.IOException; import java.io.InputStream; import java.security.NoSuchAlgorithmException; -import java.util.Map; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; -import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; -import org.dataone.hashstore.exceptions.OrphanRefsFilesException; -import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; @@ -71,7 +67,7 @@ public interface HashStore { * illegal arguments (ex. empty pid) or null pointers * @throws InterruptedException When tagging pid and cid process is interrupted */ - public ObjectMetadata storeObject( + ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, @@ -82,7 +78,7 @@ public ObjectMetadata storeObject( * * Store an object only without reference files. */ - public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, + ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; /** @@ -100,7 +96,7 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce * @throws InterruptedException When tagObject is waiting to execute but is * interrupted */ - public void tagObject(String pid, String cid) throws IOException, + void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, InterruptedException; @@ -124,7 +120,7 @@ public void tagObject(String pid, String cid) throws IOException, * @throws IOException Issue with recalculating supported algo for * checksum not found */ - public void verifyObject( + void verifyObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, boolean deleteInvalidObject) throws NonMatchingObjSizeException, NonMatchingChecksumException, @@ -153,7 +149,7 @@ public void verifyObject( * @throws NoSuchAlgorithmException Algorithm used to calculate permanent address is not * supported */ - public String storeMetadata(InputStream metadata, String pid, String formatId) + String storeMetadata(InputStream metadata, String pid, String formatId) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException; @@ -163,7 +159,7 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) * If the '(InputStream metadata, String pid)' signature is used, the metadata format * stored will default to `sysmeta`. */ - public String storeMetadata(InputStream metadata, String pid) throws IOException, + String storeMetadata(InputStream metadata, String pid) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, NoSuchAlgorithmException; @@ -178,7 +174,7 @@ public String storeMetadata(InputStream metadata, String pid) throws IOException * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - public InputStream retrieveObject(String pid) throws IllegalArgumentException, + InputStream retrieveObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** @@ -194,7 +190,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not * supported */ - public InputStream retrieveMetadata(String pid, String formatId) + InputStream retrieveMetadata(String pid, String formatId) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; @@ -204,7 +200,7 @@ public InputStream retrieveMetadata(String pid, String formatId) * If `retrieveMetadata` is called with signature (String pid), the metadata * document retrieved will be the given pid's 'sysmeta' */ - public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, + InputStream retrieveMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; /** @@ -220,7 +216,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, * address is not supported * @throws InterruptedException When deletion synchronization is interrupted */ - public void deleteObject(String pid) throws IllegalArgumentException, IOException, + void deleteObject(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException; /** @@ -235,7 +231,7 @@ public void deleteObject(String pid) throws IllegalArgumentException, IOExceptio * supported * @throws InterruptedException Issue with synchronization on metadata doc */ - public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, + void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException; /** @@ -248,7 +244,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * supported * @throws InterruptedException Issue with synchronization on metadata doc */ - public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, + void deleteMetadata(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException; /** @@ -264,6 +260,6 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, + String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException; } From 207937a597e3f9d8389fc3e8598a28c27eda62ea Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 08:57:15 -0700 Subject: [PATCH 379/553] Code clean-up: address warnings for HashStoreClient class --- .../dataone/hashstore/HashStoreClient.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index d4b15c5a..fcc6a6f7 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -11,8 +11,8 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Properties; import java.sql.Connection; @@ -34,8 +34,9 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; /** - * HashStoreClient is a command line interface that allows a developer to create a new HashStore or - * interact directly with an existing HashStore. See 'README.md' for usage examples. + * HashStoreClient is a command line interface is a development tool to create a new + * HashStore or interact directly with an existing HashStore through the command line. + * See 'README.md' for usage examples. */ public class HashStoreClient { private static HashStore hashStore; @@ -492,7 +493,7 @@ private static void testWithKnbvm( // For each row, get guid, docid, rev, checksum and checksum_algorithm // and create a List to loop over - List> resultObjList = new ArrayList<>(); + Collection> resultObjList = new ArrayList<>(); while (resultSet.next()) { String guid = resultSet.getString("guid"); String docid = resultSet.getString("docid"); @@ -566,7 +567,7 @@ private static void testWithKnbvm( * @param resultObjList List containing items with the following properties: 'pid', 'path', * 'algorithm', 'checksum' */ - private static void storeObjsWithChecksumFromDb(List> resultObjList) { + private static void storeObjsWithChecksumFromDb(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -623,7 +624,7 @@ private static void storeObjsWithChecksumFromDb(List> result * @param resultObjList List containing items with the following properties: 'pid', 'algorithm', * 'checksum' */ - private static void retrieveAndValidateObjs(List> resultObjList) { + private static void retrieveAndValidateObjs(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -683,7 +684,7 @@ private static void retrieveAndValidateObjs(List> resultObjL * * @param resultObjList List containing items with the following property: 'pid' */ - private static void deleteObjectsFromStore(List> resultObjList) { + private static void deleteObjectsFromStore(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -727,7 +728,7 @@ private static void deleteObjectsFromStore(List> resultObjLi * @param resultObjList List containing items that have the following properties: 'pid', 'path' * and 'namespace' */ - private static void storeMetadataFromDb(List> resultObjList) { + private static void storeMetadataFromDb(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -773,7 +774,7 @@ private static void storeMetadataFromDb(List> resultObjList) * @param resultObjList List containing items with the following properties: 'pid', 'namespace', * 'algorithm', 'checksum' */ - private static void retrieveAndValidateMetadata(List> resultObjList) { + private static void retrieveAndValidateMetadata(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -838,7 +839,7 @@ private static void retrieveAndValidateMetadata(List> result * * @param resultObjList List containing items with the following property: 'pid' */ - private static void deleteMetadataFromStore(List> resultObjList) { + private static void deleteMetadataFromStore(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { From 9010843b92989738ab069d643a8874f3f789c1a9 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 08:59:06 -0700 Subject: [PATCH 380/553] Code clean-up: Address additional warnigs from test classes --- .../org/dataone/hashstore/HashStoreClientTest.java | 6 +++--- .../filehashstore/FileHashStoreInterfaceTest.java | 12 ++++++------ .../filehashstore/FileHashStorePublicTest.java | 3 ++- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index a4bee7aa..96f0cb58 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -12,7 +12,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; -import java.util.List; +import java.util.Collection; import java.util.Properties; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; @@ -67,7 +67,7 @@ public void getHashStore() { * @return String */ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String digest) { - List tokens = new ArrayList<>(); + Collection tokens = new ArrayList<>(); int digestLength = digest.length(); for (int i = 0; i < dirDepth; i++) { int start = i * dirWidth; @@ -79,7 +79,7 @@ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String di tokens.add(digest.substring(dirDepth * dirWidth)); } - List stringArray = new ArrayList<>(); + Collection stringArray = new ArrayList<>(); for (String str : tokens) { if (!str.trim().isEmpty()) { stringArray.add(str); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 730ed88f..d5eb9fca 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -20,11 +20,11 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Properties; -import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -703,7 +703,7 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); - HashStoreRunnable + Runnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); executorService.execute(request); } @@ -714,7 +714,7 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { // Check cid refs file that every pid is found String cidSha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cidSha256DigestFromTestData, "cid"); - Set stringSet = new HashSet<>(pidModifiedList); + Collection stringSet = new HashSet<>(pidModifiedList); List lines = Files.readAllLines(cidRefsFilePath); boolean allFoundPidsFound = true; for (String line : lines) { @@ -1966,7 +1966,7 @@ public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - List pidModifiedList = new ArrayList<>(); + Collection pidModifiedList = new ArrayList<>(); for (int i = 1; i <= 1000; i++) { pidModifiedList.add(pid + ".dou.test." + i); } @@ -1978,13 +1978,13 @@ public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { // Store 1000 for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); - HashStoreRunnable + Runnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); executorService.execute(request); } // Delete 1000 for (String pidAdjusted : pidModifiedList) { - HashStoreRunnable + Runnable request = new HashStoreRunnable(fileHashStore, 2, pidAdjusted); executorService.execute(request); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java index 1c327be1..de9e6909 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java @@ -14,6 +14,7 @@ import java.util.HashMap; import java.util.Properties; +import org.dataone.hashstore.HashStore; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -421,7 +422,7 @@ public void testExistingHashStoreConfiguration_missingYaml() { "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); - FileHashStore secondHashStore = new FileHashStore(storeProperties); + HashStore secondHashStore = new FileHashStore(storeProperties); // Confirm config present Path newStoreHashStoreYaml = newStoreDirectory.resolve("hashstore.yaml"); From 67bd37c123f3123211d5c4f0a6bf571969e484e2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 09:05:35 -0700 Subject: [PATCH 381/553] Accept ide warning to Weaken type from List to Collection in 'FileHashStore' --- .../dataone/hashstore/filehashstore/FileHashStore.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c0b2373a..be7484cf 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -824,7 +824,7 @@ public void deleteObject(String pid) // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); FileHashStoreUtility.checkForEmptyString(pid, "id", "deleteObject"); - List deleteList = new ArrayList<>(); + Collection deleteList = new ArrayList<>(); // Storing, deleting and untagging objects are synchronized together // Duplicate store object requests for a pid are rejected, but deleting an object @@ -1002,7 +1002,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); - List deleteList = new ArrayList<>(); + Collection deleteList = new ArrayList<>(); // Get the path to the metadata document and metadata document name/id String metadataDocId = FileHashStoreUtility.getPidHexDigest(pid + formatId, OBJECT_STORE_ALGORITHM); @@ -1024,7 +1024,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); - List deleteList = new ArrayList<>(); + Collection deleteList = new ArrayList<>(); // Get the path to the pid metadata document directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( @@ -1828,7 +1828,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, FileHashStoreUtility.ensureNotNull(cid, "cid", "unTagObject"); FileHashStoreUtility.checkForEmptyString(cid, "cid", "unTagObject"); - List deleteList = new ArrayList<>(); + Collection deleteList = new ArrayList<>(); // Storing, deleting and untagging objects are synchronized together // Duplicate store object requests for a pid are rejected, but deleting an object From e6e10180153f1c9dd209bca1fcbebc64b4fbf91e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 09:52:51 -0700 Subject: [PATCH 382/553] Refactor public api methods in 'FileHashStore' by extracting synchronization code --- .../filehashstore/FileHashStore.java | 393 +++++++----------- 1 file changed, 142 insertions(+), 251 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index be7484cf..15358606 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -497,11 +497,7 @@ private ObjectMetadata syncPutObject( } finally { // Release lock - synchronized (objectLockedIds) { - logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); - objectLockedIds.remove(pid); - objectLockedIds.notify(); - } + releaseObjectLockedIds(pid); } } @@ -534,22 +530,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); // tagObject is synchronized with deleteObject based on a `cid` - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "referenceLockedCids lock was interrupted while waiting to tag pid: " + pid - + " with cid: " + cid + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug("Synchronizing referenceLockedCids for pid: " + pid); - referenceLockedCids.add(cid); - } + synchronizeReferencedLockedCids(cid); try { storeHashStoreRefsFiles(pid, cid); @@ -572,12 +553,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } finally { // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "Releasing referenceLockedCids for pid: " + pid + " with cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); - } + releaseReferencedLockedCids(cid); } } @@ -685,29 +661,10 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) private String syncPutMetadata(InputStream metadata, String pid, String checkedFormatId) throws InterruptedException, IOException, NoSuchAlgorithmException { // Get the metadata document id, which is the synchronization value - // Metadata storage requests for the same pid must be written serially - // However, the same pid could be used with different formatIds, so - // synchronize ids with pid + formatId; String pidFormatId = pid + checkedFormatId; String metadataDocId = FileHashStoreUtility.getPidHexDigest(pidFormatId, OBJECT_STORE_ALGORITHM); - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(metadataDocId)) { - try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "Metadata lock was interrupted while storing metadata for: " + pid - + " and formatId: " + checkedFormatId + ". InterruptedException: " - + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug("Synchronizing metadataLockedIds for pid: " + pid); - metadataLockedIds.add(metadataDocId); - } + synchronizeMetadataLockedIds(metadataDocId); try { logFileHashStore.debug( @@ -732,14 +689,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF throw nsae; } finally { - // Release lock - synchronized (metadataLockedIds) { - logFileHashStore.debug( - "Releasing metadataLockedIds for pid: " + pid + " and formatId " - + checkedFormatId); - metadataLockedIds.remove(metadataDocId); - metadataLockedIds.notify(); - } + releaseMetadataLockedIds(metadataDocId); } } @@ -829,20 +779,7 @@ public void deleteObject(String pid) // Storing, deleting and untagging objects are synchronized together // Duplicate store object requests for a pid are rejected, but deleting an object // will wait for a pid to be released if it's found to be in use before proceeding. - synchronized (objectLockedIds) { - while (objectLockedIds.contains(pid)) { - try { - objectLockedIds.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = "Delete request for pid: " + pid + " has been interrupted."; - logFileHashStore.warn(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); - objectLockedIds.add(pid); - } + synchronizeObjectLockedIds(pid); try { // Before we begin deletion process, we look for the `cid` by calling @@ -853,26 +790,7 @@ public void deleteObject(String pid) String cid = objInfoMap.get("cid"); // If no exceptions are thrown, we proceed to synchronization based on the `cid` - // Multiple threads may access the cid reference file (which contains a list of - // `pid`s that reference a `cid`) and this needs to be coordinated. Otherwise, - // we will run into a `OverlappingFileLockException` - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = "referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "Synchronizing referenceLockedCids for pid: " + pid + " with cid: " + cid); - referenceLockedCids.add(cid); - } + synchronizeReferencedLockedCids(cid); try { // Proceed with comprehensive deletion - cid exists, nothing out of place @@ -898,13 +816,7 @@ public void deleteObject(String pid) } finally { // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "Releasing referenceLockedCids for pid: " + pid + " with cid: " - + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); - } + releaseReferencedLockedCids(cid); } } catch (OrphanPidRefsFileException oprfe) { @@ -925,24 +837,7 @@ public void deleteObject(String pid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); // Since we must access the cid reference file, the `cid` must be synchronized - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cidRead)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = "referenceLockedCids lock was " - + "interrupted while waiting to delete objects for pid: " + pid - + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "Synchronizing referenceLockedCids for pid: " + pid + " with cid: " - + cidRead); - referenceLockedCids.add(cidRead); - } + synchronizeReferencedLockedCids(cidRead); try { Path absCidRefsPath = @@ -962,13 +857,7 @@ public void deleteObject(String pid) } finally { // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "Releasing referenceLockedCids for pid: " + pid + " with cid: " - + cidRead); - referenceLockedCids.remove(cidRead); - referenceLockedCids.notify(); - } + releaseReferencedLockedCids(cidRead); } } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists @@ -983,11 +872,7 @@ public void deleteObject(String pid) } } finally { // Release lock - synchronized (objectLockedIds) { - logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); - objectLockedIds.remove(pid); - objectLockedIds.notify(); - } + releaseObjectLockedIds(pid); } } @@ -1057,35 +942,14 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept protected static void syncRenameMetadataDocForDeletion( String pid, Collection deleteList, Path metadataDocAbsPath, String metadataDocId) throws InterruptedException, IOException { - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(metadataDocId)) { - try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = "Metadata lock was interrupted while deleting metadata doc: " - + metadataDocId + " for pid: " + pid + ". InterruptedException: " - + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug("Synchronizing metadataLockedIds for pid: " + pid); - metadataLockedIds.add(metadataDocId); - } + synchronizeMetadataLockedIds(metadataDocId); try { if (Files.exists(metadataDocAbsPath)) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDocAbsPath)); } } finally { - // Release lock - synchronized (metadataLockedIds) { - logFileHashStore.debug( - "Releasing metadataLockedIds for pid: " + pid + " and doc " + metadataDocId); - metadataLockedIds.remove(metadataDocId); - metadataLockedIds.notify(); - } + releaseMetadataLockedIds(metadataDocId); } } @@ -1683,22 +1547,7 @@ protected void deleteObjectByCid(String cid) FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid); Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "referenceLockedCids lock was interrupted while waiting to delete object " - + "with cid: " + cid + ". InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug("Synchronizing referenceLockedCids for cid: " + cid); - referenceLockedCids.add(cid); - } + synchronizeReferencedLockedCids(cid); try { if (Files.exists(absCidRefsPath)) { @@ -1715,11 +1564,7 @@ protected void deleteObjectByCid(String cid) } } finally { // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug("Releasing referenceLockedCids for cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); - } + releaseReferencedLockedCids(cid); } } @@ -1777,7 +1622,6 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo } // Orphaned pid refs file found, the retrieved cid refs file exists // but doesn't contain the pid. Proceed to overwrite the pid refs file. - // There is no return statement, so we move out of this if block. } } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file @@ -1829,24 +1673,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, FileHashStoreUtility.checkForEmptyString(cid, "cid", "unTagObject"); Collection deleteList = new ArrayList<>(); - - // Storing, deleting and untagging objects are synchronized together - // Duplicate store object requests for a pid are rejected, but deleting an object - // will wait for a pid to be released if it's found to be in use before proceeding. - synchronized (objectLockedIds) { - while (objectLockedIds.contains(pid)) { - try { - objectLockedIds.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = "Untag request for pid: " + pid + " has been interrupted."; - logFileHashStore.warn(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); - objectLockedIds.add(pid); - } + synchronizeObjectLockedIds(pid); try { // Before we begin untagging process, we look for the `cid` by calling @@ -1855,29 +1682,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, try { Map objInfoMap = findObject(pid); cid = objInfoMap.get("cid"); - // If no exceptions are thrown, we proceed to synchronization based on the `cid` - // Multiple threads may access the cid reference file (which contains a list of - // `pid`s that reference a `cid`) and this needs to be coordinated. Otherwise, - // we will run into a `OverlappingFileLockException` - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "referenceLockedCids lock was interrupted while waiting to delete" - + " objects for pid: " + pid + ". InterruptedException: " - + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "Synchronizing referenceLockedCids for pid: " + pid + " with cid: " + cid); - referenceLockedCids.add(cid); - } + synchronizeReferencedLockedCids(cid); try { // Get paths to reference files to work on @@ -1899,13 +1705,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "Releasing referenceLockedCids for pid: " + pid + " with cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); - } + releaseReferencedLockedCids(cid); } } catch (OrphanPidRefsFileException oprfe) { @@ -1925,25 +1725,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); // Since we must access the cid reference file, the `cid` must be synchronized - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cidRead)) { - try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "referenceLockedCids lock was interrupted while waiting to delete" - + " objects for pid: " + pid + ". InterruptedException: " - + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "Synchronizing referenceLockedCids for " + "pid: " + pid + " with cid: " - + cidRead); - referenceLockedCids.add(cidRead); - } + synchronizeReferencedLockedCids(cidRead); try { Path absCidRefsPath = @@ -1961,14 +1743,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, logFileHashStore.warn(warnMsg); } finally { - // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "Releasing referenceLockedCids for pid: " + pid + " with cid: " - + cidRead); - referenceLockedCids.remove(cidRead); - referenceLockedCids.notify(); - } + releaseReferencedLockedCids(cidRead); } } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists @@ -1995,12 +1770,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } } } finally { - // Release lock on the pid - synchronized (objectLockedIds) { - logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); - objectLockedIds.remove(pid); - objectLockedIds.notify(); - } + releaseObjectLockedIds(pid); } } @@ -2359,4 +2129,125 @@ protected Path getHashStoreRefsPath(String abpcId, String refType) } return realPath; } + + /** + * Storing, deleting and untagging objects are synchronized together. Duplicate store object + * requests for a pid are rejected, but deleting an object will wait for a pid to be released + * if it's found to be in use before proceeding. + * + * @param pid Persistent or authority-based identifier + * @throws InterruptedException When an issue occurs when attempting to sync the pid + */ + private static void synchronizeObjectLockedIds(String pid) + throws InterruptedException { + synchronized (objectLockedIds) { + while (objectLockedIds.contains(pid)) { + try { + objectLockedIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync pid: " + pid; + logFileHashStore.warn(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); + objectLockedIds.add(pid); + } + } + + /** + * Remove the given pid from 'objectLockedIds' and notify other threads + * + * @param pid Content identifier + */ + private static void releaseObjectLockedIds(String pid) { + synchronized (objectLockedIds) { + logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); + objectLockedIds.remove(pid); + objectLockedIds.notify(); + } + } + + /** + * All requests to store/delete metadata will be accepted but must be executed serially + * + * @param metadataDocId Metadata document id hash(pid+formatId) + * @throws InterruptedException When an issue occurs when attempting to sync the metadata doc + */ + private static void synchronizeMetadataLockedIds(String metadataDocId) + throws InterruptedException { + synchronized (metadataLockedIds) { + while (metadataLockedIds.contains(metadataDocId)) { + try { + metadataLockedIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync metadata doc: " + + metadataDocId; + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "Synchronizing metadataLockedIds for metadata doc: " + metadataDocId); + metadataLockedIds.add(metadataDocId); + } + } + + /** + * Remove the given metadata doc from 'metadataLockedIds' and notify other threads + * + * @param metadataDocId Metadata document id hash(pid+formatId) + */ + private static void releaseMetadataLockedIds(String metadataDocId) { + synchronized (metadataLockedIds) { + logFileHashStore.debug( + "Releasing metadataLockedIds for metadata doc: " + metadataDocId); + metadataLockedIds.remove(metadataDocId); + metadataLockedIds.notify(); + } + } + + /** + * Multiple threads may access the cid reference file (which contains a list of `pid`s that + * reference a `cid`) and this needs to be coordinated. Otherwise, we will run into a + * `OverlappingFileLockException` + * + * @param cid Content identifier + * @throws InterruptedException When an issue occurs when attempting to sync the pid + */ + private static void synchronizeReferencedLockedCids(String cid) throws InterruptedException { + synchronized (referenceLockedCids) { + while (referenceLockedCids.contains(cid)) { + try { + referenceLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync cid: " + cid; + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "Synchronizing referenceLockedCids for cid: " + cid); + referenceLockedCids.add(cid); + } + } + + /** + * Remove the given cid from 'referenceLockedCids' and notify other threads + * + * @param cid Content identifier + */ + private static void releaseReferencedLockedCids(String cid) { + synchronized (referenceLockedCids) { + logFileHashStore.debug("Releasing referenceLockedCids for cid: " + cid); + referenceLockedCids.remove(cid); + referenceLockedCids.notify(); + } + } } From 3ccc3eb586b0b9dd83c98e59d2c655ba621918c3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 10:52:02 -0700 Subject: [PATCH 383/553] Remove unused parameter from 'syncRenameMetadataDocForDeletion' and fix affected code --- .../hashstore/filehashstore/FileHashStore.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 15358606..c0d7e6fd 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -892,7 +892,7 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx String metadataDocId = FileHashStoreUtility.getPidHexDigest(pid + formatId, OBJECT_STORE_ALGORITHM); Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); - syncRenameMetadataDocForDeletion(pid, deleteList, metadataDocPath, metadataDocId); + syncRenameMetadataDocForDeletion(deleteList, metadataDocPath, metadataDocId); // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info( @@ -921,7 +921,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); for (Path metadataDoc : metadataDocPaths) { String metadataDocId = metadataDoc.getFileName().toString(); - syncRenameMetadataDocForDeletion(pid, deleteList, metadataDoc, metadataDocId); + syncRenameMetadataDocForDeletion(deleteList, metadataDoc, metadataDocId); } // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); @@ -929,21 +929,17 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept } /** - * Synchronize deleting a metadata doc for deletion by renaming it and adding it to the supplied - * List. + * Synchronize deleting a metadata doc by renaming it and adding it to the supplied List. * - * @param pid Persistent or authority-based identifier * @param deleteList List to add the renamed metadata document * @param metadataDocAbsPath Absolute path to the metadata document * @param metadataDocId Metadata document name * @throws InterruptedException When an issue with synchronization occurs * @throws IOException If there is an issue renaming a document */ - protected static void syncRenameMetadataDocForDeletion( - String pid, Collection deleteList, Path metadataDocAbsPath, String metadataDocId) + protected static void syncRenameMetadataDocForDeletion(Collection deleteList, Path metadataDocAbsPath, String metadataDocId) throws InterruptedException, IOException { synchronizeMetadataLockedIds(metadataDocId); - try { if (Files.exists(metadataDocAbsPath)) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDocAbsPath)); From f92cccae189ed0e7ebee0d8bafa651bbb58c16bf Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 11:09:40 -0700 Subject: [PATCH 384/553] Deactivate known intermittently failing test which issue has been created for --- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index d5eb9fca..db513af5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -33,6 +33,7 @@ import javax.xml.bind.DatatypeConverter; +import jdk.nashorn.internal.ir.annotations.Ignore; import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; @@ -1959,8 +1960,9 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { /** * Test deleteObject synchronization using a Runnable class + * TODO: Reactivate with @Test once bug has been investigated */ - @Test + @Ignore public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { // Get single test file to "upload" String pid = "jtao.1700.1"; From c2098d32ea9b5bf43705d38e38cf7332a3dc552a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 11:33:14 -0700 Subject: [PATCH 385/553] Update storeObject HashStoreRunnable junit test --- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index db513af5..5947181c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -700,7 +700,9 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { pidModifiedList.add(pid + ".dou.test." + i); } - ExecutorService executorService = Executors.newFixedThreadPool(5); + Runtime runtime = Runtime.getRuntime(); + int numCores = runtime.availableProcessors(); + ExecutorService executorService = Executors.newFixedThreadPool(numCores); for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); From b0c541439cd9e6496f08ec99962f4c1d0bcae3b5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 12:39:28 -0700 Subject: [PATCH 386/553] Fix warning for 'verifyObject' with boolean value in signature by removing a data object by default if it is determined to be invalid and the right conditions are met --- .../java/org/dataone/hashstore/HashStore.java | 10 ++++----- .../filehashstore/FileHashStore.java | 11 +++------- .../FileHashStoreInterfaceTest.java | 22 ++++++++----------- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 1efe4e19..3694b79e 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -101,15 +101,14 @@ void tagObject(String pid, String cid) throws IOException, InterruptedException; /** - * Confirms that an ObjectMetadata's content is equal to the given values. If it is not - * equal, it will return False - otherwise True. + * Confirms that an ObjectMetadata's content is equal to the given values. This method + * throws an exception if there are any issues, and attempts to remove the data object + * if it is determined to be invalid. * * @param objectInfo ObjectMetadata object with values * @param checksum Value of checksum to validate against * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing - * @param deleteInvalidObject If true, HashStore will attempt to remove the data object - * given to verify * @throws NonMatchingObjSizeException Given size =/= objMeta size value * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value * @throws UnsupportedHashAlgorithmException Given algo is not found or supported @@ -121,8 +120,7 @@ void tagObject(String pid, String cid) throws IOException, * checksum not found */ void verifyObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, - boolean deleteInvalidObject) + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, IOException; diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c0d7e6fd..dad2d6a0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -559,8 +559,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi @Override public void verifyObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize, - boolean deleteInvalidObject) + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, IOException { @@ -605,9 +604,7 @@ public void verifyObject( } // Validate checksum if (!digestFromHexDigests.equals(checksum)) { - if (deleteInvalidObject) { - deleteObjectByCid(objCid); - } + deleteObjectByCid(objCid); String errMsg = "Object content invalid for cid: " + objCid + ". Expected checksum: " + checksum + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " @@ -617,9 +614,7 @@ public void verifyObject( } // Validate size if (objInfoRetrievedSize != objSize) { - if (deleteInvalidObject) { - deleteObjectByCid(objCid); - } + deleteObjectByCid(objCid); String errMsg = "Object size invalid for cid: " + objCid + ". Expected size: " + objSize + ". Actual size: " + objInfoRetrievedSize; logFileHashStore.error(errMsg); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 5947181c..252bd4ed 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -838,8 +838,7 @@ public void verifyObject_correctValues() throws Exception { long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true - ); + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -871,9 +870,7 @@ public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { String expectedChecksum = testData.pidData.get(pid).get("md2"); long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.verifyObject( - objInfo, expectedChecksum, "MD2", expectedSize, true - ); + fileHashStore.verifyObject(objInfo, expectedChecksum, "MD2", expectedSize); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -903,8 +900,7 @@ public void verifyObject_unsupportedAlgo() throws Exception { assertThrows( UnsupportedHashAlgorithmException.class, - () -> fileHashStore.verifyObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000, - false)); + () -> fileHashStore.verifyObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -940,7 +936,7 @@ public void verifyObject_mismatchedSize() throws Exception { assertThrows( NonMatchingObjSizeException.class, () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize, false)); + expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -949,7 +945,7 @@ public void verifyObject_mismatchedSize() throws Exception { storeDepth, storeWidth, objInfo.getCid() ); // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( "objects").resolve(objRelativePath))); } } @@ -976,7 +972,7 @@ public void verifyObject_mismatchedChecksum() throws Exception { assertThrows( NonMatchingChecksumException.class, () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize, false)); + expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -985,7 +981,7 @@ public void verifyObject_mismatchedChecksum() throws Exception { storeDepth, storeWidth, objInfo.getCid() ); // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( "objects").resolve(objRelativePath))); } } @@ -1012,7 +1008,7 @@ public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Except assertThrows( NonMatchingObjSizeException.class, () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize, true)); + expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -1047,7 +1043,7 @@ public void verifyObject_mismatchedChecksum_deleteInvalidObject_true() throws Ex long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize, true + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize )); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); From d2f73b457673b0c104c445117a91c7cb0e2ca998 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 13:03:51 -0700 Subject: [PATCH 387/553] Remove todo item and add new junit tests for 'getHashStoreMetadataInputStream' --- .../filehashstore/FileHashStore.java | 1 - .../FileHashStoreProtectedTest.java | 46 +++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index dad2d6a0..42f522af 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1644,7 +1644,6 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo "Object with cid: " + cid + " has been tagged successfully with pid: " + pid); } - // TODO: Review to see if this can be more DRY or make use of existing methods /** * Untags a data object in HashStore by deleting the 'pid reference file' and removing the 'pid' * from the 'cid reference file'. This method will never delete a data object. diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index f98b3b99..2acb1331 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -836,7 +837,7 @@ public void testMove() throws Exception { * Confirm that exceptions are not thrown when move is called on an object that already exists */ @Test - public void testMove_targetExists() throws Exception { + public void move_targetExists() throws Exception { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; File targetFile = new File(targetString); @@ -850,7 +851,7 @@ public void testMove_targetExists() throws Exception { * Confirm that NullPointerException is thrown when entity is null */ @Test - public void testMove_entityNull() { + public void move_entityNull() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -863,7 +864,7 @@ public void testMove_entityNull() { * Confirm that FileAlreadyExistsException is thrown entity is empty */ @Test - public void testMove_entityEmpty() { + public void move_entityEmpty() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -876,7 +877,7 @@ public void testMove_entityEmpty() { * Confirm that FileAlreadyExistsException is thrown when entity is empty spaces */ @Test - public void testMove_entityEmptySpaces() { + public void move_entityEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -1239,6 +1240,43 @@ public void getHashStoreMetadataPath() throws Exception { } } + /** + * Check that getHashStoreMetadataInputStream returns an InputStream + */ + @Test + public void getHashStoreMetadataInputStream() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + fileHashStore.storeMetadata(metadataStream, pid, null); + metadataStream.close(); + + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + + InputStream metadataCidInputStream = fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId); + assertNotNull(metadataCidInputStream); + } + } + + /** + * Check that getHashStoreMetadataInputStream throws FileNotFoundException when there is no + * metadata to retrieve + */ + @Test + public void getHashStoreMetadataInputStream_fileNotFound() { + for (String pid : testData.pidList) { + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + + assertThrows( + FileNotFoundException.class, + () -> fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId)); + } + } + /** * Confirm getHashStoreRefsPath returns correct pid refs path */ From 8cd0d3448bacc06d80e69d4a3204f67a7eb44624 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 13:14:27 -0700 Subject: [PATCH 388/553] Add missing junit tests for 'verifyChecksumParameters' --- .../FileHashStoreProtectedTest.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 2acb1331..5376db5e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -656,6 +656,45 @@ public void putObject_emptyAlgorithm() { }); } + /** + * Confirm verifyChecksumParameters returns true with good values + */ + @Test + public void verifyChecksumParameters() throws Exception { + boolean shouldValidate = fileHashStore.verifyChecksumParameters("abc123","SHA-256"); + assertTrue(shouldValidate); + } + + /** + * Confirm verifyChecksumParameters throws exception when checksum value is empty + */ + @Test + public void verifyChecksumParameters_emptyChecksum() { + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.verifyChecksumParameters(" ","SHA-256"); + }); + } + + /** + * Confirm verifyChecksumParameters throws exception when checksum algorithm is empty + */ + @Test + public void verifyChecksumParameters_emptyAlgorithm() { + assertThrows(IllegalArgumentException.class, () -> { + fileHashStore.verifyChecksumParameters("abc123"," "); + }); + } + + /** + * Confirm verifyChecksumParameters throws exception when checksum algorithm is not supported + */ + @Test + public void verifyChecksumParameters_unsupportedAlgorithm() { + assertThrows(NoSuchAlgorithmException.class, () -> { + fileHashStore.verifyChecksumParameters("abc123","SHA-DOU"); + }); + } + /** * Check default checksums are generated */ From f649697d283c0c68ca41121043bdcb2418aece9a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 13:18:34 -0700 Subject: [PATCH 389/553] Add missing junit tests for 'shouldCalculateAlgorithm' --- .../hashstore/filehashstore/FileHashStore.java | 6 ++++-- .../FileHashStoreProtectedTest.java | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 42f522af..912c595a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1259,7 +1259,7 @@ private void validateTmpObject( */ protected boolean validateAlgorithm(String algorithm) throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { - FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "putObject"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "validateAlgorithm"); FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "validateAlgorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); @@ -1280,7 +1280,9 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio * @param algorithm Algorithm to check * @return Boolean */ - private boolean shouldCalculateAlgorithm(String algorithm) { + protected boolean shouldCalculateAlgorithm(String algorithm) { + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "shouldCalculateAlgorithm"); + FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "shouldCalculateAlgorithm"); boolean shouldCalculateAlgorithm = true; for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { if (algorithm.equals(defAlgo.getName())) { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 5376db5e..568dbd52 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -155,6 +155,24 @@ public void isValidAlgorithm_algorithmNull() { }); } + /** + * Confirm shouldCalculateAlgorithm returns false if algorithm not part of default list + */ + @Test + public void shouldCalculateAlgorithm_algoIncluded() { + boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-256"); + assertFalse(shouldCalculate); + } + + /** + * Confirm shouldCalculateAlgorithm returns false if algorithm not part of default list + */ + @Test + public void shouldCalculateAlgorithm_algoNotIncluded() { + boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-DOU"); + assertTrue(shouldCalculate); + } + /** * Confirm that a temporary file has been generated. */ From 3d00b4b9becd61b599e0fb4efc9a2293a01da869 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 13:38:20 -0700 Subject: [PATCH 390/553] Delete 'FileHashStoreReferencesTest' class, move and organize junit tests to 'FileHashStoreProtected', fix incorrect import in 'FileHashStoreInterface' class and use correct 'Disabled' decorator --- .../FileHashStoreInterfaceTest.java | 4 +- .../FileHashStoreProtectedTest.java | 900 ++++++++++++++---- .../FileHashStoreReferencesTest.java | 589 ------------ 3 files changed, 705 insertions(+), 788 deletions(-) delete mode 100644 src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 252bd4ed..0cb89980 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -33,7 +33,6 @@ import javax.xml.bind.DatatypeConverter; -import jdk.nashorn.internal.ir.annotations.Ignore; import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; @@ -43,6 +42,7 @@ import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -1960,7 +1960,7 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { * Test deleteObject synchronization using a Runnable class * TODO: Reactivate with @Test once bug has been investigated */ - @Ignore + @Disabled public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { // Get single test file to "upload" String pid = "jtao.1700.1"; diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 568dbd52..4926e475 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -16,6 +16,7 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Properties; @@ -23,9 +24,12 @@ import javax.xml.bind.DatatypeConverter; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; @@ -84,116 +88,6 @@ public File generateTemporaryFile() throws Exception { @TempDir public Path tempFolder; - /** - * Check algorithm support for supported algorithm - */ - @Test - public void isValidAlgorithm_supported() { - try { - String md2 = "MD2"; - boolean supported = fileHashStore.validateAlgorithm(md2); - assertTrue(supported); - - } catch (NoSuchAlgorithmException nsae) { - fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); - - } - } - - /** - * Check algorithm support for unsupported algorithm - */ - @Test - public void isValidAlgorithm_notSupported() { - assertThrows(NoSuchAlgorithmException.class, () -> { - try { - String sm3 = "SM3"; - boolean not_supported = fileHashStore.validateAlgorithm(sm3); - assertFalse(not_supported); - - } catch (NoSuchAlgorithmException nsae) { - throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage()); - - } - }); - } - - /** - * Check algorithm support for unsupported algorithm with lower cases - */ - @Test - public void isValidAlgorithm_notSupportedLowerCase() { - assertThrows(NoSuchAlgorithmException.class, () -> { - try { - // Must match string to reduce complexity, no string formatting - String md2_lowercase = "md2"; - boolean lowercase_not_supported = fileHashStore.validateAlgorithm(md2_lowercase); - assertFalse(lowercase_not_supported); - - } catch (NoSuchAlgorithmException nsae) { - throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage()); - - } - }); - } - - /** - * Check algorithm support for null algorithm value throws exception - */ - @Test - public void isValidAlgorithm_algorithmNull() { - assertThrows(IllegalArgumentException.class, () -> { - try { - fileHashStore.validateAlgorithm(null); - - } catch (NoSuchAlgorithmException nsae) { - fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); - - } - }); - } - - /** - * Confirm shouldCalculateAlgorithm returns false if algorithm not part of default list - */ - @Test - public void shouldCalculateAlgorithm_algoIncluded() { - boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-256"); - assertFalse(shouldCalculate); - } - - /** - * Confirm shouldCalculateAlgorithm returns false if algorithm not part of default list - */ - @Test - public void shouldCalculateAlgorithm_algoNotIncluded() { - boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-DOU"); - assertTrue(shouldCalculate); - } - - /** - * Confirm that a temporary file has been generated. - */ - @Test - public void generateTempFile() throws Exception { - File newTmpFile = generateTemporaryFile(); - assertTrue(newTmpFile.exists()); - } - - /** - * Confirm that a given digest is sharded appropriately - */ - @Test - public void getHierarchicalPathString() { - String shardedPath = FileHashStoreUtility.getHierarchicalPathString(3, 2, - "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"); - String shardedPathExpected = - "94/f9/b6/c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; - assertEquals(shardedPath, shardedPathExpected); - } - /** * Check that findObject returns cid as expected. */ @@ -674,6 +568,95 @@ public void putObject_emptyAlgorithm() { }); } + /** + * Check algorithm support for supported algorithm + */ + @Test + public void validateAlgorithm_supported() { + try { + String md2 = "MD2"; + boolean supported = fileHashStore.validateAlgorithm(md2); + assertTrue(supported); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + } + + /** + * Check algorithm support for unsupported algorithm + */ + @Test + public void validateAlgorithm_notSupported() { + assertThrows(NoSuchAlgorithmException.class, () -> { + try { + String sm3 = "SM3"; + boolean not_supported = fileHashStore.validateAlgorithm(sm3); + assertFalse(not_supported); + + } catch (NoSuchAlgorithmException nsae) { + throw new NoSuchAlgorithmException( + "NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + }); + } + + /** + * Check algorithm support for unsupported algorithm with lower cases + */ + @Test + public void validateAlgorithm_notSupportedLowerCase() { + assertThrows(NoSuchAlgorithmException.class, () -> { + try { + // Must match string to reduce complexity, no string formatting + String md2_lowercase = "md2"; + boolean lowercase_not_supported = fileHashStore.validateAlgorithm(md2_lowercase); + assertFalse(lowercase_not_supported); + + } catch (NoSuchAlgorithmException nsae) { + throw new NoSuchAlgorithmException( + "NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + }); + } + + /** + * Check algorithm support for null algorithm value throws exception + */ + @Test + public void validateAlgorithm_algorithmNull() { + assertThrows(IllegalArgumentException.class, () -> { + try { + fileHashStore.validateAlgorithm(null); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + }); + } + + /** + * Confirm shouldCalculateAlgorithm returns false if algorithm not part of default list + */ + @Test + public void shouldCalculateAlgorithm_algoIncluded() { + boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-256"); + assertFalse(shouldCalculate); + } + + /** + * Confirm shouldCalculateAlgorithm returns false if algorithm not part of default list + */ + @Test + public void shouldCalculateAlgorithm_algoNotIncluded() { + boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-DOU"); + assertTrue(shouldCalculate); + } + /** * Confirm verifyChecksumParameters returns true with good values */ @@ -881,7 +864,7 @@ public void writeToTmpFileAndGenerateChecksums_invalidAlgo() { * Confirm that object has moved */ @Test - public void testMove() throws Exception { + public void move() throws Exception { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; File targetFile = new File(targetString); @@ -944,70 +927,644 @@ public void move_entityEmptySpaces() { } /** - * Test putMetadata stores metadata as expected + * Confirm deleteObjectByCid method deletes object when there are no references. */ @Test - public void putMetadata() throws Exception { + public void deleteObjectByCid() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream dataStream = Files.newInputStream(testDataFile); + // Store object only + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + dataStream.close(); + String cid = objInfo.getCid(); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataPath = fileHashStore.putMetadata(metadataStream, pid, null); - metadataStream.close(); + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); - // Calculate absolute path - String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = - fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); - assertEquals(metadataPath, metadataPidExpectedPath.toString()); + // Get permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + + Path objRealPath = storePath.resolve("objects").resolve(objShardString); + assertFalse(Files.exists(objRealPath)); } } /** - * Test putMetadata throws exception when metadata is null + * Confirm deleteObjectByCid method does not delete an object if a cid refs file exists (pids + * still referencing the cid). */ @Test - public void putMetadata_metadataNull() { + public void deleteObjectByCid_cidRefsFileContainsPids() throws Exception { for (String pid : testData.pidList) { - assertThrows( - IllegalArgumentException.class, () -> fileHashStore.putMetadata(null, pid, null)); + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + dataStream.close(); + String cid = objInfo.getCid(); + + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); + + // Get permanent address of the actual cid + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objRealPath)); } } /** - * Test putMetadata throws exception when pid is null + * Check that storeHashStoreRefsFiles creates reference files */ @Test - public void putMetadata_pidNull() { - for (String pid : testData.pidList) { - assertThrows(IllegalArgumentException.class, () -> { - String pidFormatted = pid.replace("/", "_"); + public void storeHashStoreRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + // Confirm refs files exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); + assertTrue(Files.exists(absCidRefsPath)); + assertTrue(Files.exists(absPidRefsPath)); - fileHashStore.putMetadata(metadataStream, null, null); - metadataStream.close(); - }); - } + // Confirm no additional files were created + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); } /** - * Test putMetadata throws exception when pid is empty + * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content + * is correct */ @Test - public void putMetadata_pidEmpty() { - for (String pid : testData.pidList) { - assertThrows(IllegalArgumentException.class, () -> { - String pidFormatted = pid.replace("/", "_"); + public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); - // Get test metadata file - Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + assertTrue(Files.exists(pidRefsFilePath)); + + String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); + assertEquals(cid, retrievedCid); + } + + /** + * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content + * is correct + */ + @Test + public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + assertTrue(Files.exists(cidRefsFilePath)); + + String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); + assertEquals(pid, retrievedPid); + } + + /** + * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException + * when refs files already exist + */ + @Test + public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + assertThrows( + HashStoreRefsAlreadyExistException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); + + // Confirm that there is only 1 of each ref file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check storeHashStoreRefsFiles throws exception when the supplied cid is different from what + * is found in the pid refs file, and the associated cid refs file from the pid refs file + * is correctly tagged (everything is where it's expected to be) + */ + @Test + public void storeHashStoreRefsFiles_PidRefsFileExistsException() + throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + String existingCid = "987654321fedcba"; + fileHashStore.storeHashStoreRefsFiles(pid, existingCid); + + // This will throw an exception because the pid and cid refs file are in sync + assertThrows( + PidRefsFileExistsException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); + } + + /** + * Check storeHashStoreRefsFiles overwrites an orphaned pid refs file - the 'cid' that it + * references does not exist (does not have a cid refs file) + */ + @Test + public void storeHashStoreRefsFiles_pidRefsOrphanedFile() + throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + String cidForOrphanPidRef = "987654321fedcba"; + + // Create orphaned pid refs file + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + File pidRefsTmpFile = fileHashStore.writeRefsFile( + cidForOrphanPidRef, FileHashStore.HashStoreIdTypes.pid.getName() + ); + File absPathPidRefsFile = absPidRefsPath.toFile(); + fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + + fileHashStore.storeHashStoreRefsFiles(pid, cid); + // There should only be 1 of each ref file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that storeHashStoreRefsFiles creates a pid refs file and updates an existing cid refs + * file + */ + @Test + public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + String pidAdditional = "another.pid.2"; + fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); + + // Confirm missing pid refs file has been created + Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, "pid"); + assertTrue(Files.exists(pidAdditionalRefsFilePath)); + + // Check cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( + pidAdditional, cidRefsFilePath + ); + assertTrue(pidFoundInCidRefFiles); + + // There should be 2 pid refs file, and 1 cid refs file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(2, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that unTagObject deletes reference files + */ + @Test + public void unTagObject() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + fileHashStore.unTagObject(pid, cid); + + // Confirm refs files do not exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + assertFalse(Files.exists(absCidRefsPath)); + assertFalse(Files.exists(absPidRefsPath)); + } + + /** + * Check that unTagObject deletes pid refs file for a cid that is referenced by + * multiple pids, and that the cid refs file is not deleted. + */ + @Test + public void unTagObject_cidWithMultiplePidReferences() throws Exception { + String pid = "dou.test.1"; + String pidTwo = "dou.test.2"; + String pidThree = "dou.test.3"; + String pidFour = "dou.test.4"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + fileHashStore.tagObject(pidTwo, cid); + fileHashStore.tagObject(pidThree, cid); + fileHashStore.tagObject(pidFour, cid); + + fileHashStore.unTagObject(pid, cid); + + // Confirm refs files state + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + + assertFalse(Files.exists(absPidRefsPath)); + assertTrue(Files.exists(absCidRefsPath)); + + // Confirm number of reference files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(3, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that unTagObject deletes an orphaned pid refs file (there is no cid refs file) + */ + @Test + public void unTagObject_orphanPidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Delete cid refs file to create orphaned pid refs file + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + Files.delete(absCidRefsPath); + assertFalse(Files.exists(absCidRefsPath)); + + fileHashStore.unTagObject(pid, cid); + + // Confirm pid refs is deleted + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + assertFalse(Files.exists(absPidRefsPath)); + + // Confirm number of reference files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(0, pidRefsFiles.size()); + assertEquals(0, cidRefsFiles.size()); + } + + /** + * Check that unTagObject does not throw exception when a pid refs file and cid refs file + * does not exist + */ + @Test + public void unTagObject_missingRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + fileHashStore.unTagObject(pid, cid); + } + + /** + * Check that unTagObject does not throw exception when a pid refs file and cid refs file + * does not exist + */ + @Test + public void unTagObject_missingPidRefsFile() throws Exception { + String pid = "dou.test.1"; + String pidTwo = "dou.test.2"; + String pidThree = "dou.test.3"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + fileHashStore.tagObject(pidTwo, cid); + fileHashStore.tagObject(pidThree, cid); + + // Delete pid refs to create scenario + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + Files.delete(absPidRefsPath); + assertFalse(Files.exists(absPidRefsPath)); + + fileHashStore.unTagObject(pid, cid); + + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + assertFalse(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); + } + + /** + * Check that no exception is thrown when pid and cid are tagged correctly + */ + @Test + public void verifyHashStoreRefFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a pid refs file with the incorrect cid + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + + fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsFilePath); + } + + /** + * Check that an exception is thrown when a file is not found + */ + @Test + public void verifyHashStoreRefFiles_fileNotFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + // Create a pid refs file with the incorrect cid + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + + assertThrows(FileNotFoundException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, + cidRefsFilePath)); + } + + /** + * Check that exception is thrown when incorrect cid in a pid refs file. + */ + @Test + public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a pid refs file with the incorrect cid + String cidToWrite = "123456789abcdef"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); + + // Get path of the cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + + assertThrows( + CidNotFoundInPidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, + cidRefsFilePath)); + } + + /** + * Check that exception is thrown when an expected pid is not found in a cid refs file + */ + @Test + public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a cid refs file with a different pid from the one that is expected + String pidToWrite = "dou.test.2"; + File cidRefsTmpFile = fileHashStore.writeRefsFile(pidToWrite, "cid"); + Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); + + // Get path of the pid refs file + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + + assertThrows( + PidNotFoundInCidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, + cidRefsTmpFilePath)); + } + + /** + * Check that the value supplied is written + */ + @Test + public void writeRefsFile_content() throws Exception { + String cidToWrite = "test_cid_123"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + + String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); + assertEquals(cidRead, cidToWrite); + } + + // TODO: Add test for 'isStringInRefsFile' + + /** + * Confirm that cid refs file has been updated successfully + */ + @Test + public void updateRefsFile_add() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + + String pidAdditional = "dou.test.2"; + fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "add"); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + boolean pidAdditional_foundInCidRefFiles = false; + for (String line : lines) { + if (line.equals(pidAdditional)) { + pidAdditional_foundInCidRefFiles = true; + } + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertTrue(pidOriginal_foundInCidRefFiles); + assertTrue(pidAdditional_foundInCidRefFiles); + } + + /** + * Check that updateRefsFile removes pid from its cid refs file + */ + @Test + public void updateRefsFile_remove() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + String pidAdditional = "dou.test.2"; + fileHashStore.tagObject(pidAdditional, cid); + + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + + assertFalse(fileHashStore.isStringInRefsFile(pid, cidRefsFilePath)); + } + + /** + * Check that updateRefsFile removes all pids as expected and leaves an empty file. + */ + @Test + public void updateRefsFile_removeMultiplePids() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + String pidAdditional = "dou.test.2"; + fileHashStore.tagObject(pidAdditional, cid); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "remove"); + + assertTrue(Files.exists(cidRefsFilePath)); + assertEquals(0, Files.size(cidRefsFilePath)); + } + + /** + * Confirm that updateRefsFile does not throw any exception if called to remove a value + * that is not found in a cid refs file. + */ + @Test + public void updateRefsFile_cidRefsPidNotFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath, "remove"); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + int pidsFound = 0; + for (String line : lines) { + pidsFound++; + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertTrue(pidOriginal_foundInCidRefFiles); + assertEquals(1, pidsFound); + } + + /** + * Confirm that updateRefsFile does not throw any exception if called to remove a value + * from a cid refs file that is empty + */ + @Test + public void updateRefsFile_cidRefsEmpty() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + int pidsFound = 0; + for (String line : lines) { + pidsFound++; + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertFalse(pidOriginal_foundInCidRefFiles); + assertEquals(0, pidsFound); + + // Confirm that no exception is thrown and that the cid refs still exists + fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + assertTrue(Files.exists(cidRefsFilePath)); + } + + /** + * Test putMetadata stores metadata as expected + */ + @Test + public void putMetadata() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String metadataPath = fileHashStore.putMetadata(metadataStream, pid, null); + metadataStream.close(); + + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); + } + } + + /** + * Test putMetadata throws exception when metadata is null + */ + @Test + public void putMetadata_metadataNull() { + for (String pid : testData.pidList) { + assertThrows( + IllegalArgumentException.class, () -> fileHashStore.putMetadata(null, pid, null)); + } + } + + /** + * Test putMetadata throws exception when pid is null + */ + @Test + public void putMetadata_pidNull() { + for (String pid : testData.pidList) { + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + + fileHashStore.putMetadata(metadataStream, null, null); + metadataStream.close(); + }); + } + } + + /** + * Test putMetadata throws exception when pid is empty + */ + @Test + public void putMetadata_pidEmpty() { + for (String pid : testData.pidList) { + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream metadataStream = Files.newInputStream(testMetaDataFile); @@ -1176,58 +1733,7 @@ public void isStringInRefsFile_pidNotFound() throws Exception { } } - /** - * Confirm deleteObjectByCid method deletes object when there are no references. - */ - @Test - public void deleteObjectByCid() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - // Store object only - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - String cid = objInfo.getCid(); - - // Try deleting the object - fileHashStore.deleteObjectByCid(cid); - - // Get permanent address of the actual cid - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); - - Path objRealPath = storePath.resolve("objects").resolve(objShardString); - assertFalse(Files.exists(objRealPath)); - } - } - - /** - * Confirm deleteObjectByCid method does not delete an object if a cid refs file exists (pids - * still referencing the cid). - */ - @Test - public void tryDeleteObjectByCid_cidRefsFileContainsPids() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); - String cid = objInfo.getCid(); - - // Try deleting the object - fileHashStore.deleteObjectByCid(cid); - - // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); - assertTrue(Files.exists(objRealPath)); - } - } /** * Confirm getHashStoreDataObjectPath returns correct object path diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java deleted file mode 100644 index 267f09b1..00000000 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ /dev/null @@ -1,589 +0,0 @@ -package org.dataone.hashstore.filehashstore; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.security.NoSuchAlgorithmException; -import java.util.List; -import java.util.Properties; - -import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; -import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; -import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; -import org.dataone.hashstore.exceptions.PidRefsFileExistsException; -import org.dataone.hashstore.filehashstore.FileHashStore.HashStoreIdTypes; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -/** - * Test class for FileHashStore references related methods - */ -public class FileHashStoreReferencesTest { - private FileHashStore fileHashStore; - private Properties fhsProperties; - - /** - * Initialize FileHashStore before each test to creates tmp directories - */ - @BeforeEach - public void initializeFileHashStore() { - Path rootDirectory = tempFolder.resolve("hashstore"); - - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); - - try { - fhsProperties = storeProperties; - fileHashStore = new FileHashStore(storeProperties); - - } catch (IOException ioe) { - fail("IOException encountered: " + ioe.getMessage()); - - } catch (NoSuchAlgorithmException nsae) { - fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); - - } - } - - /** - * Temporary folder for tests to run in - */ - @TempDir - public Path tempFolder; - - /** - * Check that storeHashStoreRefsFiles creates reference files - */ - @Test - public void storeHashStoreRefsFiles() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - - // Confirm refs files exist - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - - assertTrue(Files.exists(absCidRefsPath)); - assertTrue(Files.exists(absPidRefsPath)); - - // Confirm no additional files were created - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(1, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); - } - - /** - * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content - * is correct - */ - @Test - public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - assertTrue(Files.exists(pidRefsFilePath)); - - String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); - assertEquals(cid, retrievedCid); - } - - /** - * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content - * is correct - */ - @Test - public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertTrue(Files.exists(cidRefsFilePath)); - - String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); - assertEquals(pid, retrievedPid); - } - - /** - * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException - * when refs files already exist - */ - @Test - public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - - assertThrows( - HashStoreRefsAlreadyExistException.class, - () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); - - // Confirm that there is only 1 of each ref file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(1, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); - } - - /** - * Check storeHashStoreRefsFiles throws exception when the supplied cid is different from what - * is found in the pid refs file, and the associated cid refs file from the pid refs file - * is correctly tagged (everything is where it's expected to be) - */ - @Test - public void storeHashStoreRefsFiles_PidRefsFileExistsException() - throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - String existingCid = "987654321fedcba"; - fileHashStore.storeHashStoreRefsFiles(pid, existingCid); - - // This will throw an exception because the pid and cid refs file are in sync - assertThrows( - PidRefsFileExistsException.class, - () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); - } - - /** - * Check storeHashStoreRefsFiles overwrites an orphaned pid refs file - the 'cid' that it - * references does not exist (does not have a cid refs file) - */ - @Test - public void storeHashStoreRefsFiles_pidRefsOrphanedFile() - throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - String cidForOrphanPidRef = "987654321fedcba"; - - // Create orphaned pid refs file - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - File pidRefsTmpFile = fileHashStore.writeRefsFile( - cidForOrphanPidRef, HashStoreIdTypes.pid.getName() - ); - File absPathPidRefsFile = absPidRefsPath.toFile(); - fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - - fileHashStore.storeHashStoreRefsFiles(pid, cid); - // There should only be 1 of each ref file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(1, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); - } - - /** - * Check that storeHashStoreRefsFiles creates a pid refs file and updates an existing cid refs - * file - */ - @Test - public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.storeHashStoreRefsFiles(pid, cid); - - String pidAdditional = "another.pid.2"; - fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); - - // Confirm missing pid refs file has been created - Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, "pid"); - assertTrue(Files.exists(pidAdditionalRefsFilePath)); - - // Check cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( - pidAdditional, cidRefsFilePath - ); - assertTrue(pidFoundInCidRefFiles); - - // There should be 2 pid refs file, and 1 cid refs file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(2, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); - } - - /** - * Check that unTagObject deletes reference files - */ - @Test - public void unTagObject() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - fileHashStore.unTagObject(pid, cid); - - // Confirm refs files do not exist - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - assertFalse(Files.exists(absCidRefsPath)); - assertFalse(Files.exists(absPidRefsPath)); - } - - /** - * Check that unTagObject deletes pid refs file for a cid that is referenced by - * multiple pids, and that the cid refs file is not deleted. - */ - @Test - public void unTagObject_cidWithMultiplePidReferences() throws Exception { - String pid = "dou.test.1"; - String pidTwo = "dou.test.2"; - String pidThree = "dou.test.3"; - String pidFour = "dou.test.4"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - fileHashStore.tagObject(pidTwo, cid); - fileHashStore.tagObject(pidThree, cid); - fileHashStore.tagObject(pidFour, cid); - - fileHashStore.unTagObject(pid, cid); - - // Confirm refs files state - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - - assertFalse(Files.exists(absPidRefsPath)); - assertTrue(Files.exists(absCidRefsPath)); - - // Confirm number of reference files - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(3, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); - } - - /** - * Check that unTagObject deletes an orphaned pid refs file (there is no cid refs file) - */ - @Test - public void unTagObject_orphanPidRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Delete cid refs file to create orphaned pid refs file - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - Files.delete(absCidRefsPath); - assertFalse(Files.exists(absCidRefsPath)); - - fileHashStore.unTagObject(pid, cid); - - // Confirm pid refs is deleted - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - assertFalse(Files.exists(absPidRefsPath)); - - // Confirm number of reference files - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(0, pidRefsFiles.size()); - assertEquals(0, cidRefsFiles.size()); - } - - /** - * Check that unTagObject does not throw exception when a pid refs file and cid refs file - * does not exist - */ - @Test - public void unTagObject_missingRefsFiles() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - - fileHashStore.unTagObject(pid, cid); - } - - /** - * Check that unTagObject does not throw exception when a pid refs file and cid refs file - * does not exist - */ - @Test - public void unTagObject_missingPidRefsFile() throws Exception { - String pid = "dou.test.1"; - String pidTwo = "dou.test.2"; - String pidThree = "dou.test.3"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - fileHashStore.tagObject(pidTwo, cid); - fileHashStore.tagObject(pidThree, cid); - - // Delete pid refs to create scenario - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - Files.delete(absPidRefsPath); - assertFalse(Files.exists(absPidRefsPath)); - - fileHashStore.unTagObject(pid, cid); - - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - assertFalse(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); - } - - /** - * Check that the value supplied is written - */ - @Test - public void writeRefsFile_content() throws Exception { - String cidToWrite = "test_cid_123"; - File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); - - String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); - assertEquals(cidRead, cidToWrite); - } - - /** - * Check that no exception is thrown when pid and cid are tagged correctly - */ - @Test - public void verifyHashStoreRefFiles() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Create a pid refs file with the incorrect cid - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - - fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsFilePath); - } - - /** - * Check that an exception is thrown when a file is not found - */ - @Test - public void verifyHashStoreRefFiles_fileNotFound() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - - // Create a pid refs file with the incorrect cid - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - - assertThrows(FileNotFoundException.class, - () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, - cidRefsFilePath)); - } - - /** - * Check that exception is thrown when incorrect cid in a pid refs file. - */ - @Test - public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Create a pid refs file with the incorrect cid - String cidToWrite = "123456789abcdef"; - File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); - Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); - - // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - - assertThrows( - CidNotFoundInPidRefsFileException.class, - () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, - cidRefsFilePath)); - } - - /** - * Check that exception is thrown when an expected pid is not found in a cid refs file - */ - @Test - public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Create a cid refs file with a different pid from the one that is expected - String pidToWrite = "dou.test.2"; - File cidRefsTmpFile = fileHashStore.writeRefsFile(pidToWrite, "cid"); - Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); - - // Get path of the pid refs file - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - - assertThrows( - PidNotFoundInCidRefsFileException.class, - () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, - cidRefsTmpFilePath)); - } - - /** - * Confirm that cid refs file has been updated successfully - */ - @Test - public void updateRefsFile_content() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - - String pidAdditional = "dou.test.2"; - fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "add"); - - List lines = Files.readAllLines(cidRefsFilePath); - boolean pidOriginal_foundInCidRefFiles = false; - boolean pidAdditional_foundInCidRefFiles = false; - for (String line : lines) { - if (line.equals(pidAdditional)) { - pidAdditional_foundInCidRefFiles = true; - } - if (line.equals(pid)) { - pidOriginal_foundInCidRefFiles = true; - } - } - assertTrue(pidOriginal_foundInCidRefFiles); - assertTrue(pidAdditional_foundInCidRefFiles); - } - - /** - * Confirm that updateRefsFile does not throw any exception if called to remove a value - * that is not found in a cid refs file. - */ - @Test - public void updateRefsFile_cidRefsPidNotFound() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath, "remove"); - - List lines = Files.readAllLines(cidRefsFilePath); - boolean pidOriginal_foundInCidRefFiles = false; - int pidsFound = 0; - for (String line : lines) { - pidsFound++; - if (line.equals(pid)) { - pidOriginal_foundInCidRefFiles = true; - } - } - assertTrue(pidOriginal_foundInCidRefFiles); - assertEquals(1, pidsFound); - } - - /** - * Confirm that updateRefsFile does not throw any exception if called to remove a value - * from a cid refs file that is empty - */ - @Test - public void updateRefsFile_cidRefsEmpty() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); - - List lines = Files.readAllLines(cidRefsFilePath); - boolean pidOriginal_foundInCidRefFiles = false; - int pidsFound = 0; - for (String line : lines) { - pidsFound++; - if (line.equals(pid)) { - pidOriginal_foundInCidRefFiles = true; - } - } - assertFalse(pidOriginal_foundInCidRefFiles); - assertEquals(0, pidsFound); - - // Confirm that no exception is thrown and that the cid refs still exists - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); - assertTrue(Files.exists(cidRefsFilePath)); - } - - /** - * Check that deleteCidRefsPid deletes pid from its cid refs file - */ - @Test - public void deleteCidRefsPid_pidRemoved() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - String pidAdditional = "dou.test.2"; - fileHashStore.tagObject(pidAdditional, cid); - - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); - - assertFalse(fileHashStore.isStringInRefsFile(pid, cidRefsFilePath)); - } - - /** - * Check that deleteCidRefsPid removes all pids as expected and leaves an - * empty file. - */ - @Test - public void deleteCidRefsPid_allPidsRemoved() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - String pidAdditional = "dou.test.2"; - fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); - fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "remove"); - - assertTrue(Files.exists(cidRefsFilePath)); - assertEquals(0, Files.size(cidRefsFilePath)); - } -} From 7474db3d2b00056974275021387838ebad582212 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 13:40:05 -0700 Subject: [PATCH 391/553] Rename 'FileHashStorePublicTest' to 'FileHashStoreInitTest' for accuracy --- ...{FileHashStorePublicTest.java => FileHashStoreInitTest.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/test/java/org/dataone/hashstore/filehashstore/{FileHashStorePublicTest.java => FileHashStoreInitTest.java} (99%) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java similarity index 99% rename from src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java rename to src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java index de9e6909..eb0bdae1 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java @@ -23,7 +23,7 @@ /** * Test class for FileHashStore constructor */ -public class FileHashStorePublicTest { +public class FileHashStoreInitTest { private static Path rootDirectory; private static Path objStringFull; private static Path objTmpStringFull; From 0e0dfcf735296d652a7b06b74805533a554c0a01 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 13:48:27 -0700 Subject: [PATCH 392/553] Add missing junit tests for 'isStringInRefsFile' in 'FileHashStoreProtectedTest' class --- .../FileHashStoreProtectedTest.java | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 4926e475..dd73e090 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1374,7 +1374,44 @@ public void writeRefsFile_content() throws Exception { assertEquals(cidRead, cidToWrite); } - // TODO: Add test for 'isStringInRefsFile' + /** + * Check isStringInRefsFile returns true when value is found + */ + @Test + public void isStringInRefsFile_found() throws Exception { + String cidToWrite = "test_cid_123"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + + assertTrue(fileHashStore.isStringInRefsFile(cidToWrite, pidRefsTmpFile.toPath())); + } + + /** + * Check isStringInRefsFile returns false when value is not found + */ + @Test + public void isStringInRefsFile_notFound() throws Exception { + String cidToWrite = "test_cid_123"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + + assertFalse(fileHashStore.isStringInRefsFile("not.found.in.ref", pidRefsTmpFile.toPath())); + } + + /** + * Check isStringInRefsFile returns true when value is found in a refs file with multiple values + * and returns false when a value isn't found + */ + @Test + public void isStringInRefsFile_cidRefsMultipleVals() throws Exception { + String cid = "abcdef123456789"; + fileHashStore.tagObject("dou.test.1", cid); + fileHashStore.tagObject("dou.test.2", cid); + fileHashStore.tagObject("dou.test.3", cid); + // Get path of the cid refs file + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + + assertTrue(fileHashStore.isStringInRefsFile("dou.test.1", cidRefsFilePath)); + assertFalse(fileHashStore.isStringInRefsFile("wont.be.found", cidRefsFilePath)); + } /** * Confirm that cid refs file has been updated successfully From 6c7e1517c798babf6c6296b084158d5437dfffe7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 14:07:09 -0700 Subject: [PATCH 393/553] Fix bug in 'validateTmpObject' and add missing 'validateTmpObject' junit tests --- .../filehashstore/FileHashStore.java | 41 ++++---- .../FileHashStoreProtectedTest.java | 93 +++++++++++++++++++ 2 files changed, 116 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 912c595a..0b37e209 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1182,23 +1182,23 @@ protected ObjectMetadata putObject( * @param checksumAlgorithm Hash algorithm of checksum value * @param tmpFile Path to the file that is being evaluated * @param hexDigests Map of the hex digests to parse data from - * @param objSize Expected size of object + * @param expectedSize Expected size of object * @param storedObjFileSize Actual size of object stored * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent */ - private void validateTmpObject( + protected void validateTmpObject( boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, - Map hexDigests, long objSize, long storedObjFileSize + Map hexDigests, long expectedSize, long storedObjFileSize ) throws NoSuchAlgorithmException, NonMatchingChecksumException, NonMatchingObjSizeException { - if (objSize > 0) { - if (objSize != storedObjFileSize) { + if (expectedSize > 0) { + if (expectedSize != storedObjFileSize) { // Delete tmp File try { Files.delete(tmpFile); } catch (Exception ge) { String errMsg = - "objSize given is not equal to the stored object size. ObjSize: " + objSize + "objSize given is not equal to the stored object size. ObjSize: " + expectedSize + ". storedObjFileSize: " + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); logFileHashStore.error(errMsg); @@ -1206,7 +1206,7 @@ private void validateTmpObject( } String errMsg = - "objSize given is not equal to the stored object size. ObjSize: " + objSize + "objSize given is not equal to the stored object size. ObjSize: " + expectedSize + ". storedObjFileSize: " + storedObjFileSize + ". Deleting tmpFile: " + tmpFile; logFileHashStore.error(errMsg); @@ -1218,29 +1218,34 @@ private void validateTmpObject( logFileHashStore.info("Validating object, checksum arguments supplied and valid."); String digestFromHexDigests = hexDigests.get(checksumAlgorithm); if (digestFromHexDigests == null) { - String errMsg = "Checksum not found in hex digest map when validating object. " - + "checksumAlgorithm checked: " + checksumAlgorithm; + String baseErrMsg = "Object cannot be validated. Algorithm not found in given " + + "hexDigests map. Algorithm requested: " + checksumAlgorithm; + try { + Files.delete(tmpFile); + } catch (Exception ge) { + String errMsg = baseErrMsg + ". Failed to delete tmpFile: " + tmpFile + ". " + + ge.getMessage(); + logFileHashStore.error(errMsg); + throw new NonMatchingChecksumException(errMsg); + } + String errMsg = baseErrMsg + ". tmpFile has been deleted: " + tmpFile; logFileHashStore.error(errMsg); throw new NoSuchAlgorithmException(errMsg); } if (!checksum.equalsIgnoreCase(digestFromHexDigests)) { - // Delete tmp File + String baseErrMsg = "Checksum given is not equal to the calculated hex digest: " + + digestFromHexDigests + ". Checksum" + " provided: " + checksum; try { Files.delete(tmpFile); - } catch (Exception ge) { - String errMsg = "Object cannot be validated. Checksum given is not equal to " - + "the calculated hex digest: " + digestFromHexDigests - + ". Checksum provided: " + checksum + ". Failed to delete tmpFile: " - + tmpFile + ". " + ge.getMessage(); + String errMsg = baseErrMsg + ". Failed to delete tmpFile: " + tmpFile + ". " + + ge.getMessage(); logFileHashStore.error(errMsg); throw new NonMatchingChecksumException(errMsg); } - String errMsg = "Checksum given is not equal to the calculated hex digest: " - + digestFromHexDigests + ". Checksum" + " provided: " + checksum - + ". tmpFile has been deleted: " + tmpFile; + String errMsg = baseErrMsg + ". tmpFile has been deleted: " + tmpFile; logFileHashStore.error(errMsg); throw new NonMatchingChecksumException(errMsg); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index dd73e090..b4cc3026 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -16,6 +16,7 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -26,6 +27,8 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; @@ -568,6 +571,96 @@ public void putObject_emptyAlgorithm() { }); } + /** + * Confirm validateTmpObject does nothing when requestValidation is false and does not throw + * any exceptions + */ + @Test + public void validateTmpObject() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + Path tmpFilePath = generateTemporaryFile().toPath(); + fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFilePath, + hexDigests, -1, 1); + } + + /** + * Confirm validateTmpObject does not throw exception when expected sie matches store size + */ + @Test + public void validateTmpObject_sizeMatches() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + Path tmpFilePath = generateTemporaryFile().toPath(); + fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFilePath, + hexDigests, 10, 10); + } + + /** + * Confirm validateTmpObject does not throw exception when expected sie matches store size + */ + @Test + public void validateTmpObject_sizeMismatch() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + Path tmpFilePath = generateTemporaryFile().toPath(); + + assertThrows(NonMatchingObjSizeException.class, + () -> fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", + tmpFilePath, hexDigests, 10, 20)); + } + + /** + * Confirm validateTmpObject does not throw exception when requested to validate checksums + * with good values + */ + @Test + public void validateTmpObject_validationRequested_matchingChecksum() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + Path tmpFilePath = generateTemporaryFile().toPath(); + fileHashStore.validateTmpObject(true, "sha256Digest", "SHA-256", tmpFilePath, + hexDigests, -1, 1); + } + + /** + * Confirm validateTmpObject does not throw exception when requested to validate checksums + * with good values, and that the tmpFile passed is deleted. + */ + @Test + public void validateTmpObject_validationRequested_nonMatchingChecksum() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + Path tmpFilePath = generateTemporaryFile().toPath(); + + assertThrows(NonMatchingChecksumException.class, + () -> fileHashStore.validateTmpObject(true, "checksum.string", "SHA-256", + tmpFilePath, hexDigests, -1, -1)); + assertFalse(Files.exists(tmpFilePath)); + } + + /** + * Confirm validateTmpObject throws exception when requested to validate but algo is not found + * in hex digests passed. + */ + @Test + public void validateTmpObject_validationRequested_algoNotFound() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + Path tmpFilePath = generateTemporaryFile().toPath(); + + assertThrows(NoSuchAlgorithmException.class, + () -> fileHashStore.validateTmpObject(true, "md2Digest", "MD2", + tmpFilePath, hexDigests, -1, -1)); + assertFalse(Files.exists(tmpFilePath)); + } + /** * Check algorithm support for supported algorithm */ From e9b4b24867554ffaf52c95d6ab289a8c59118749 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 12 Jul 2024 14:10:54 -0700 Subject: [PATCH 394/553] Reword javadoc for 'HashStoreClient' class --- src/main/java/org/dataone/hashstore/HashStoreClient.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index fcc6a6f7..b62cd0c5 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -34,9 +34,8 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; /** - * HashStoreClient is a command line interface is a development tool to create a new - * HashStore or interact directly with an existing HashStore through the command line. - * See 'README.md' for usage examples. + * HashStoreClient is a development tool used to create a new HashStore or interact directly with + * an existing HashStore through the command line. See 'README.md' for usage examples. */ public class HashStoreClient { private static HashStore hashStore; From 6ad3c01e7dcbeaf7e844b70c5a58669aa936b36a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jul 2024 10:10:50 -0700 Subject: [PATCH 395/553] Re-activate disabled runnable test, update 'putObject' to delete redundant tmp file before logging warning --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0b37e209..9ac4c96f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1157,11 +1157,11 @@ protected ObjectMetadata putObject( // Confirm that the object does not yet exist, delete tmpFile if so if (Files.exists(objRealPath)) { + Files.delete(tmpFilePath); String errMsg = "File already exists for pid: " + pid + ". Object address: " + objRealPath + ". Deleting temporary file."; logFileHashStore.warn(errMsg); - Files.delete(tmpFilePath); } else { // Move object File permFile = objRealPath.toFile(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 0cb89980..aaa6004d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1960,7 +1960,8 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { * Test deleteObject synchronization using a Runnable class * TODO: Reactivate with @Test once bug has been investigated */ - @Disabled +// @Disabled + @Test public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { // Get single test file to "upload" String pid = "jtao.1700.1"; From 30d7f88f88e0b3dd510bfcc483e0371e752152d8 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jul 2024 12:16:21 -0700 Subject: [PATCH 396/553] Rework/optimize 'putObject' routine and update junit tests --- .../filehashstore/FileHashStore.java | 45 ++++++++----------- .../FileHashStoreInterfaceTest.java | 1 - .../FileHashStoreProtectedTest.java | 45 +++++++++++-------- 3 files changed, 46 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 9ac4c96f..c3c02e2d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1105,7 +1105,7 @@ protected ObjectMetadata putObject( AtomicMoveNotSupportedException { logFileHashStore.debug("Begin writing data object for pid: " + pid); // If validation is desired, checksumAlgorithm and checksum must both be present - boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); + boolean compareChecksum = verifyChecksumParameters(checksum, checksumAlgorithm); // Validate additional algorithm if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { FileHashStoreUtility.checkForEmptyString( @@ -1118,9 +1118,7 @@ protected ObjectMetadata putObject( } // Generate tmp file and write to it - logFileHashStore.debug("Generating tmpFile"); File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); - Path tmpFilePath = tmpFile.toPath(); Map hexDigests; try { hexDigests = writeToTmpFileAndGenerateChecksums( @@ -1128,25 +1126,17 @@ protected ObjectMetadata putObject( ); } catch (Exception ge) { // If the process to write to the tmpFile is interrupted for any reason, - // we will delete the tmpFile. - boolean deleteStatus = tmpFile.delete(); - String errMsg = "Unexpected Exception while storing object for: " + pid; - if (deleteStatus) { - errMsg = errMsg + ". Deleting temp file: " + tmpFile + ". Aborting request."; - } else { - errMsg = - errMsg + ". Failed to delete temp file: " + tmpFile + ". Aborting request."; - } + // we will delete the tmpFile. + Files.delete(tmpFile.toPath()); + String errMsg = + "Unexpected Exception while storing object for pid: " + pid + ". " + ge.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } - long storedObjFileSize = Files.size(Paths.get(tmpFile.toString())); // Validate object if checksum and checksum algorithm is passed validateTmpObject( - requestValidation, checksum, checksumAlgorithm, tmpFilePath, hexDigests, objSize, - storedObjFileSize - ); + compareChecksum, checksum, checksumAlgorithm, tmpFile, hexDigests, objSize); // Gather the elements to form the permanent address String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); @@ -1157,7 +1147,7 @@ protected ObjectMetadata putObject( // Confirm that the object does not yet exist, delete tmpFile if so if (Files.exists(objRealPath)) { - Files.delete(tmpFilePath); + Files.delete(tmpFile.toPath()); String errMsg = "File already exists for pid: " + pid + ". Object address: " + objRealPath + ". Deleting temporary file."; @@ -1168,16 +1158,17 @@ protected ObjectMetadata putObject( move(tmpFile, permFile, "object"); logFileHashStore.debug("Successfully moved data object: " + objRealPath); } + long storedObjFileSize = Files.size(objRealPath); return new ObjectMetadata(pid, objectCid, storedObjFileSize, hexDigests); } /** - * If requestValidation is true, determines the integrity of an object with a given checksum & + * If compareChecksum is true, determines the integrity of an object with a given checksum & * algorithm against a list of hex digests. If there is a mismatch, the tmpFile will be deleted * and exceptions will be thrown. * - * @param requestValidation Boolean to decide whether to proceed with validation + * @param compareChecksum Decide whether to proceed with comparing checksums * @param checksum Expected checksum value of object * @param checksumAlgorithm Hash algorithm of checksum value * @param tmpFile Path to the file that is being evaluated @@ -1187,14 +1178,16 @@ protected ObjectMetadata putObject( * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent */ protected void validateTmpObject( - boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, - Map hexDigests, long expectedSize, long storedObjFileSize - ) throws NoSuchAlgorithmException, NonMatchingChecksumException, NonMatchingObjSizeException { + boolean compareChecksum, String checksum, String checksumAlgorithm, File tmpFile, + Map hexDigests, long expectedSize + ) throws NoSuchAlgorithmException, NonMatchingChecksumException, NonMatchingObjSizeException, + IOException { if (expectedSize > 0) { + long storedObjFileSize = Files.size(Paths.get(tmpFile.toString())); if (expectedSize != storedObjFileSize) { // Delete tmp File try { - Files.delete(tmpFile); + Files.delete(tmpFile.toPath()); } catch (Exception ge) { String errMsg = @@ -1214,14 +1207,14 @@ protected void validateTmpObject( } } - if (requestValidation) { + if (compareChecksum) { logFileHashStore.info("Validating object, checksum arguments supplied and valid."); String digestFromHexDigests = hexDigests.get(checksumAlgorithm); if (digestFromHexDigests == null) { String baseErrMsg = "Object cannot be validated. Algorithm not found in given " + "hexDigests map. Algorithm requested: " + checksumAlgorithm; try { - Files.delete(tmpFile); + Files.delete(tmpFile.toPath()); } catch (Exception ge) { String errMsg = baseErrMsg + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); @@ -1237,7 +1230,7 @@ protected void validateTmpObject( String baseErrMsg = "Checksum given is not equal to the calculated hex digest: " + digestFromHexDigests + ". Checksum" + " provided: " + checksum; try { - Files.delete(tmpFile); + Files.delete(tmpFile.toPath()); } catch (Exception ge) { String errMsg = baseErrMsg + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index aaa6004d..4b7eecc8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1960,7 +1960,6 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { * Test deleteObject synchronization using a Runnable class * TODO: Reactivate with @Test once bug has been investigated */ -// @Disabled @Test public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { // Get single test file to "upload" diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index b4cc3026..5770959b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -9,6 +9,7 @@ import java.io.File; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -580,9 +581,9 @@ public void validateTmpObject() throws Exception { Map hexDigests = new HashMap<>(); hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); - Path tmpFilePath = generateTemporaryFile().toPath(); - fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFilePath, - hexDigests, -1, 1); + File tmpFile = generateTemporaryFile(); + fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFile, + hexDigests, -1); } /** @@ -593,9 +594,17 @@ public void validateTmpObject_sizeMatches() throws Exception { Map hexDigests = new HashMap<>(); hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); - Path tmpFilePath = generateTemporaryFile().toPath(); - fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFilePath, - hexDigests, 10, 10); + File tmpFile = generateTemporaryFile(); + + // Write the byte to the file + try (FileOutputStream fos = new FileOutputStream(tmpFile)) { + fos.write(0x41); + } catch (IOException e) { + e.printStackTrace(); + } + + fileHashStore.validateTmpObject(false, "sha256Digest", "SHA-256", tmpFile, + hexDigests, 1); } /** @@ -606,14 +615,14 @@ public void validateTmpObject_sizeMismatch() throws Exception { Map hexDigests = new HashMap<>(); hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); - Path tmpFilePath = generateTemporaryFile().toPath(); + File tmpFile = generateTemporaryFile(); assertThrows(NonMatchingObjSizeException.class, () -> fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", - tmpFilePath, hexDigests, 10, 20)); + tmpFile, hexDigests, 10)); } - /** + /**GG * Confirm validateTmpObject does not throw exception when requested to validate checksums * with good values */ @@ -622,9 +631,9 @@ public void validateTmpObject_validationRequested_matchingChecksum() throws Exce Map hexDigests = new HashMap<>(); hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); - Path tmpFilePath = generateTemporaryFile().toPath(); - fileHashStore.validateTmpObject(true, "sha256Digest", "SHA-256", tmpFilePath, - hexDigests, -1, 1); + File tmpFile = generateTemporaryFile(); + fileHashStore.validateTmpObject(true, "sha256Digest", "SHA-256", tmpFile, + hexDigests, -1); } /** @@ -636,12 +645,12 @@ public void validateTmpObject_validationRequested_nonMatchingChecksum() throws E Map hexDigests = new HashMap<>(); hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); - Path tmpFilePath = generateTemporaryFile().toPath(); + File tmpFile = generateTemporaryFile(); assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.validateTmpObject(true, "checksum.string", "SHA-256", - tmpFilePath, hexDigests, -1, -1)); - assertFalse(Files.exists(tmpFilePath)); + tmpFile, hexDigests, -1)); + assertFalse(Files.exists(tmpFile.toPath())); } /** @@ -653,12 +662,12 @@ public void validateTmpObject_validationRequested_algoNotFound() throws Exceptio Map hexDigests = new HashMap<>(); hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); - Path tmpFilePath = generateTemporaryFile().toPath(); + File tmpFile = generateTemporaryFile(); assertThrows(NoSuchAlgorithmException.class, () -> fileHashStore.validateTmpObject(true, "md2Digest", "MD2", - tmpFilePath, hexDigests, -1, -1)); - assertFalse(Files.exists(tmpFilePath)); + tmpFile, hexDigests, -1)); + assertFalse(Files.exists(tmpFile.toPath())); } /** From 8f323f532527541554dcc43e2567a31bc0925a02 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jul 2024 13:38:44 -0700 Subject: [PATCH 397/553] Revise error statement in 'putObject' and variable name in intermittently failing test to assist with debugging --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 2 +- .../hashstore/filehashstore/FileHashStoreInterfaceTest.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c3c02e2d..a1b6bc19 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1150,7 +1150,7 @@ protected ObjectMetadata putObject( Files.delete(tmpFile.toPath()); String errMsg = "File already exists for pid: " + pid + ". Object address: " + objRealPath - + ". Deleting temporary file."; + + ". Deleting temporary file: " + tmpFile; logFileHashStore.warn(errMsg); } else { // Move object diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 4b7eecc8..f391532e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -1958,7 +1958,6 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { /** * Test deleteObject synchronization using a Runnable class - * TODO: Reactivate with @Test once bug has been investigated */ @Test public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { @@ -1968,7 +1967,7 @@ public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { Collection pidModifiedList = new ArrayList<>(); for (int i = 1; i <= 1000; i++) { - pidModifiedList.add(pid + ".dou.test." + i); + pidModifiedList.add(pid + ".dou.delobj1k." + i); } Runtime runtime = Runtime.getRuntime(); From 6ff1336b8ce61ab836fc4ee7b840cf3a087d20f1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jul 2024 14:06:56 -0700 Subject: [PATCH 398/553] Update README.md --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a29c418d..59335555 100644 --- a/README.md +++ b/README.md @@ -40,12 +40,12 @@ the expected usage of HashStore. ###### Public API Methods - storeObject -- verifyObject - tagObject - storeMetadata - retrieveObject - retrieveMetadata - deleteObject +- deleteInvalidObject - deleteMetadata - getHexDigest @@ -95,8 +95,8 @@ once and only once. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identifier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. -The client is then expected to call `verifyObject` when the relevant metadata is available to -confirm that the object has been stored as expected. And to finalize the process (to make the object +The client is then expected to call `deleteInvalidObject` when the relevant metadata is available to +confirm that the object is what is expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: @@ -107,8 +107,9 @@ objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, chec // Manual Process // Store object objectMetadata objInfo = storeObject(InputStream) -// Validate object, returns False if there is a mismatch and deletes the associated file -verifyObject(objInfo, checksum, checksumAlgorithn, objSize, true) +// Validate object, if the parameters do not match, the data object associated with the objInfo +// supplied will be deleted +- deleteInvalidObject(objInfo, checksum, checksumAlgorithn, objSize) // Tag object, makes the object discoverable (find, retrieve, delete) tagObject(pid, cid) ``` From 8bee29d97392ba9c2f7a492340543d68a2aae26f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 17 Jul 2024 14:07:34 -0700 Subject: [PATCH 399/553] Update HashStore interface, rename 'verifyObject' to 'deleteInvalidObject' in 'FileHashStore' and update junit tests --- .../java/org/dataone/hashstore/HashStore.java | 2 +- .../filehashstore/FileHashStore.java | 148 +++++++++--------- .../FileHashStoreInterfaceTest.java | 54 +++---- 3 files changed, 103 insertions(+), 101 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 3694b79e..2d917e9e 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -119,7 +119,7 @@ void tagObject(String pid, String cid) throws IOException, * @throws IOException Issue with recalculating supported algo for * checksum not found */ - void verifyObject( + void deleteInvalidObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a1b6bc19..0a7b45c9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -513,8 +513,9 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce // algorithm, etc.) is unavailable. // // Note: This method does not tag the object to make it discoverable, so the client can - // call 'verifyObject' (optional) to check that the object is valid, and then 'tagObject' - // (required) to create the reference files needed to associate the respective pids/cids. + // call 'deleteInvalidObject' (optional) to check that the object is valid, and then + // 'tagObject' (required) to create the reference files needed to associate the + // respective pids/cids. return putObject(object, "HashStoreNoPid", null, null, null, -1); } @@ -557,77 +558,6 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } } - @Override - public void verifyObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) - throws NonMatchingObjSizeException, NonMatchingChecksumException, - UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, - IOException { - logFileHashStore.debug("Verifying data object for cid: " + objectInfo.getCid()); - // Validate input parameters - FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "verifyObject"); - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "verifyObject"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "verifyObject"); - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "verifyObject"); - - String objCid = objectInfo.getCid(); - long objInfoRetrievedSize = objectInfo.getSize(); - Map hexDigests = objectInfo.getHexDigests(); - String digestFromHexDigests = hexDigests.get(checksumAlgorithm); - - // Confirm that requested checksum to verify against is available - if (digestFromHexDigests == null) { - try { - validateAlgorithm(checksumAlgorithm); - // If no exceptions thrown, calculate the checksum with the given algo - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objCid - ); - Path pathToCidObject = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - try (InputStream inputStream = Files.newInputStream(pathToCidObject)) { - digestFromHexDigests = FileHashStoreUtility.calculateHexDigest(inputStream, - checksumAlgorithm); - } catch (IOException ioe) { - String errMsg = - "Unexpected error when calculating a checksum for cid: " + objCid - + " with algorithm (" + checksumAlgorithm - + ") that is not part of the default list. " + ioe.getMessage(); - throw new IOException(errMsg); - } - } catch (NoSuchAlgorithmException nsae) { - String errMsg = "checksumAlgorithm given: " + checksumAlgorithm - + " is not supported. Supported algorithms: " + Arrays.toString( - SUPPORTED_HASH_ALGORITHMS); - logFileHashStore.error(errMsg); - throw new UnsupportedHashAlgorithmException(errMsg); - } - } - // Validate checksum - if (!digestFromHexDigests.equals(checksum)) { - deleteObjectByCid(objCid); - String errMsg = - "Object content invalid for cid: " + objCid + ". Expected checksum: " + checksum - + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " - + checksumAlgorithm + ")"; - logFileHashStore.error(errMsg); - throw new NonMatchingChecksumException(errMsg); - } - // Validate size - if (objInfoRetrievedSize != objSize) { - deleteObjectByCid(objCid); - String errMsg = "Object size invalid for cid: " + objCid + ". Expected size: " + objSize - + ". Actual size: " + objInfoRetrievedSize; - logFileHashStore.error(errMsg); - throw new NonMatchingObjSizeException(errMsg); - } - - String infoMsg = - "Object has been validated for cid: " + objCid + ". Expected checksum: " + checksum - + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " - + checksumAlgorithm + ")"; - logFileHashStore.info(infoMsg); - } - @Override public String storeMetadata(InputStream metadata, String pid, String formatId) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, @@ -871,6 +801,78 @@ public void deleteObject(String pid) } } + + @Override + public void deleteInvalidObject( + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) + throws NonMatchingObjSizeException, NonMatchingChecksumException, + UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, + IOException { + logFileHashStore.debug("Verifying data object for cid: " + objectInfo.getCid()); + // Validate input parameters + FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "deleteInvalidObject"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "deleteInvalidObject"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "deleteInvalidObject"); + FileHashStoreUtility.checkNotNegativeOrZero(objSize, "deleteInvalidObject"); + + String objCid = objectInfo.getCid(); + long objInfoRetrievedSize = objectInfo.getSize(); + Map hexDigests = objectInfo.getHexDigests(); + String digestFromHexDigests = hexDigests.get(checksumAlgorithm); + + // Confirm that requested checksum to verify against is available + if (digestFromHexDigests == null) { + try { + validateAlgorithm(checksumAlgorithm); + // If no exceptions thrown, calculate the checksum with the given algo + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, objCid + ); + Path pathToCidObject = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + try (InputStream inputStream = Files.newInputStream(pathToCidObject)) { + digestFromHexDigests = FileHashStoreUtility.calculateHexDigest(inputStream, + checksumAlgorithm); + } catch (IOException ioe) { + String errMsg = + "Unexpected error when calculating a checksum for cid: " + objCid + + " with algorithm (" + checksumAlgorithm + + ") that is not part of the default list. " + ioe.getMessage(); + throw new IOException(errMsg); + } + } catch (NoSuchAlgorithmException nsae) { + String errMsg = "checksumAlgorithm given: " + checksumAlgorithm + + " is not supported. Supported algorithms: " + Arrays.toString( + SUPPORTED_HASH_ALGORITHMS); + logFileHashStore.error(errMsg); + throw new UnsupportedHashAlgorithmException(errMsg); + } + } + // Validate checksum + if (!digestFromHexDigests.equals(checksum)) { + deleteObjectByCid(objCid); + String errMsg = + "Object content invalid for cid: " + objCid + ". Expected checksum: " + checksum + + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + + checksumAlgorithm + ")"; + logFileHashStore.error(errMsg); + throw new NonMatchingChecksumException(errMsg); + } + // Validate size + if (objInfoRetrievedSize != objSize) { + deleteObjectByCid(objCid); + String errMsg = "Object size invalid for cid: " + objCid + ". Expected size: " + objSize + + ". Actual size: " + objInfoRetrievedSize; + logFileHashStore.error(errMsg); + throw new NonMatchingObjSizeException(errMsg); + } + + String infoMsg = + "Object has been validated for cid: " + objCid + ". Expected checksum: " + checksum + + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + + checksumAlgorithm + ")"; + logFileHashStore.info(infoMsg); + } + @Override public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index f391532e..8e679e3e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -819,10 +819,10 @@ public void tagObject_PidRefsFileExistsException() throws Exception { } /** - * Check that verifyObject does not throw exception with matching values + * Check that deleteInvalidObject does not throw exception with matching values */ @Test - public void verifyObject_correctValues() throws Exception { + public void deleteInvalidObject_correctValues() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -833,11 +833,11 @@ public void verifyObject_correctValues() throws Exception { String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get verifyObject args + // Get deleteInvalidObject args String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.verifyObject( + fileHashStore.deleteInvalidObject( objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -853,11 +853,11 @@ public void verifyObject_correctValues() throws Exception { } /** - * Check that verifyObject calculates and verifies a checksum with a supported algorithm that is + * Check that deleteInvalidObject calculates and verifies a checksum with a supported algorithm that is * not included in the default list */ @Test - public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { + public void deleteInvalidObject_supportedAlgoNotInDefaultList() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -866,11 +866,11 @@ public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); dataStream.close(); - // Get verifyObject args + // Get deleteInvalidObject args String expectedChecksum = testData.pidData.get(pid).get("md2"); long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.verifyObject(objInfo, expectedChecksum, "MD2", expectedSize); + fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, "MD2", expectedSize); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -885,11 +885,11 @@ public void verifyObject_supportedAlgoNotInDefaultList() throws Exception { } /** - * Check that verifyObject calculates throws exception when given a checksumAlgorithm that is + * Check that deleteInvalidObject calculates throws exception when given a checksumAlgorithm that is * not supported */ @Test - public void verifyObject_unsupportedAlgo() throws Exception { + public void deleteInvalidObject_unsupportedAlgo() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -900,7 +900,7 @@ public void verifyObject_unsupportedAlgo() throws Exception { assertThrows( UnsupportedHashAlgorithmException.class, - () -> fileHashStore.verifyObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); + () -> fileHashStore.deleteInvalidObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -915,10 +915,10 @@ public void verifyObject_unsupportedAlgo() throws Exception { } /** - * Check that verifyObject throws exception when non-matching size value provided + * Check that deleteInvalidObject throws exception when non-matching size value provided */ @Test - public void verifyObject_mismatchedSize() throws Exception { + public void deleteInvalidObject_mismatchedSize() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -929,13 +929,13 @@ public void verifyObject_mismatchedSize() throws Exception { String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get verifyObject args + // Get deleteInvalidObject args String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; assertThrows( NonMatchingObjSizeException.class, - () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + () -> fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -951,10 +951,10 @@ public void verifyObject_mismatchedSize() throws Exception { } /** - * Check that verifyObject throws exception with non-matching checksum value + * Check that deleteInvalidObject throws exception with non-matching checksum value */ @Test - public void verifyObject_mismatchedChecksum() throws Exception { + public void deleteInvalidObject_mismatchedChecksum() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -965,13 +965,13 @@ public void verifyObject_mismatchedChecksum() throws Exception { String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get verifyObject args + // Get deleteInvalidObject args String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); assertThrows( NonMatchingChecksumException.class, - () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + () -> fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -987,10 +987,10 @@ public void verifyObject_mismatchedChecksum() throws Exception { } /** - * Check that verifyObject throws exception when non-matching size value provided + * Check that deleteInvalidObject throws exception when non-matching size value provided */ @Test - public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Exception { + public void deleteInvalidObject_mismatchedSize_deleteInvalidObject_true() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1001,13 +1001,13 @@ public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Except String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get verifyObject args + // Get deleteInvalidObject args String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; assertThrows( NonMatchingObjSizeException.class, - () -> fileHashStore.verifyObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + () -> fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize)); @@ -1024,10 +1024,10 @@ public void verifyObject_mismatchedSize_deleteInvalidObject_true() throws Except } /** - * Check that verifyObject throws exception with non-matching checksum value + * Check that deleteInvalidObject throws exception with non-matching checksum value */ @Test - public void verifyObject_mismatchedChecksum_deleteInvalidObject_true() throws Exception { + public void deleteInvalidObject_mismatchedChecksum_deleteInvalidObject_true() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1038,11 +1038,11 @@ public void verifyObject_mismatchedChecksum_deleteInvalidObject_true() throws Ex String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get verifyObject args + // Get deleteInvalidObject args String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.verifyObject( + assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.deleteInvalidObject( objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize )); From 71dc4863450b4833f44590031ed971699b4c2bca Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 08:49:18 -0700 Subject: [PATCH 400/553] Move synchronization code calls into try blocks --- .../filehashstore/FileHashStore.java | 65 +++++++++---------- 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0a7b45c9..00dc5f3b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -437,20 +437,20 @@ private ObjectMetadata syncPutObject( String checksumAlgorithm, long objSize ) throws NoSuchAlgorithmException, PidRefsFileExistsException, IOException, RuntimeException, InterruptedException { - // Lock pid for thread safety, transaction control and atomic writing - // An object is stored once and only once - synchronized (objectLockedIds) { - if (objectLockedIds.contains(pid)) { - String errMsg = "Duplicate object request encountered for pid: " + pid - + ". Already in progress."; - logFileHashStore.warn(errMsg); - throw new RuntimeException(errMsg); + try { + // Lock pid for thread safety, transaction control and atomic writing + // An object is stored once and only once + synchronized (objectLockedIds) { + if (objectLockedIds.contains(pid)) { + String errMsg = "Duplicate object request encountered for pid: " + pid + + ". Already in progress."; + logFileHashStore.warn(errMsg); + throw new RuntimeException(errMsg); + } + logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); + objectLockedIds.add(pid); } - logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); - objectLockedIds.add(pid); - } - try { logFileHashStore.debug( "putObject() called to store pid: " + pid + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum + ". checksumAlgorithm: " @@ -530,10 +530,9 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForEmptyString(pid, "pid", "tagObject"); FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); - // tagObject is synchronized with deleteObject based on a `cid` - synchronizeReferencedLockedCids(cid); - try { + // tagObject is synchronized with deleteObject based on a `cid` + synchronizeReferencedLockedCids(cid); storeHashStoreRefsFiles(pid, cid); } catch (HashStoreRefsAlreadyExistException hsrfae) { @@ -589,12 +588,11 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF String pidFormatId = pid + checkedFormatId; String metadataDocId = FileHashStoreUtility.getPidHexDigest(pidFormatId, OBJECT_STORE_ALGORITHM); - synchronizeMetadataLockedIds(metadataDocId); - + logFileHashStore.debug( + "putMetadata() called to store metadata for pid: " + pid + ", with formatId: " + + checkedFormatId + " for metadata document: " + metadataDocId); try { - logFileHashStore.debug( - "putMetadata() called to store metadata for pid: " + pid + ", with formatId: " - + checkedFormatId + " for metadata document: " + metadataDocId); + synchronizeMetadataLockedIds(metadataDocId); // Store metadata String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( @@ -701,12 +699,12 @@ public void deleteObject(String pid) FileHashStoreUtility.checkForEmptyString(pid, "id", "deleteObject"); Collection deleteList = new ArrayList<>(); - // Storing, deleting and untagging objects are synchronized together - // Duplicate store object requests for a pid are rejected, but deleting an object - // will wait for a pid to be released if it's found to be in use before proceeding. - synchronizeObjectLockedIds(pid); - try { + // Storing, deleting and untagging objects are synchronized together + // Duplicate store object requests for a pid are rejected, but deleting an object + // will wait for a pid to be released if it's found to be in use before proceeding. + synchronizeObjectLockedIds(pid); + // Before we begin deletion process, we look for the `cid` by calling // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. @@ -761,10 +759,11 @@ public void deleteObject(String pid) // but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - // Since we must access the cid reference file, the `cid` must be synchronized - synchronizeReferencedLockedCids(cidRead); try { + // Since we must access the cid reference file, the `cid` must be synchronized + synchronizeReferencedLockedCids(cidRead); + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); updateRefsFile(pid, absCidRefsPath, "remove"); @@ -936,8 +935,8 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept */ protected static void syncRenameMetadataDocForDeletion(Collection deleteList, Path metadataDocAbsPath, String metadataDocId) throws InterruptedException, IOException { - synchronizeMetadataLockedIds(metadataDocId); try { + synchronizeMetadataLockedIds(metadataDocId); if (Files.exists(metadataDocAbsPath)) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDocAbsPath)); } @@ -1540,9 +1539,8 @@ protected void deleteObjectByCid(String cid) FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid); Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - synchronizeReferencedLockedCids(cid); - try { + synchronizeReferencedLockedCids(cid); if (Files.exists(absCidRefsPath)) { // The cid refs file exists, so the cid object cannot be deleted. String warnMsg = "cid refs file still contains references, skipping deletion."; @@ -1665,9 +1663,9 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, FileHashStoreUtility.checkForEmptyString(cid, "cid", "unTagObject"); Collection deleteList = new ArrayList<>(); - synchronizeObjectLockedIds(pid); try { + synchronizeObjectLockedIds(pid); // Before we begin untagging process, we look for the `cid` by calling // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. @@ -1716,10 +1714,11 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, // but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - // Since we must access the cid reference file, the `cid` must be synchronized - synchronizeReferencedLockedCids(cidRead); try { + // Since we must access the cid reference file, the `cid` must be synchronized + synchronizeReferencedLockedCids(cidRead); + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); updateRefsFile(pid, absCidRefsPath, "remove"); From 28cfa7abde750b46859a5eb55dee3da7f3aa470d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 09:27:43 -0700 Subject: [PATCH 401/553] Add missing synchronization for data object cid when data object is about to be moved to its final location --- .../filehashstore/FileHashStore.java | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 00dc5f3b..48f0430a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -506,7 +506,7 @@ private ObjectMetadata syncPutObject( */ @Override public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, - IOException, PidRefsFileExistsException, RuntimeException { + IOException, PidRefsFileExistsException, RuntimeException, InterruptedException { // 'putObject' is called directly to bypass the pid synchronization implemented to // efficiently handle object store requests without a pid. This scenario occurs when // metadata about the object (ex. form data including the pid, checksum, checksum @@ -1097,13 +1097,14 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep * etc.) * @throws NullPointerException Arguments are null for pid or object * @throws AtomicMoveNotSupportedException When attempting to move files across file systems + * @throws InterruptedException An issue synchronizing the cid when moving object */ protected ObjectMetadata putObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize ) throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, PidRefsFileExistsException, IllegalArgumentException, NullPointerException, - AtomicMoveNotSupportedException { + AtomicMoveNotSupportedException, InterruptedException { logFileHashStore.debug("Begin writing data object for pid: " + pid); // If validation is desired, checksumAlgorithm and checksum must both be present boolean compareChecksum = verifyChecksumParameters(checksum, checksumAlgorithm); @@ -1146,22 +1147,33 @@ protected ObjectMetadata putObject( ); Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); - // Confirm that the object does not yet exist, delete tmpFile if so - if (Files.exists(objRealPath)) { - Files.delete(tmpFile.toPath()); + try { + synchronizeReferencedLockedCids(objectCid); + // Confirm that the object does not yet exist, delete tmpFile if so + if (!Files.exists(objRealPath)) { + logFileHashStore.info("Storing tmpFile: " + tmpFile); + // Move object + File permFile = objRealPath.toFile(); + move(tmpFile, permFile, "object"); + logFileHashStore.debug("Successfully moved data object: " + objRealPath); + } else { + Files.delete(tmpFile.toPath()); + String errMsg = + "File already exists for pid: " + pid + ". Object address: " + objRealPath + + ". Deleting temporary file: " + tmpFile; + logFileHashStore.warn(errMsg); + } + } catch (Exception e) { String errMsg = - "File already exists for pid: " + pid + ". Object address: " + objRealPath - + ". Deleting temporary file: " + tmpFile; - logFileHashStore.warn(errMsg); - } else { - // Move object - File permFile = objRealPath.toFile(); - move(tmpFile, permFile, "object"); - logFileHashStore.debug("Successfully moved data object: " + objRealPath); + "Unexpected exception when moving object with cid: " + objectCid + " for pid:" + + pid + ". Additional Details: " + e.getMessage(); + logFileHashStore.error(errMsg); + throw e; + } finally { + releaseReferencedLockedCids(objectCid); } - long storedObjFileSize = Files.size(objRealPath); - return new ObjectMetadata(pid, objectCid, storedObjFileSize, hexDigests); + return new ObjectMetadata(pid, objectCid, Files.size(objRealPath), hexDigests); } /** @@ -2203,9 +2215,9 @@ private static void releaseMetadataLockedIds(String metadataDocId) { } /** - * Multiple threads may access the cid reference file (which contains a list of `pid`s that - * reference a `cid`) and this needs to be coordinated. Otherwise, we will run into a - * `OverlappingFileLockException` + * Multiple threads may access a data object or the respective cid reference file (which + * contains a list of `pid`s that reference a `cid`) and this needs to be coordinated. + * Otherwise, we may run into unexpected exceptions (ex. `OverlappingFileLockException`) * * @param cid Content identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid From b0f86a5641496e167c30b2b10c627553078d6861 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 09:31:51 -0700 Subject: [PATCH 402/553] Rename synchronization array from 'objectLockedIds' to 'objectLockedPids' to improve clarity --- .../filehashstore/FileHashStore.java | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 48f0430a..2d0ce3e0 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -55,7 +55,7 @@ public class FileHashStore implements HashStore { private static final Log logFileHashStore = LogFactory.getLog(FileHashStore.class); private static final int TIME_OUT_MILLISEC = 1000; - private static final Collection objectLockedIds = new ArrayList<>(100); + private static final Collection objectLockedPids = new ArrayList<>(100); private static final Collection metadataLockedIds = new ArrayList<>(100); private static final Collection referenceLockedCids = new ArrayList<>(100); private final Path STORE_ROOT; @@ -440,15 +440,15 @@ private ObjectMetadata syncPutObject( try { // Lock pid for thread safety, transaction control and atomic writing // An object is stored once and only once - synchronized (objectLockedIds) { - if (objectLockedIds.contains(pid)) { + synchronized (objectLockedPids) { + if (objectLockedPids.contains(pid)) { String errMsg = "Duplicate object request encountered for pid: " + pid + ". Already in progress."; logFileHashStore.warn(errMsg); throw new RuntimeException(errMsg); } - logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); - objectLockedIds.add(pid); + logFileHashStore.debug("Synchronizing objectLockedPids for pid: " + pid); + objectLockedPids.add(pid); } logFileHashStore.debug( @@ -497,7 +497,7 @@ private ObjectMetadata syncPutObject( } finally { // Release lock - releaseObjectLockedIds(pid); + releaseObjectLockedPids(pid); } } @@ -703,7 +703,7 @@ public void deleteObject(String pid) // Storing, deleting and untagging objects are synchronized together // Duplicate store object requests for a pid are rejected, but deleting an object // will wait for a pid to be released if it's found to be in use before proceeding. - synchronizeObjectLockedIds(pid); + synchronizeObjectLockedPids(pid); // Before we begin deletion process, we look for the `cid` by calling // `findObject` which will throw custom exceptions if there is an issue with @@ -796,7 +796,7 @@ public void deleteObject(String pid) } } finally { // Release lock - releaseObjectLockedIds(pid); + releaseObjectLockedPids(pid); } } @@ -1677,7 +1677,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, Collection deleteList = new ArrayList<>(); try { - synchronizeObjectLockedIds(pid); + synchronizeObjectLockedPids(pid); // Before we begin untagging process, we look for the `cid` by calling // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. @@ -1773,7 +1773,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } } } finally { - releaseObjectLockedIds(pid); + releaseObjectLockedPids(pid); } } @@ -2141,12 +2141,12 @@ protected Path getHashStoreRefsPath(String abpcId, String refType) * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - private static void synchronizeObjectLockedIds(String pid) + private static void synchronizeObjectLockedPids(String pid) throws InterruptedException { - synchronized (objectLockedIds) { - while (objectLockedIds.contains(pid)) { + synchronized (objectLockedPids) { + while (objectLockedPids.contains(pid)) { try { - objectLockedIds.wait(TIME_OUT_MILLISEC); + objectLockedPids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = @@ -2155,21 +2155,21 @@ private static void synchronizeObjectLockedIds(String pid) throw new InterruptedException(errMsg); } } - logFileHashStore.debug("Synchronizing objectLockedIds for pid: " + pid); - objectLockedIds.add(pid); + logFileHashStore.debug("Synchronizing objectLockedPids for pid: " + pid); + objectLockedPids.add(pid); } } /** - * Remove the given pid from 'objectLockedIds' and notify other threads + * Remove the given pid from 'objectLockedPids' and notify other threads * * @param pid Content identifier */ - private static void releaseObjectLockedIds(String pid) { - synchronized (objectLockedIds) { - logFileHashStore.debug("Releasing objectLockedIds for pid: " + pid); - objectLockedIds.remove(pid); - objectLockedIds.notify(); + private static void releaseObjectLockedPids(String pid) { + synchronized (objectLockedPids) { + logFileHashStore.debug("Releasing objectLockedPids for pid: " + pid); + objectLockedPids.remove(pid); + objectLockedPids.notify(); } } From 2fc6721ae6e01834ca8758f7619c3e6cc832ece2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 09:36:13 -0700 Subject: [PATCH 403/553] Rename synchronization array from 'metadataLockedIds' to 'metadataLockedDocIds' to improve clarity --- .../filehashstore/FileHashStore.java | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2d0ce3e0..b8deb594 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -56,7 +56,7 @@ public class FileHashStore implements HashStore { private static final Log logFileHashStore = LogFactory.getLog(FileHashStore.class); private static final int TIME_OUT_MILLISEC = 1000; private static final Collection objectLockedPids = new ArrayList<>(100); - private static final Collection metadataLockedIds = new ArrayList<>(100); + private static final Collection metadataLockedDocIds = new ArrayList<>(100); private static final Collection referenceLockedCids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; @@ -592,7 +592,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF "putMetadata() called to store metadata for pid: " + pid + ", with formatId: " + checkedFormatId + " for metadata document: " + metadataDocId); try { - synchronizeMetadataLockedIds(metadataDocId); + synchronizeMetadataLockedDocIds(metadataDocId); // Store metadata String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( @@ -612,7 +612,7 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF throw nsae; } finally { - releaseMetadataLockedIds(metadataDocId); + releaseMetadataLockedDocIds(metadataDocId); } } @@ -936,12 +936,12 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept protected static void syncRenameMetadataDocForDeletion(Collection deleteList, Path metadataDocAbsPath, String metadataDocId) throws InterruptedException, IOException { try { - synchronizeMetadataLockedIds(metadataDocId); + synchronizeMetadataLockedDocIds(metadataDocId); if (Files.exists(metadataDocAbsPath)) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDocAbsPath)); } } finally { - releaseMetadataLockedIds(metadataDocId); + releaseMetadataLockedDocIds(metadataDocId); } } @@ -2179,12 +2179,12 @@ private static void releaseObjectLockedPids(String pid) { * @param metadataDocId Metadata document id hash(pid+formatId) * @throws InterruptedException When an issue occurs when attempting to sync the metadata doc */ - private static void synchronizeMetadataLockedIds(String metadataDocId) + private static void synchronizeMetadataLockedDocIds(String metadataDocId) throws InterruptedException { - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(metadataDocId)) { + synchronized (metadataLockedDocIds) { + while (metadataLockedDocIds.contains(metadataDocId)) { try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); + metadataLockedDocIds.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = @@ -2195,22 +2195,22 @@ private static void synchronizeMetadataLockedIds(String metadataDocId) } } logFileHashStore.debug( - "Synchronizing metadataLockedIds for metadata doc: " + metadataDocId); - metadataLockedIds.add(metadataDocId); + "Synchronizing metadataLockedDocIds for metadata doc: " + metadataDocId); + metadataLockedDocIds.add(metadataDocId); } } /** - * Remove the given metadata doc from 'metadataLockedIds' and notify other threads + * Remove the given metadata doc from 'metadataLockedDocIds' and notify other threads * * @param metadataDocId Metadata document id hash(pid+formatId) */ - private static void releaseMetadataLockedIds(String metadataDocId) { - synchronized (metadataLockedIds) { + private static void releaseMetadataLockedDocIds(String metadataDocId) { + synchronized (metadataLockedDocIds) { logFileHashStore.debug( - "Releasing metadataLockedIds for metadata doc: " + metadataDocId); - metadataLockedIds.remove(metadataDocId); - metadataLockedIds.notify(); + "Releasing metadataLockedDocIds for metadata doc: " + metadataDocId); + metadataLockedDocIds.remove(metadataDocId); + metadataLockedDocIds.notify(); } } From 08f2c795fd2d60631e8e1b8b35df3f3aaf0a02c1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 09:40:46 -0700 Subject: [PATCH 404/553] Rename synchronization array from 'referenceLockedCids' to 'objectLockedCids' to improve clarity --- .../filehashstore/FileHashStore.java | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b8deb594..99219e11 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -57,7 +57,7 @@ public class FileHashStore implements HashStore { private static final int TIME_OUT_MILLISEC = 1000; private static final Collection objectLockedPids = new ArrayList<>(100); private static final Collection metadataLockedDocIds = new ArrayList<>(100); - private static final Collection referenceLockedCids = new ArrayList<>(100); + private static final Collection objectLockedCids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; @@ -532,7 +532,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi try { // tagObject is synchronized with deleteObject based on a `cid` - synchronizeReferencedLockedCids(cid); + synchronizeObjectLockedCids(cid); storeHashStoreRefsFiles(pid, cid); } catch (HashStoreRefsAlreadyExistException hsrfae) { @@ -553,7 +553,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } finally { // Release lock - releaseReferencedLockedCids(cid); + releaseObjectLockedCids(cid); } } @@ -713,7 +713,7 @@ public void deleteObject(String pid) String cid = objInfoMap.get("cid"); // If no exceptions are thrown, we proceed to synchronization based on the `cid` - synchronizeReferencedLockedCids(cid); + synchronizeObjectLockedCids(cid); try { // Proceed with comprehensive deletion - cid exists, nothing out of place @@ -739,7 +739,7 @@ public void deleteObject(String pid) } finally { // Release lock - releaseReferencedLockedCids(cid); + releaseObjectLockedCids(cid); } } catch (OrphanPidRefsFileException oprfe) { @@ -762,7 +762,7 @@ public void deleteObject(String pid) try { // Since we must access the cid reference file, the `cid` must be synchronized - synchronizeReferencedLockedCids(cidRead); + synchronizeObjectLockedCids(cidRead); Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); @@ -781,7 +781,7 @@ public void deleteObject(String pid) } finally { // Release lock - releaseReferencedLockedCids(cidRead); + releaseObjectLockedCids(cidRead); } } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists @@ -1148,7 +1148,7 @@ protected ObjectMetadata putObject( Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); try { - synchronizeReferencedLockedCids(objectCid); + synchronizeObjectLockedCids(objectCid); // Confirm that the object does not yet exist, delete tmpFile if so if (!Files.exists(objRealPath)) { logFileHashStore.info("Storing tmpFile: " + tmpFile); @@ -1170,7 +1170,7 @@ protected ObjectMetadata putObject( logFileHashStore.error(errMsg); throw e; } finally { - releaseReferencedLockedCids(objectCid); + releaseObjectLockedCids(objectCid); } return new ObjectMetadata(pid, objectCid, Files.size(objRealPath), hexDigests); @@ -1187,7 +1187,6 @@ protected ObjectMetadata putObject( * @param tmpFile Path to the file that is being evaluated * @param hexDigests Map of the hex digests to parse data from * @param expectedSize Expected size of object - * @param storedObjFileSize Actual size of object stored * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent */ protected void validateTmpObject( @@ -1552,7 +1551,7 @@ protected void deleteObjectByCid(String cid) Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); try { - synchronizeReferencedLockedCids(cid); + synchronizeObjectLockedCids(cid); if (Files.exists(absCidRefsPath)) { // The cid refs file exists, so the cid object cannot be deleted. String warnMsg = "cid refs file still contains references, skipping deletion."; @@ -1567,7 +1566,7 @@ protected void deleteObjectByCid(String cid) } } finally { // Release lock - releaseReferencedLockedCids(cid); + releaseObjectLockedCids(cid); } } @@ -1685,7 +1684,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, Map objInfoMap = findObject(pid); cid = objInfoMap.get("cid"); // If no exceptions are thrown, we proceed to synchronization based on the `cid` - synchronizeReferencedLockedCids(cid); + synchronizeObjectLockedCids(cid); try { // Get paths to reference files to work on @@ -1707,7 +1706,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); } finally { - releaseReferencedLockedCids(cid); + releaseObjectLockedCids(cid); } } catch (OrphanPidRefsFileException oprfe) { @@ -1729,7 +1728,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, try { // Since we must access the cid reference file, the `cid` must be synchronized - synchronizeReferencedLockedCids(cidRead); + synchronizeObjectLockedCids(cidRead); Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); @@ -1746,7 +1745,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, logFileHashStore.warn(warnMsg); } finally { - releaseReferencedLockedCids(cidRead); + releaseObjectLockedCids(cidRead); } } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists @@ -2215,18 +2214,18 @@ private static void releaseMetadataLockedDocIds(String metadataDocId) { } /** - * Multiple threads may access a data object or the respective cid reference file (which - * contains a list of `pid`s that reference a `cid`) and this needs to be coordinated. - * Otherwise, we may run into unexpected exceptions (ex. `OverlappingFileLockException`) + * Multiple threads may access a data object via its 'cid' or the respective 'cid reference + * file' (which contains a list of 'pid's that reference a 'cid') and this needs to be + * coordinated. * * @param cid Content identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - private static void synchronizeReferencedLockedCids(String cid) throws InterruptedException { - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { + private static void synchronizeObjectLockedCids(String cid) throws InterruptedException { + synchronized (objectLockedCids) { + while (objectLockedCids.contains(cid)) { try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); + objectLockedCids.wait(TIME_OUT_MILLISEC); } catch (InterruptedException ie) { String errMsg = @@ -2236,21 +2235,21 @@ private static void synchronizeReferencedLockedCids(String cid) throws Interrupt } } logFileHashStore.debug( - "Synchronizing referenceLockedCids for cid: " + cid); - referenceLockedCids.add(cid); + "Synchronizing objectLockedCids for cid: " + cid); + objectLockedCids.add(cid); } } /** - * Remove the given cid from 'referenceLockedCids' and notify other threads + * Remove the given cid from 'objectLockedCids' and notify other threads * * @param cid Content identifier */ - private static void releaseReferencedLockedCids(String cid) { - synchronized (referenceLockedCids) { - logFileHashStore.debug("Releasing referenceLockedCids for cid: " + cid); - referenceLockedCids.remove(cid); - referenceLockedCids.notify(); + private static void releaseObjectLockedCids(String cid) { + synchronized (objectLockedCids) { + logFileHashStore.debug("Releasing objectLockedCids for cid: " + cid); + objectLockedCids.remove(cid); + objectLockedCids.notify(); } } } From 7aa852d34d6b416837bc0c8d79cc7abee4c163ae Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 09:49:19 -0700 Subject: [PATCH 405/553] Add new synchronization code for 'tagObject' which can be called directly, to synchronize on pid via 'referenceLockedPids' --- .../filehashstore/FileHashStore.java | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 99219e11..3dd986c7 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -55,9 +55,10 @@ public class FileHashStore implements HashStore { private static final Log logFileHashStore = LogFactory.getLog(FileHashStore.class); private static final int TIME_OUT_MILLISEC = 1000; + private static final Collection objectLockedCids = new ArrayList<>(100); private static final Collection objectLockedPids = new ArrayList<>(100); private static final Collection metadataLockedDocIds = new ArrayList<>(100); - private static final Collection objectLockedCids = new ArrayList<>(100); + private static final Collection referenceLockedPids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; @@ -531,8 +532,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); try { - // tagObject is synchronized with deleteObject based on a `cid` synchronizeObjectLockedCids(cid); + synchronizeReferenceLockedPids(pid); storeHashStoreRefsFiles(pid, cid); } catch (HashStoreRefsAlreadyExistException hsrfae) { @@ -552,8 +553,9 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi throw e; } finally { - // Release lock + // Release locks releaseObjectLockedCids(cid); + releaseReferenceLockedPids(pid); } } @@ -2252,4 +2254,44 @@ private static void releaseObjectLockedCids(String cid) { objectLockedCids.notify(); } } + + /** + * Synchronize the pid tagging process since `tagObject` is a Public API method that can be + * called directly. This is used in the scenario when the client is missing metadata but must + * store the data object first. + * + * @param pid Persistent or authority-based identifier + * @throws InterruptedException When an issue occurs when attempting to sync the pid + */ + private static void synchronizeReferenceLockedPids(String pid) throws InterruptedException { + synchronized (referenceLockedPids) { + while (referenceLockedPids.contains(pid)) { + try { + referenceLockedPids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync pid: " + pid; + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "Synchronizing referenceLockedPids for pid: " + pid); + referenceLockedPids.add(pid); + } + } + + /** + * Remove the given pid from 'referenceLockedPids' and notify other threads + * + * @param pid Persistent or authority-based identifier + */ + private static void releaseReferenceLockedPids(String pid) { + synchronized (referenceLockedPids) { + logFileHashStore.debug("Releasing referenceLockedPids for pid: " + pid); + referenceLockedPids.remove(pid); + referenceLockedPids.notify(); + } + } } From 2ea427b3df736ad82a914a3b255ce3692326ed28 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 10:06:12 -0700 Subject: [PATCH 406/553] Revise 'tagObject' unexpected exception process to release locks before calling 'unTagObject', which is synchronized, to prevent a deadlock situation --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3dd986c7..1ad82078 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -549,6 +549,10 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi } catch (Exception e) { // Revert the process for all other exceptions + // We must first release the cid and pid since 'unTagObject' is synchronized + // If not, we will run into a deadlock. + releaseObjectLockedCids(cid); + releaseReferenceLockedPids(pid); unTagObject(pid, cid); throw e; From 38d0924ba500ee0aaf3b8f16445501b11b93dcf3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 12:00:59 -0700 Subject: [PATCH 407/553] Check 'deleteInvalidObject' for when ObjectMetadata passed does not include hexDigests, add custom exception and two new junit tests --- .../MissingHexDigestsException.java | 14 ++++++++ .../filehashstore/FileHashStore.java | 6 ++++ .../FileHashStoreInterfaceTest.java | 35 +++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/MissingHexDigestsException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/MissingHexDigestsException.java b/src/main/java/org/dataone/hashstore/exceptions/MissingHexDigestsException.java new file mode 100644 index 00000000..46cd1c00 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/MissingHexDigestsException.java @@ -0,0 +1,14 @@ +package org.dataone.hashstore.exceptions; + +import java.util.NoSuchElementException; + +/** + * An exception thrown when hexDigests from a supplied ObjectMetadata object is empty. + */ +public class MissingHexDigestsException extends NoSuchElementException { + + public MissingHexDigestsException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1ad82078..172d6629 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -38,6 +38,7 @@ import org.dataone.hashstore.HashStore; import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.MissingHexDigestsException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; @@ -816,6 +817,11 @@ public void deleteInvalidObject( logFileHashStore.debug("Verifying data object for cid: " + objectInfo.getCid()); // Validate input parameters FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "deleteInvalidObject"); + FileHashStoreUtility.ensureNotNull( + objectInfo.getHexDigests(), "objectInfo.getHexDigests()", "deleteInvalidObject"); + if (objectInfo.getHexDigests().isEmpty()) { + throw new MissingHexDigestsException("Missing hexDigests in supplied ObjectMetadata"); + } FileHashStoreUtility.ensureNotNull(checksum, "checksum", "deleteInvalidObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "deleteInvalidObject"); FileHashStoreUtility.checkNotNegativeOrZero(objSize, "deleteInvalidObject"); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 8e679e3e..5b586ca0 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -21,6 +21,7 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -36,6 +37,7 @@ import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.MissingHexDigestsException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; @@ -852,6 +854,39 @@ public void deleteInvalidObject_correctValues() throws Exception { } } + /** + * Check that deleteInvalidObject throws MissingHexDigestsException when objInfo hexDigests + * is empty. + */ + @Test + public void deleteInvalidObject_objInfoEmptyHexDigests() { + String id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; + long size = 1999999; + Map hexDigests = new HashMap<>(); + + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); + + assertThrows( + MissingHexDigestsException.class, + () -> fileHashStore.deleteInvalidObject(objInfo, id, "MD2", size)); + } + + /** + * Check that deleteInvalidObject throws MissingHexDigestsException when objInfo hexDigests + * is null. + */ + @Test + public void deleteInvalidObject_objInfoNullHexDigests() { + String id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; + long size = 1999999; + Map hexDigests = null; + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, null); + + assertThrows( + IllegalArgumentException.class, + () -> fileHashStore.deleteInvalidObject(objInfo, id, "MD2", size)); + } + /** * Check that deleteInvalidObject calculates and verifies a checksum with a supported algorithm that is * not included in the default list From f76a13c88192f8d29ccdfd83f065aa164c58bc1b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 12:04:43 -0700 Subject: [PATCH 408/553] Rename FileHashStoreUtility 'checkForEmptyString' to 'checkForEmptyAndValidString' for clarity --- .../filehashstore/FileHashStore.java | 64 +++++++++---------- .../filehashstore/FileHashStoreUtility.java | 6 +- .../FileHashStoreProtectedTest.java | 3 +- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 172d6629..7feb701c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -238,7 +238,7 @@ protected void verifyHashStoreProperties( FileHashStoreUtility.ensureNotNull( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); @@ -408,16 +408,16 @@ public ObjectMetadata storeObject( // Validate input parameters FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeObject"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( additionalAlgorithm, "additionalAlgorithm", "storeObject" ); validateAlgorithm(additionalAlgorithm); } if (checksumAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( checksumAlgorithm, "checksumAlgorithm", "storeObject" ); validateAlgorithm(checksumAlgorithm); @@ -529,8 +529,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "tagObject"); FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "tagObject"); - FileHashStoreUtility.checkForEmptyString(cid, "cid", "tagObject"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "tagObject"); + FileHashStoreUtility.checkForEmptyAndValidString(cid, "cid", "tagObject"); try { synchronizeObjectLockedCids(cid); @@ -572,14 +572,14 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "storeMetadata"); // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "storeMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "storeMetadata"); checkedFormatId = formatId; } @@ -639,7 +639,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, logFileHashStore.debug("Retrieving InputStream to data object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveObject"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "retrieveObject"); // Check to see if object exists Path objRealPath = getHashStoreDataObjectPath(pid); @@ -674,9 +674,9 @@ public InputStream retrieveMetadata(String pid, String formatId) "Retrieving metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "retrieveMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "retrieveMetadata"); return getHashStoreMetadataInputStream(pid, formatId); } @@ -691,7 +691,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, "Retrieving metadata for pid: " + pid + " with default metadata namespace: "); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "retrieveMetadata"); return getHashStoreMetadataInputStream(pid, DEFAULT_METADATA_NAMESPACE); } @@ -703,7 +703,7 @@ public void deleteObject(String pid) logFileHashStore.debug("Deleting object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); - FileHashStoreUtility.checkForEmptyString(pid, "id", "deleteObject"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "id", "deleteObject"); Collection deleteList = new ArrayList<>(); try { @@ -891,9 +891,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx "Deleting metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "deleteMetadata"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "deleteMetadata"); Collection deleteList = new ArrayList<>(); // Get the path to the metadata document and metadata document name/id @@ -915,7 +915,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug("Deleting all metadata documents for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "deleteMetadata"); Collection deleteList = new ArrayList<>(); // Get the path to the pid metadata document directory @@ -962,7 +962,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug("Calculating hex digest for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "getHexDigest"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "getHexDigest"); validateAlgorithm(algorithm); // Find the content identifier @@ -1017,7 +1017,7 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("Finding object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "findObject"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "findObject"); // Get path of the pid references file Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); @@ -1122,7 +1122,7 @@ protected ObjectMetadata putObject( boolean compareChecksum = verifyChecksumParameters(checksum, checksumAlgorithm); // Validate additional algorithm if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( additionalAlgorithm, "additionalAlgorithm", "putObject" ); validateAlgorithm(additionalAlgorithm); @@ -1282,7 +1282,7 @@ protected void validateTmpObject( protected boolean validateAlgorithm(String algorithm) throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "validateAlgorithm"); - FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "validateAlgorithm"); + FileHashStoreUtility.checkForEmptyAndValidString(algorithm, "algorithm", "validateAlgorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); if (!algorithmSupported) { @@ -1304,7 +1304,7 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio */ protected boolean shouldCalculateAlgorithm(String algorithm) { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "shouldCalculateAlgorithm"); - FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "shouldCalculateAlgorithm"); + FileHashStoreUtility.checkForEmptyAndValidString(algorithm, "algorithm", "shouldCalculateAlgorithm"); boolean shouldCalculateAlgorithm = true; for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { if (algorithm.equals(defAlgo.getName())) { @@ -1327,12 +1327,12 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor throws NoSuchAlgorithmException { // First ensure algorithm is compatible and values are valid if they aren't null if (checksumAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters"); validateAlgorithm(checksumAlgorithm); } if (checksum != null) { - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( checksum, "checksum", "verifyChecksumParameters"); } // If checksum is supplied, checksumAlgorithm cannot be empty @@ -1340,7 +1340,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor FileHashStoreUtility.ensureNotNull( checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters" ); - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( checksumAlgorithm, "algorithm", "verifyChecksumParameters" ); } @@ -1353,7 +1353,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor FileHashStoreUtility.ensureNotNull( checksum, "checksum", "verifyChecksumParameters" ); - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( checksum, "checksum", "verifyChecksumParameters" ); } @@ -1384,7 +1384,7 @@ protected Map writeToTmpFileAndGenerateChecksums( // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(additionalAlgorithm); @@ -1392,7 +1392,7 @@ protected Map writeToTmpFileAndGenerateChecksums( } boolean generateCsAlgo = false; if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { - FileHashStoreUtility.checkForEmptyString( + FileHashStoreUtility.checkForEmptyAndValidString( checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(checksumAlgorithm); @@ -1497,7 +1497,7 @@ protected void move(File source, File target, String entity) throws IOException, "Moving " + entity + ", from source: " + source + ", to target: " + target); // Validate input parameters FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); - FileHashStoreUtility.checkForEmptyString(entity, "entity", "move"); + FileHashStoreUtility.checkForEmptyAndValidString(entity, "entity", "move"); if (entity.equals("object") && target.exists()) { String errMsg = "File already exists for target: " + target; logFileHashStore.warn(errMsg); @@ -1681,9 +1681,9 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, NoSuchAlgorithmException, IOException { // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "unTagObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "unTagObject"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "unTagObject"); FileHashStoreUtility.ensureNotNull(cid, "cid", "unTagObject"); - FileHashStoreUtility.checkForEmptyString(cid, "cid", "unTagObject"); + FileHashStoreUtility.checkForEmptyAndValidString(cid, "cid", "unTagObject"); Collection deleteList = new ArrayList<>(); @@ -1954,14 +1954,14 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "putMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "putMetadata"); // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "putMetadata"); + FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "putMetadata"); checkedFormatId = formatId; } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 356fc120..b5473245 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -88,9 +88,9 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm public static String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, IllegalArgumentException { FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "getPidHexDigest"); FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.checkForEmptyAndValidString(algorithm, "algorithm", "getPidHexDigest"); MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); byte[] bytes = pid.getBytes(StandardCharsets.UTF_8); @@ -186,7 +186,7 @@ public static void deleteListItems(Collection deleteList) throws IOExcepti * @param method Calling method * @throws IllegalArgumentException If the string is empty or contains illegal characters */ - public static void checkForEmptyString(String string, String argument, String method) + public static void checkForEmptyAndValidString(String string, String argument, String method) throws IllegalArgumentException { ensureNotNull(string, "string", "checkForEmptyString"); if (string.trim().isEmpty()) { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 5770959b..3bef190b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -2073,6 +2073,7 @@ public void getHashStoreDataObjectPath_fileNotFound() { public void fileHashStoreUtility_checkForEmptyString() { assertThrows( IllegalArgumentException.class, - () -> FileHashStoreUtility.checkForEmptyString("dou.test.1\n", "pid", "storeObject")); + () -> FileHashStoreUtility.checkForEmptyAndValidString("dou.test.1\n", "pid", + "storeObject")); } } From b04472898417f6f09ec97d3cce6c1f0f83550e04 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 12:15:22 -0700 Subject: [PATCH 409/553] Rework FileHashStoreUtility 'deleteListItems' method to attempt to delete as many items in the list as possible --- .../filehashstore/FileHashStoreUtility.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index b5473245..caaf8622 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -1,5 +1,8 @@ package org.dataone.hashstore.filehashstore; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -25,6 +28,8 @@ */ public class FileHashStoreUtility { + private static final Log logFHSU = LogFactory.getLog(FileHashStoreUtility.class); + /** * Checks whether a given object is null and throws an exception if so * @@ -164,14 +169,21 @@ public static Path renamePathForDeletion(Path pathToRename) throws IOException { * Delete all paths found in the given List object. * * @param deleteList Directory to check - * @throws IOException Unexpected I/O error when deleting files */ - public static void deleteListItems(Collection deleteList) throws IOException { + public static void deleteListItems(Collection deleteList) { ensureNotNull(deleteList, "deleteList", "deleteListItems"); if (!deleteList.isEmpty()) { for (Path deleteItem : deleteList) { if (Files.exists(deleteItem)) { - Files.delete(deleteItem); + try { + Files.delete(deleteItem); + } catch (Exception ge) { + String warnMsg = + "Attempted to delete metadata document: " + deleteItem + " but failed." + + " Additional Details: " + ge.getMessage(); + logFHSU.warn(warnMsg); + } + } } } From 8442d66a4d2cf1e8db686b251d0450ad87cbd1f8 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 12:20:03 -0700 Subject: [PATCH 410/553] Add logic to Public API store methods to try-finally .close the supplied stream to the object or metadata --- .../filehashstore/FileHashStore.java | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7feb701c..1410b1e3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -412,23 +412,26 @@ public ObjectMetadata storeObject( // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { FileHashStoreUtility.checkForEmptyAndValidString( - additionalAlgorithm, "additionalAlgorithm", "storeObject" - ); + additionalAlgorithm, "additionalAlgorithm", "storeObject"); validateAlgorithm(additionalAlgorithm); } if (checksumAlgorithm != null) { FileHashStoreUtility.checkForEmptyAndValidString( - checksumAlgorithm, "checksumAlgorithm", "storeObject" - ); + checksumAlgorithm, "checksumAlgorithm", "storeObject"); validateAlgorithm(checksumAlgorithm); } if (objSize != -1) { FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); } - return syncPutObject( - object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize - ); + try { + return syncPutObject( + object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize + ); + } finally { + // Close stream + object.close(); + } } /** @@ -518,7 +521,12 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce // call 'deleteInvalidObject' (optional) to check that the object is valid, and then // 'tagObject' (required) to create the reference files needed to associate the // respective pids/cids. - return putObject(object, "HashStoreNoPid", null, null, null, -1); + try { + return putObject(object, "HashStoreNoPid", null, null, null, -1); + } finally { + // Close stream + object.close(); + } } @@ -583,7 +591,12 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) checkedFormatId = formatId; } - return syncPutMetadata(metadata, pid, checkedFormatId); + try { + return syncPutMetadata(metadata, pid, checkedFormatId); + } finally { + // Close stream + metadata.close(); + } } /** From 2cbec48ac858f3da3b827f382f24cb85e92838bc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 13:32:23 -0700 Subject: [PATCH 411/553] Refactor 'deleteMetadata' with logic to revert the process if there is an unexpected issue, and add new junit tests for new supporting methods --- .../filehashstore/FileHashStore.java | 87 ++++++++++++++----- .../filehashstore/FileHashStoreUtility.java | 21 +++++ .../FileHashStoreProtectedTest.java | 53 ++++++++++- 3 files changed, 138 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1410b1e3..3a6f320e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -909,15 +909,25 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "deleteMetadata"); Collection deleteList = new ArrayList<>(); - // Get the path to the metadata document and metadata document name/id - String metadataDocId = FileHashStoreUtility.getPidHexDigest(pid + formatId, - OBJECT_STORE_ALGORITHM); + // Get the path to the metadata document Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); - syncRenameMetadataDocForDeletion(deleteList, metadataDocPath, metadataDocId); + Collection metadataDocPaths = new ArrayList<>(); + metadataDocPaths.add(metadataDocPath); + try { + syncRenameMetadataDocForDeletion(metadataDocPaths); + } catch (Exception ge) { + // Revert process if there is any exception + syncRenameMetadataDocForRestoration(deleteList); + String errMsg = "Unexpected issue when trying to delete metadata for pid: " + pid + + ". Metadata has not been deleted. Additional Details: " + ge.getMessage(); + logFileHashStore.error(errMsg); + throw ge; + } + // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info( - "Metadata document deleted for: " + pid + " with metadata address: " + metadataDocId); + "Metadata document deleted for: " + pid + " with metadata address: " + metadataDocPath); } /** @@ -940,33 +950,66 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept // Add all metadata docs found in the metadata doc directory to a list to iterate over List metadataDocPaths = FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); - for (Path metadataDoc : metadataDocPaths) { - String metadataDocId = metadataDoc.getFileName().toString(); - syncRenameMetadataDocForDeletion(deleteList, metadataDoc, metadataDocId); + try { + syncRenameMetadataDocForDeletion(metadataDocPaths); + } catch (Exception ge) { + // Revert process if there is any exception + syncRenameMetadataDocForRestoration(deleteList); + String errMsg = "Unexpected issue when trying to delete metadata for pid: " + pid + + ". Metadata has not been deleted. Additional Details: " + ge.getMessage(); + logFileHashStore.error(errMsg); + throw ge; } + // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info("All metadata documents deleted for: " + pid); } /** - * Synchronize deleting a metadata doc by renaming it and adding it to the supplied List. + * Synchronize rename metadata documents for deletion * - * @param deleteList List to add the renamed metadata document - * @param metadataDocAbsPath Absolute path to the metadata document - * @param metadataDocId Metadata document name - * @throws InterruptedException When an issue with synchronization occurs - * @throws IOException If there is an issue renaming a document + * @param metadataDocPaths List of metadata document paths + * @throws IOException If there is an issue renaming paths + * @throws InterruptedException If there is an issue with synchronization metadata calls */ - protected static void syncRenameMetadataDocForDeletion(Collection deleteList, Path metadataDocAbsPath, String metadataDocId) - throws InterruptedException, IOException { - try { - synchronizeMetadataLockedDocIds(metadataDocId); - if (Files.exists(metadataDocAbsPath)) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(metadataDocAbsPath)); + protected static void syncRenameMetadataDocForDeletion( + Collection metadataDocPaths) throws IOException, InterruptedException { + for (Path metadataDocToDelete : metadataDocPaths) { + String metadataDocId = metadataDocToDelete.getFileName().toString(); + + try { + synchronizeMetadataLockedDocIds(metadataDocId); + if (Files.exists(metadataDocToDelete)) { + FileHashStoreUtility.renamePathForDeletion(metadataDocToDelete); + } + } finally { + releaseMetadataLockedDocIds(metadataDocId); + } + } + } + + /** + * Synchronize restoring metadata documents renamed for deletion back to what they were. + * + * @param deleteList Array of items that have been marked for deletion + * @throws IOException If there is an issue renaming paths + * @throws InterruptedException If there is an issue with synchronization metadata calls + */ + protected static void syncRenameMetadataDocForRestoration(Collection deleteList) + throws IOException, InterruptedException { + for (Path metadataDocToPlaceBack : deleteList) { + Path fileNameWithDeleted = metadataDocToPlaceBack.getFileName(); + String metadataDocId = fileNameWithDeleted.toString().replace("_delete", ""); + + try { + synchronizeMetadataLockedDocIds(metadataDocId); + if (Files.exists(metadataDocToPlaceBack)) { + FileHashStoreUtility.renamePathForRestoration(metadataDocToPlaceBack); + } + } finally { + releaseMetadataLockedDocIds(metadataDocId); } - } finally { - releaseMetadataLockedDocIds(metadataDocId); } } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index caaf8622..26704362 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -165,6 +165,27 @@ public static Path renamePathForDeletion(Path pathToRename) throws IOException { return deletePath; } + /** + * Rename the given path slated for deletion by replacing '_delete' with "" + * + * @param pathToRename The path to the file to revert deletion + * @throws IOException Issue with renaming the given file path + */ + public static void renamePathForRestoration(Path pathToRename) throws IOException { + ensureNotNull(pathToRename, "pathToRename", "renamePathForRestoration"); + if (!Files.exists(pathToRename)) { + String errMsg = "FileHashStoreUtility.renamePathForRestoration - Given path to file: " + + pathToRename + " does not exist."; + throw new FileNotFoundException(errMsg); + } + Path parentPath = pathToRename.getParent(); + Path fileName = pathToRename.getFileName(); + String newFileName = fileName.toString().replace("_delete", ""); + + Path restorePath = parentPath.resolve(newFileName); + Files.move(pathToRename, restorePath, StandardCopyOption.ATOMIC_MOVE); + } + /** * Delete all paths found in the given List object. * diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 3bef190b..51c149d5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -2070,10 +2070,61 @@ public void getHashStoreDataObjectPath_fileNotFound() { * exist */ @Test - public void fileHashStoreUtility_checkForEmptyString() { + public void fileHashStoreUtility_checkForEmptyAndValidString() { assertThrows( IllegalArgumentException.class, () -> FileHashStoreUtility.checkForEmptyAndValidString("dou.test.1\n", "pid", "storeObject")); } + + /** + * Confirm that renamePathForDeletion adds '_delete' to the given path + */ + @Test + public void fileHashStoreUtility_renamePathForDeletion() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); + metadataStream.close(); + + Path metadataPath = Paths.get(pathToMetadata); + FileHashStoreUtility.renamePathForDeletion(metadataPath); + + Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); + assertTrue(Files.exists(expectedMetadataPathRenamed)); + } + } + + /** + * Confirm that renamePathForDeletion adds '_delete' to the given path + */ + @Test + public void fileHashStoreUtility_renamePathForRestoration() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metadataStream = Files.newInputStream(testMetaDataFile); + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); + metadataStream.close(); + + Path metadataPath = Paths.get(pathToMetadata); + FileHashStoreUtility.renamePathForDeletion(metadataPath); + + Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); + assertFalse(Files.exists(metadataPath)); + assertTrue(Files.exists(expectedMetadataPathRenamed)); + + FileHashStoreUtility.renamePathForRestoration(expectedMetadataPathRenamed); + assertFalse(Files.exists(expectedMetadataPathRenamed)); + assertTrue(Files.exists(metadataPath)); + } + } } From 9f3dac9703349c9677b1b010d456e9155a8905e0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 13:38:02 -0700 Subject: [PATCH 412/553] Clean up code for recently reworked deleteMetadata process --- .idea/inspectionProfiles/Project_Default.xml | 147 ++++++++++++++++++ .../filehashstore/FileHashStore.java | 9 +- 2 files changed, 153 insertions(+), 3 deletions(-) create mode 100644 .idea/inspectionProfiles/Project_Default.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 00000000..fd7d3768 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,147 @@ + + + + \ No newline at end of file diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3a6f320e..db521b7a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -909,10 +909,11 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "deleteMetadata"); Collection deleteList = new ArrayList<>(); - // Get the path to the metadata document + // Get the path to the metadata document and add it to a list Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); Collection metadataDocPaths = new ArrayList<>(); metadataDocPaths.add(metadataDocPath); + try { syncRenameMetadataDocForDeletion(metadataDocPaths); } catch (Exception ge) { @@ -950,13 +951,15 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept // Add all metadata docs found in the metadata doc directory to a list to iterate over List metadataDocPaths = FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); + try { syncRenameMetadataDocForDeletion(metadataDocPaths); } catch (Exception ge) { // Revert process if there is any exception syncRenameMetadataDocForRestoration(deleteList); - String errMsg = "Unexpected issue when trying to delete metadata for pid: " + pid + - ". Metadata has not been deleted. Additional Details: " + ge.getMessage(); + String errMsg = "Unexpected issue when trying to delete metadata for pid: " + pid + + ". An attempt to restore metadata has been made. Additional details: " + + ge.getMessage(); logFileHashStore.error(errMsg); throw ge; } From 1a127657058a3864f42ece9f392426497ca5801e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 15:09:12 -0700 Subject: [PATCH 413/553] Revert process to restore metadata documents from 'deleteMetadata' methods --- .../filehashstore/FileHashStore.java | 65 ++++--------------- 1 file changed, 13 insertions(+), 52 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index db521b7a..93e4a88e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -908,23 +908,12 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "deleteMetadata"); - Collection deleteList = new ArrayList<>(); // Get the path to the metadata document and add it to a list Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); Collection metadataDocPaths = new ArrayList<>(); metadataDocPaths.add(metadataDocPath); - try { - syncRenameMetadataDocForDeletion(metadataDocPaths); - } catch (Exception ge) { - // Revert process if there is any exception - syncRenameMetadataDocForRestoration(deleteList); - String errMsg = "Unexpected issue when trying to delete metadata for pid: " + pid + - ". Metadata has not been deleted. Additional Details: " + ge.getMessage(); - logFileHashStore.error(errMsg); - throw ge; - } - + Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info( @@ -941,7 +930,6 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "deleteMetadata"); - Collection deleteList = new ArrayList<>(); // Get the path to the pid metadata document directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( @@ -952,18 +940,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept List metadataDocPaths = FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); - try { - syncRenameMetadataDocForDeletion(metadataDocPaths); - } catch (Exception ge) { - // Revert process if there is any exception - syncRenameMetadataDocForRestoration(deleteList); - String errMsg = "Unexpected issue when trying to delete metadata for pid: " + pid - + ". An attempt to restore metadata has been made. Additional details: " - + ge.getMessage(); - logFileHashStore.error(errMsg); - throw ge; - } - + Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); // Delete all items in the list FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info("All metadata documents deleted for: " + pid); @@ -976,44 +953,28 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept * @throws IOException If there is an issue renaming paths * @throws InterruptedException If there is an issue with synchronization metadata calls */ - protected static void syncRenameMetadataDocForDeletion( + protected static Collection syncRenameMetadataDocForDeletion( Collection metadataDocPaths) throws IOException, InterruptedException { + // Rename paths and add to a List + Collection metadataDocsToDelete = new ArrayList<>(); for (Path metadataDocToDelete : metadataDocPaths) { String metadataDocId = metadataDocToDelete.getFileName().toString(); - try { synchronizeMetadataLockedDocIds(metadataDocId); if (Files.exists(metadataDocToDelete)) { - FileHashStoreUtility.renamePathForDeletion(metadataDocToDelete); - } - } finally { - releaseMetadataLockedDocIds(metadataDocId); - } - } - } - - /** - * Synchronize restoring metadata documents renamed for deletion back to what they were. - * - * @param deleteList Array of items that have been marked for deletion - * @throws IOException If there is an issue renaming paths - * @throws InterruptedException If there is an issue with synchronization metadata calls - */ - protected static void syncRenameMetadataDocForRestoration(Collection deleteList) - throws IOException, InterruptedException { - for (Path metadataDocToPlaceBack : deleteList) { - Path fileNameWithDeleted = metadataDocToPlaceBack.getFileName(); - String metadataDocId = fileNameWithDeleted.toString().replace("_delete", ""); - - try { - synchronizeMetadataLockedDocIds(metadataDocId); - if (Files.exists(metadataDocToPlaceBack)) { - FileHashStoreUtility.renamePathForRestoration(metadataDocToPlaceBack); + try { + metadataDocsToDelete.add(FileHashStoreUtility.renamePathForDeletion(metadataDocToDelete)); + } catch (Exception ge) { + String warnMsg = "Unexpected error renaming metadata doc path for " + + "deletion: " + metadataDocToDelete; + logFileHashStore.warn(warnMsg); + } } } finally { releaseMetadataLockedDocIds(metadataDocId); } } + return metadataDocsToDelete; } @Override From f28b522f0f526d41803c9d169be98703976b6d9a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 15:17:08 -0700 Subject: [PATCH 414/553] Add revert metadata deletion process to 'syncRenameMetadataDocForDeletion' --- .../filehashstore/FileHashStore.java | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 93e4a88e..f0352b44 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -957,23 +957,39 @@ protected static Collection syncRenameMetadataDocForDeletion( Collection metadataDocPaths) throws IOException, InterruptedException { // Rename paths and add to a List Collection metadataDocsToDelete = new ArrayList<>(); - for (Path metadataDocToDelete : metadataDocPaths) { - String metadataDocId = metadataDocToDelete.getFileName().toString(); - try { - synchronizeMetadataLockedDocIds(metadataDocId); - if (Files.exists(metadataDocToDelete)) { - try { + try { + for (Path metadataDocToDelete : metadataDocPaths) { + String metadataDocId = metadataDocToDelete.getFileName().toString(); + try { + synchronizeMetadataLockedDocIds(metadataDocId); + if (Files.exists(metadataDocToDelete)) { metadataDocsToDelete.add(FileHashStoreUtility.renamePathForDeletion(metadataDocToDelete)); - } catch (Exception ge) { - String warnMsg = "Unexpected error renaming metadata doc path for " - + "deletion: " + metadataDocToDelete; - logFileHashStore.warn(warnMsg); } + } finally { + releaseMetadataLockedDocIds(metadataDocId); } - } finally { - releaseMetadataLockedDocIds(metadataDocId); } + } catch (Exception ge) { + // If there is any exception, revert the process and throw an exception + for (Path metadataDocToPlaceBack : metadataDocsToDelete) { + Path fileNameWithDeleted = metadataDocToPlaceBack.getFileName(); + String metadataDocId = fileNameWithDeleted.toString().replace("_delete", ""); + try { + synchronizeMetadataLockedDocIds(metadataDocId); + if (Files.exists(metadataDocToPlaceBack)) { + FileHashStoreUtility.renamePathForRestoration(metadataDocToPlaceBack); + } + } finally { + releaseMetadataLockedDocIds(metadataDocId); + } + } + String errMsg = "An unexpected exception has occurred when deleting metadata " + + "documents. Attempts to restore all affected metadata documents have " + + "been made. Additional details: " + ge.getMessage(); + logFileHashStore.error(errMsg); + throw ge; } + return metadataDocsToDelete; } From 2012a2a728601a1211648e7b226a42ec753b338b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 19 Jul 2024 16:03:11 -0700 Subject: [PATCH 415/553] Add guard rails to 'syncRenameMetadataDocForDeletion' and new junit tests --- .../filehashstore/FileHashStore.java | 52 +++++++---- .../FileHashStoreProtectedTest.java | 90 ++++++++++++++++--- 2 files changed, 112 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f0352b44..b56c6163 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -913,9 +913,11 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx Collection metadataDocPaths = new ArrayList<>(); metadataDocPaths.add(metadataDocPath); - Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); - // Delete all items in the list - FileHashStoreUtility.deleteListItems(deleteList); + if (!metadataDocPaths.isEmpty()) { + Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); + // Delete all items in the list + FileHashStoreUtility.deleteListItems(deleteList); + } logFileHashStore.info( "Metadata document deleted for: " + pid + " with metadata address: " + metadataDocPath); } @@ -940,21 +942,30 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept List metadataDocPaths = FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); - Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); - // Delete all items in the list - FileHashStoreUtility.deleteListItems(deleteList); + if (!metadataDocPaths.isEmpty()) { + Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); + // Delete all items in the list + FileHashStoreUtility.deleteListItems(deleteList); + } logFileHashStore.info("All metadata documents deleted for: " + pid); } /** - * Synchronize rename metadata documents for deletion + * Synchronize renaming metadata documents for deletion * * @param metadataDocPaths List of metadata document paths * @throws IOException If there is an issue renaming paths * @throws InterruptedException If there is an issue with synchronization metadata calls */ - protected static Collection syncRenameMetadataDocForDeletion( + protected Collection syncRenameMetadataDocForDeletion( Collection metadataDocPaths) throws IOException, InterruptedException { + FileHashStoreUtility.ensureNotNull( + metadataDocPaths, "metadataDocPaths", "syncRenameMetadataDocForDeletion"); + if (metadataDocPaths.isEmpty()) { + String errMsg = "metadataDocPaths supplied cannot be empty."; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + } // Rename paths and add to a List Collection metadataDocsToDelete = new ArrayList<>(); try { @@ -963,24 +974,27 @@ protected static Collection syncRenameMetadataDocForDeletion( try { synchronizeMetadataLockedDocIds(metadataDocId); if (Files.exists(metadataDocToDelete)) { - metadataDocsToDelete.add(FileHashStoreUtility.renamePathForDeletion(metadataDocToDelete)); + metadataDocsToDelete.add( + FileHashStoreUtility.renamePathForDeletion(metadataDocToDelete)); } } finally { releaseMetadataLockedDocIds(metadataDocId); } } } catch (Exception ge) { - // If there is any exception, revert the process and throw an exception - for (Path metadataDocToPlaceBack : metadataDocsToDelete) { - Path fileNameWithDeleted = metadataDocToPlaceBack.getFileName(); - String metadataDocId = fileNameWithDeleted.toString().replace("_delete", ""); - try { - synchronizeMetadataLockedDocIds(metadataDocId); - if (Files.exists(metadataDocToPlaceBack)) { - FileHashStoreUtility.renamePathForRestoration(metadataDocToPlaceBack); + // If there is any exception, attempt to revert the process and throw an exception + if (!metadataDocsToDelete.isEmpty()) { + for (Path metadataDocToPlaceBack : metadataDocsToDelete) { + Path fileNameWithDeleted = metadataDocToPlaceBack.getFileName(); + String metadataDocId = fileNameWithDeleted.toString().replace("_delete", ""); + try { + synchronizeMetadataLockedDocIds(metadataDocId); + if (Files.exists(metadataDocToPlaceBack)) { + FileHashStoreUtility.renamePathForRestoration(metadataDocToPlaceBack); + } + } finally { + releaseMetadataLockedDocIds(metadataDocId); } - } finally { - releaseMetadataLockedDocIds(metadataDocId); } } String errMsg = "An unexpected exception has occurred when deleting metadata " diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 51c149d5..edde872b 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -17,6 +17,8 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -773,9 +775,9 @@ public void verifyChecksumParameters() throws Exception { */ @Test public void verifyChecksumParameters_emptyChecksum() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.verifyChecksumParameters(" ","SHA-256"); - }); + assertThrows( + IllegalArgumentException.class, + () -> fileHashStore.verifyChecksumParameters(" ", "SHA-256")); } /** @@ -783,9 +785,9 @@ public void verifyChecksumParameters_emptyChecksum() { */ @Test public void verifyChecksumParameters_emptyAlgorithm() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.verifyChecksumParameters("abc123"," "); - }); + assertThrows( + IllegalArgumentException.class, + () -> fileHashStore.verifyChecksumParameters("abc123", " ")); } /** @@ -793,9 +795,9 @@ public void verifyChecksumParameters_emptyAlgorithm() { */ @Test public void verifyChecksumParameters_unsupportedAlgorithm() { - assertThrows(NoSuchAlgorithmException.class, () -> { - fileHashStore.verifyChecksumParameters("abc123","SHA-DOU"); - }); + assertThrows( + NoSuchAlgorithmException.class, + () -> fileHashStore.verifyChecksumParameters("abc123", "SHA-DOU")); } /** @@ -1829,6 +1831,74 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { } } + /** + * Confirm that syncRenameMetadataDocForDeletion adds '_delete' to the given paths + */ + @Test + public void syncRenameMetadataDocForDeletion_renamesAsExpected() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + InputStream metaStream = Files.newInputStream(testMetaDataFile); + String pathToMetadata = fileHashStore.putMetadata(metaStream, pid, null); + String pathToMetadataTwo = fileHashStore.putMetadata(metaStream, pid, "ns.test.1"); + String pathToMetadataThree = + fileHashStore.putMetadata(metaStream, pid, "ns.test" + ".3"); + metaStream.close(); + + // Confirm that metadata documents are present + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, pidHexDigest + ); + Path expectedPidMetadataDirectory = + storePath.resolve("metadata").resolve(pidRelativePath); + List metadataDocPaths = + FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); + + assertEquals(3, metadataDocPaths.size()); + + Collection deleteList = + fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths); + + Collection renamedDocStrings = new ArrayList<>(); + for (Path renamedDoc : deleteList) { + renamedDocStrings.add(renamedDoc.toString()); + } + assertTrue(renamedDocStrings.contains(pathToMetadata + "_delete")); + assertTrue(renamedDocStrings.contains(pathToMetadataTwo + "_delete")); + assertTrue(renamedDocStrings.contains(pathToMetadataThree + "_delete")); + } + } + + /** + * Confirm that syncRenameMetadataDocForDeletion throws exception when supplied list is empty. + */ + @Test + public void syncRenameMetadataDocForDeletion_emptyList() { + Collection metadataDocPaths = new ArrayList<>(); + assertThrows( + IllegalArgumentException.class, + () -> fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths)); + } + + /** + * Confirm that syncRenameMetadataDocForDeletion throws exception when supplied list is null. + */ + @Test + public void syncRenameMetadataDocForDeletion_nullList() { + assertThrows( + IllegalArgumentException.class, + () -> fileHashStore.syncRenameMetadataDocForDeletion(null)); + } + /** * Confirm that isStringInRefsFile returns true when pid is found */ @@ -1872,8 +1942,6 @@ public void isStringInRefsFile_pidNotFound() throws Exception { } } - - /** * Confirm getHashStoreDataObjectPath returns correct object path */ From 24315f0de72dcaf6dd2022693c96fdaf502608c1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jul 2024 16:12:05 -0700 Subject: [PATCH 416/553] Create new package 'hashstoreconverter' and add new class 'FileHashStoreLinks' with basic constructor --- .../hashstoreconverter/FileHashStoreLinks.java | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java new file mode 100644 index 00000000..5124bd5f --- /dev/null +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -0,0 +1,11 @@ +package org.dataone.hashstore.hashstoreconverter; + +import java.util.Properties; + +public class FileHashStoreLinks { + + public FileHashStoreLinks(Properties hashstoreProperties) throws Exception { + super(); + } + +} From 8ceec6c7d43a4c81ee21390479b77b16a1849897 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jul 2024 17:10:01 -0700 Subject: [PATCH 417/553] Add 'FileHashStoreLinks' constructor, new junit test class 'FileHashStoreLinksInitTest' --- .../FileHashStoreLinks.java | 11 +- .../FileHashStoreLinksInitTest.java | 123 ++++++++++++++++++ 2 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksInitTest.java diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 5124bd5f..fbc51281 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -1,11 +1,16 @@ package org.dataone.hashstore.hashstoreconverter; +import org.dataone.hashstore.filehashstore.FileHashStore; + +import java.io.IOException; +import java.security.NoSuchAlgorithmException; import java.util.Properties; -public class FileHashStoreLinks { +public class FileHashStoreLinks extends FileHashStore { - public FileHashStoreLinks(Properties hashstoreProperties) throws Exception { - super(); + public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgumentException, + IOException, NoSuchAlgorithmException { + super(hashstoreProperties); } } diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksInitTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksInitTest.java new file mode 100644 index 00000000..3343ec31 --- /dev/null +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksInitTest.java @@ -0,0 +1,123 @@ +package org.dataone.hashstore.hashstoreconverter; + +import org.dataone.hashstore.filehashstore.FileHashStore; +import org.dataone.hashstore.testdata.TestDataHarness; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +public class FileHashStoreLinksInitTest { + + private static Path rootDirectory; + private static Path objStringFull; + private static Path objTmpStringFull; + private static Path metadataStringFull; + private static Path metadataTmpStringFull; + private static FileHashStoreLinks fileHashStoreLinks; + + /** + * Initialize FileHashStore + */ + @BeforeEach + public void initializeFileHashStore() { + Path root = tempFolder; + rootDirectory = root.resolve("hashstore"); + objStringFull = rootDirectory.resolve("objects"); + objTmpStringFull = rootDirectory.resolve("objects/tmp"); + metadataStringFull = rootDirectory.resolve("metadata"); + metadataTmpStringFull = rootDirectory.resolve("metadata/tmp"); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" + ); + + try { + fileHashStoreLinks = new FileHashStoreLinks(storeProperties); + + } catch (IOException e) { + e.printStackTrace(); + fail("IOException encountered: " + e.getMessage()); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + } + + /** + * Temporary folder for tests to run in + */ + @TempDir + public Path tempFolder; + + /** + * Check object store and tmp directories are created after initialization + */ + @Test + public void initObjDirectories() { + Path checkObjectStorePath = objStringFull; + assertTrue(Files.isDirectory(checkObjectStorePath)); + Path checkTmpPath = objTmpStringFull; + assertTrue(Files.isDirectory(checkTmpPath)); + } + + /** + * Check metadata store and tmp directories are created after initialization + */ + @Test + public void initMetadataDirectories() { + Path checkMetadataStorePath = metadataStringFull; + assertTrue(Files.isDirectory(checkMetadataStorePath)); + Path checkMetadataTmpPath = metadataTmpStringFull; + assertTrue(Files.isDirectory(checkMetadataTmpPath)); + } + + /** + * Check refs tmp, pid and cid directories are created after initialization + */ + @Test + public void initRefsDirectories() { + Path refsPath = rootDirectory.resolve("refs"); + assertTrue(Files.isDirectory(refsPath)); + Path refsTmpPath = rootDirectory.resolve("refs/tmp"); + assertTrue(Files.isDirectory(refsTmpPath)); + Path refsPidPath = rootDirectory.resolve("refs/pids"); + assertTrue(Files.isDirectory(refsPidPath)); + Path refsCidPath = rootDirectory.resolve("refs/cids"); + assertTrue(Files.isDirectory(refsCidPath)); + } + + /** + * Test FileHashStore instantiates with matching config + */ + @Test + public void testExistingHashStoreConfiguration_sameConfig() throws Exception { + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" + ); + + new FileHashStore(storeProperties); + } +} From 713bda82d5af53eb5e134d61f526267f56cb1a0c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 24 Jul 2024 17:10:54 -0700 Subject: [PATCH 418/553] Add logging module to 'FileHashStoreLinks' --- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index fbc51281..4925363e 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -1,5 +1,7 @@ package org.dataone.hashstore.hashstoreconverter; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.filehashstore.FileHashStore; import java.io.IOException; @@ -8,9 +10,12 @@ public class FileHashStoreLinks extends FileHashStore { + private static final Log logFileHashStoreLinks = LogFactory.getLog(FileHashStore.class); + public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { super(hashstoreProperties); + logFileHashStoreLinks.info("FileHashStoreLinks initialized"); } } From e3b40164f79abd2c5deb5ce147d883c6005506eb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 12:01:25 -0700 Subject: [PATCH 419/553] Change access modifier to public for 'enum DefaultHashAlgorithms' in 'FileHashStore', and add new method 'generateChecksums' in 'FileHashStoreLinks' --- .../filehashstore/FileHashStore.java | 2 +- .../FileHashStoreLinks.java | 124 ++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b56c6163..299dd1dc 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -94,7 +94,7 @@ public String getName() { } } - enum DefaultHashAlgorithms { + public enum DefaultHashAlgorithms { MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); final String algoName; diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 4925363e..4ace0e13 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -2,10 +2,25 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.filehashstore.FileHashStore; +import org.dataone.hashstore.filehashstore.FileHashStoreUtility; +import javax.xml.bind.DatatypeConverter; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.nio.file.AtomicMoveNotSupportedException; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.Map; import java.util.Properties; public class FileHashStoreLinks extends FileHashStore { @@ -18,4 +33,113 @@ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgument logFileHashStoreLinks.info("FileHashStoreLinks initialized"); } + /** + * Write the input stream into a given file (tmpFile) and return a HashMap consisting of + * algorithms and their respective hex digests. If an additional algorithm is supplied and + * supported, it and its checksum value will be included in the hex digests map. Default + * algorithms: MD5, SHA-1, SHA-256, SHA-384, SHA-512 + * + * @param tmpFile file to write input stream data into + * @param dataStream input stream of data to store + * @param additionalAlgorithm additional algorithm to include in hex digest map + * @param checksumAlgorithm checksum algorithm to calculate hex digest for to verifying + * object + * @return A map containing the hex digests of the default algorithms + * @throws NoSuchAlgorithmException Unable to generate new instance of supplied algorithm + * @throws IOException Issue with writing file from InputStream + * @throws SecurityException Unable to write to tmpFile + * @throws FileNotFoundException tmpFile cannot be found + */ + protected Map generateChecksums(InputStream dataStream, + String additionalAlgorithm, String checksumAlgorithm + ) throws NoSuchAlgorithmException, IOException, SecurityException { + // Determine whether to calculate additional or checksum algorithms + boolean generateAddAlgo = false; + if (additionalAlgorithm != null) { + FileHashStoreUtility.checkForEmptyAndValidString( + additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" + ); + validateAlgorithm(additionalAlgorithm); + generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); + } + boolean generateCsAlgo = false; + if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { + FileHashStoreUtility.checkForEmptyAndValidString( + checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" + ); + validateAlgorithm(checksumAlgorithm); + generateCsAlgo = shouldCalculateAlgorithm(checksumAlgorithm); + } + + MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); + MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); + MessageDigest sha256 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_256.getName()); + MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); + MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); + MessageDigest additionalAlgo = null; + MessageDigest checksumAlgo = null; + if (generateAddAlgo) { + logFileHashStoreLinks.debug( + "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); + additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); + } + if (generateCsAlgo) { + logFileHashStoreLinks.debug( + "Adding checksum algorithm to hex digest map, algorithm: " + checksumAlgorithm); + checksumAlgo = MessageDigest.getInstance(checksumAlgorithm); + } + + // Calculate hex digests + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = dataStream.read(buffer)) != -1) { + md5.update(buffer, 0, bytesRead); + sha1.update(buffer, 0, bytesRead); + sha256.update(buffer, 0, bytesRead); + sha384.update(buffer, 0, bytesRead); + sha512.update(buffer, 0, bytesRead); + if (generateAddAlgo) { + additionalAlgo.update(buffer, 0, bytesRead); + } + if (generateCsAlgo) { + checksumAlgo.update(buffer, 0, bytesRead); + } + } + + } catch (IOException ioe) { + String errMsg = "Unexpected Exception ~ " + ioe.getMessage(); + logFileHashStoreLinks.error(errMsg); + throw ioe; + + } finally { + dataStream.close(); + } + + // Create map of hash algorithms and corresponding hex digests + Map hexDigests = new HashMap<>(); + String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); + String sha1Digest = DatatypeConverter.printHexBinary(sha1.digest()).toLowerCase(); + String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha384Digest = DatatypeConverter.printHexBinary(sha384.digest()).toLowerCase(); + String sha512Digest = DatatypeConverter.printHexBinary(sha512.digest()).toLowerCase(); + hexDigests.put(DefaultHashAlgorithms.MD5.getName(), md5Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_1.getName(), sha1Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); + if (generateAddAlgo) { + String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()) + .toLowerCase(); + hexDigests.put(additionalAlgorithm, extraAlgoDigest); + } + if (generateCsAlgo) { + String extraChecksumDigest = DatatypeConverter.printHexBinary(checksumAlgo.digest()) + .toLowerCase(); + hexDigests.put(checksumAlgorithm, extraChecksumDigest); + } + + return hexDigests; + } + } From f7e3b91329247951ca54011f36d14c27df82d651 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 13:10:09 -0700 Subject: [PATCH 420/553] Rename 'FileHashStoreLinksInitTest' class to 'FileHashStoreLinks', add new junit test for 'generateChecksums' --- .../FileHashStoreLinks.java | 9 ++--- ...tTest.java => FileHashStoreLinksTest.java} | 39 +++++++++++++++++-- 2 files changed, 39 insertions(+), 9 deletions(-) rename src/test/java/org/dataone/hashstore/hashstoreconverter/{FileHashStoreLinksInitTest.java => FileHashStoreLinksTest.java} (74%) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 4ace0e13..14a2752f 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -34,12 +34,11 @@ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgument } /** - * Write the input stream into a given file (tmpFile) and return a HashMap consisting of - * algorithms and their respective hex digests. If an additional algorithm is supplied and - * supported, it and its checksum value will be included in the hex digests map. Default - * algorithms: MD5, SHA-1, SHA-256, SHA-384, SHA-512 + * Get a HashMap consisting of algorithms and their respective hex digests for a given + * data stream. If an additional algorithm is supplied and supported, it and its checksum + * value will be included in the hex digests map. Default algorithms: MD5, SHA-1, SHA-256, + * SHA-384, SHA-512 * - * @param tmpFile file to write input stream data into * @param dataStream input stream of data to store * @param additionalAlgorithm additional algorithm to include in hex digest map * @param checksumAlgorithm checksum algorithm to calculate hex digest for to verifying diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksInitTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java similarity index 74% rename from src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksInitTest.java rename to src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 3343ec31..273bc75c 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksInitTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -1,30 +1,32 @@ package org.dataone.hashstore.hashstoreconverter; import org.dataone.hashstore.filehashstore.FileHashStore; -import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.security.NoSuchAlgorithmException; -import java.util.HashMap; +import java.util.Map; import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; +import org.dataone.hashstore.testdata.TestDataHarness; -public class FileHashStoreLinksInitTest { +public class FileHashStoreLinksTest { private static Path rootDirectory; private static Path objStringFull; private static Path objTmpStringFull; private static Path metadataStringFull; private static Path metadataTmpStringFull; + private static final TestDataHarness testData = new TestDataHarness(); private static FileHashStoreLinks fileHashStoreLinks; /** @@ -120,4 +122,33 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { new FileHashStore(storeProperties); } + + /** + * Confirm that generateChecksums calculates checksums as expected + */ + @Test + public void testGenerateChecksums() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + // Get test file + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + Map hexDigests = + fileHashStoreLinks.generateChecksums(dataStream, null, null); + dataStream.close(); + + // Validate checksum values + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + } + } } From 55f3677fc0299f3a90f8f93573e9065364d46208 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 13:10:28 -0700 Subject: [PATCH 421/553] Clean up code: remove unused imports --- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 14a2752f..96dd6dd4 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -2,21 +2,13 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.dataone.hashstore.ObjectMetadata; -import org.dataone.hashstore.exceptions.PidRefsFileExistsException; import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import javax.xml.bind.DatatypeConverter; -import java.io.File; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.file.AtomicMoveNotSupportedException; -import java.nio.file.FileAlreadyExistsException; -import java.nio.file.Files; -import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.HashMap; From 79b29da2746c6af7a142e9e49030905380177c74 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 15:08:20 -0700 Subject: [PATCH 422/553] Add new method 'storeHardLink' to 'FileHashStoreLinks', new accessor method 'getHashStoreLinksDataObjectPath' and junit test for 'storeHardLink' --- .../filehashstore/FileHashStore.java | 2 +- .../FileHashStoreLinks.java | 106 ++++++++++++++++++ .../FileHashStoreLinksTest.java | 32 +++++- 3 files changed, 138 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 299dd1dc..3e692d80 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -108,7 +108,7 @@ public String getName() { } } - enum HashStoreProperties { + public enum HashStoreProperties { storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 96dd6dd4..fd949f88 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -2,13 +2,19 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import javax.xml.bind.DatatypeConverter; +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.HashMap; @@ -18,13 +24,113 @@ public class FileHashStoreLinks extends FileHashStore { private static final Log logFileHashStoreLinks = LogFactory.getLog(FileHashStore.class); + private final Path STORE_ROOT; + private final int DIRECTORY_DEPTH; + private final int DIRECTORY_WIDTH; + private final String OBJECT_STORE_ALGORITHM; + private final Path OBJECT_STORE_DIRECTORY; + private final String DEFAULT_METADATA_NAMESPACE; public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { super(hashstoreProperties); + // If configuration matches, set FileHashStoreLinks private variables + Path storePath = Paths.get( + hashstoreProperties.getProperty(HashStoreProperties.storePath.name()) + ); + int storeDepth = Integer.parseInt( + hashstoreProperties.getProperty(HashStoreProperties.storeDepth.name()) + ); + int storeWidth = Integer.parseInt( + hashstoreProperties.getProperty(HashStoreProperties.storeWidth.name()) + ); + String storeAlgorithm = hashstoreProperties.getProperty( + HashStoreProperties.storeAlgorithm.name() + ); + String storeMetadataNamespace = hashstoreProperties.getProperty( + HashStoreProperties.storeMetadataNamespace.name() + ); + // + STORE_ROOT = storePath; + DIRECTORY_DEPTH = storeDepth; + DIRECTORY_WIDTH = storeWidth; + OBJECT_STORE_ALGORITHM = storeAlgorithm; + DEFAULT_METADATA_NAMESPACE = storeMetadataNamespace; + OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); logFileHashStoreLinks.info("FileHashStoreLinks initialized"); } + /** + * Creates a hard link to a data file and return a ObjectMetadata of the given data object. + * + * @param filePath Path to the source file which a hard link will be created for + * @param fileStream Stream to the source file to calculate checksums for + * @param pid Persistent or authority-based identifier for tagging + * @return ObjectMetadata encapsulating information about the data file + * @throws NoSuchAlgorithmException Issue with one of the hashing algorithms to calculate + * @throws IOException An issue with reading from the given file stream + * @throws InterruptedException Sync issue when tagging pid and cid + */ + public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, String pid) + throws NoSuchAlgorithmException, IOException, InterruptedException { + // Validate input parameters + FileHashStoreUtility.ensureNotNull(filePath, "filePath", "storeHardLink"); + FileHashStoreUtility.ensureNotNull(fileStream, "fileStream", "storeHardLink"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "storeHardLink"); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "storeHardLink"); + if (!Files.exists(filePath)) { + String errMsg = "Given file path: " + filePath + " does not exist."; + throw new FileNotFoundException(errMsg); + } + + try { + Map hexDigests = generateChecksums(fileStream, null, null); + // Gather the elements to form the permanent address + String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid + ); + Path objHardLinkPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + // Create parent directories to the hard link, otherwise + // Files.createLink will throw a NoSuchFileException + File destinationDirectory = new File(objHardLinkPath.toFile().getParent()); + if (!destinationDirectory.exists()) { + Path destinationDirectoryPath = destinationDirectory.toPath(); + try { + Files.createDirectories(destinationDirectoryPath); + + } catch (FileAlreadyExistsException faee) { + logFileHashStoreLinks.warn( + "Directory already exists at: " + destinationDirectoryPath + + " - Skipping directory creation"); + } + } + // Finish the contract + Files.createLink(objHardLinkPath, filePath); + // This method is thread safe and synchronized + tagObject(pid, objectCid); + + return new ObjectMetadata(pid, objectCid, Files.size(objHardLinkPath), hexDigests); + + } finally { + // Close stream + fileStream.close(); + } + } + + /** + * Get a HashStore data object path + * + * @param pid Persistent or authority-based identifier + * @return Path to a HashStore data object + * @throws NoSuchAlgorithmException Conflicting algorithm preventing calculation of the path + * @throws IOException If there is an issue with reading from the pid refs file + */ + protected Path getHashStoreLinksDataObjectPath(String pid) + throws NoSuchAlgorithmException, IOException { + return getHashStoreDataObjectPath(pid); + } + /** * Get a HashMap consisting of algorithms and their respective hex digests for a given * data stream. If an additional algorithm is supplied and supported, it and its checksum diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 273bc75c..51bb1238 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -1,15 +1,16 @@ package org.dataone.hashstore.hashstoreconverter; +import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.filehashstore.FileHashStore; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.io.File; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.Properties; @@ -123,6 +124,35 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { new FileHashStore(storeProperties); } + /** + * Check that store hard link creates hard link and returns the correct ObjectMetadata cid + */ + @Test + public void storeHardLink() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + assertTrue(Files.exists(testDataFile)); + + InputStream dataStream = Files.newInputStream(testDataFile); + ObjectMetadata objInfo = + fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid); + dataStream.close(); + + // Check id (content identifier based on the store algorithm) + String objectCid = testData.pidData.get(pid).get("sha256"); + assertEquals(objectCid, objInfo.getCid()); + assertEquals(pid, objInfo.getPid()); + + Path objPath = fileHashStoreLinks.getHashStoreLinksDataObjectPath(pid); + + // Verify that a hard link has been created + BasicFileAttributes fileAttributes = Files.readAttributes(objPath, BasicFileAttributes.class); + BasicFileAttributes originalFileAttributes = Files.readAttributes(testDataFile, BasicFileAttributes.class); + assertEquals(fileAttributes.fileKey(), originalFileAttributes.fileKey()); + } + } + /** * Confirm that generateChecksums calculates checksums as expected */ From 745adac28301d0e7e52d6e4c176c1b887c077105 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 15:10:00 -0700 Subject: [PATCH 423/553] Clean up 'FileHashStoreLinks' class, remove unused variables --- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index fd949f88..d9eade14 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -24,12 +24,10 @@ public class FileHashStoreLinks extends FileHashStore { private static final Log logFileHashStoreLinks = LogFactory.getLog(FileHashStore.class); - private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; private final String OBJECT_STORE_ALGORITHM; private final Path OBJECT_STORE_DIRECTORY; - private final String DEFAULT_METADATA_NAMESPACE; public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { @@ -47,15 +45,9 @@ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgument String storeAlgorithm = hashstoreProperties.getProperty( HashStoreProperties.storeAlgorithm.name() ); - String storeMetadataNamespace = hashstoreProperties.getProperty( - HashStoreProperties.storeMetadataNamespace.name() - ); - // - STORE_ROOT = storePath; DIRECTORY_DEPTH = storeDepth; DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; - DEFAULT_METADATA_NAMESPACE = storeMetadataNamespace; OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); logFileHashStoreLinks.info("FileHashStoreLinks initialized"); } From 0c7a630d6ed022dbccd0579750ff982f011350d4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 15:14:47 -0700 Subject: [PATCH 424/553] Add missing logging statement for 'storeHardLink' --- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index d9eade14..540a5861 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -101,6 +101,9 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin Files.createLink(objHardLinkPath, filePath); // This method is thread safe and synchronized tagObject(pid, objectCid); + logFileHashStoreLinks.info( + "Hard link has been created for pid:" + pid + " with cid: " + objectCid + + " has been tagged"); return new ObjectMetadata(pid, objectCid, Files.size(objHardLinkPath), hexDigests); From c5e37b2809aee83c8411451c671d8d1ca1e16435 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 15:17:55 -0700 Subject: [PATCH 425/553] Add missing class javadoc for 'FileHashStoreLinks' --- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 540a5861..f7a4cb26 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -21,6 +21,10 @@ import java.util.Map; import java.util.Properties; +/** + * FileHashStoreLinks is an extension of FileHashStore that provides the client the ability + * to store a hard link instead of storing a data object. + */ public class FileHashStoreLinks extends FileHashStore { private static final Log logFileHashStoreLinks = LogFactory.getLog(FileHashStore.class); From ad09376b9ecfdd7610ece86dfd45f878f5dcc0b0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 26 Jul 2024 15:21:26 -0700 Subject: [PATCH 426/553] Fix wording in javadoc --- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index f7a4cb26..80fcc307 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -22,7 +22,7 @@ import java.util.Properties; /** - * FileHashStoreLinks is an extension of FileHashStore that provides the client the ability + * FileHashStoreLinks is an extension of FileHashStore that provides the client with the ability * to store a hard link instead of storing a data object. */ public class FileHashStoreLinks extends FileHashStore { From e9310ea7078d64570afa9a910a564d42cbedcf21 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Sat, 27 Jul 2024 19:17:37 -0700 Subject: [PATCH 427/553] Add new HashStoreConverter class, public convert method and todo items --- .../HashStoreConverter.java | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java new file mode 100644 index 00000000..36feaa75 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -0,0 +1,79 @@ +package org.dataone.hashstore.hashstoreconverter; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.hashstore.ObjectMetadata; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.NoSuchAlgorithmException; +import java.util.Properties; + +public class HashStoreConverter { + private static final Log logHashStoreConverter = LogFactory.getLog(HashStoreConverter.class); + private FileHashStoreLinks fileHashStoreLinks; + + /** + * Properties to an existing HashStore are required to initialize HashStoreConverter. + * HashStoreConverter is a utility tool to assist clients to convert + * + * @param hashstoreProperties Properties object with the following keys: storePath, storeDepth, + * storeWidth, storeAlgorithm, storeMetadataNamespace + * @throws IOException Issue with directories or hashstore.yaml config + * @throws NoSuchAlgorithmException Unsupported FileHashStoreLinks algorithm + */ + public HashStoreConverter(Properties hashstoreProperties) + throws IOException, NoSuchAlgorithmException { + try { + fileHashStoreLinks = new FileHashStoreLinks(hashstoreProperties); + logHashStoreConverter.info("FileHashStoreLinks initialized"); + + } catch (IOException ioe) { + String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); + logHashStoreConverter.error(errMsg); + throw ioe; + + } catch (NoSuchAlgorithmException nsae) { + String errMsg = "Unexpected issue with an algorithm encountered: " + nsae.getMessage(); + logHashStoreConverter.error(errMsg); + throw nsae; + + } + } + + // TODO Finish Javadocs + /** + * Create a hard link in the specified hashstore for an existing data object. + * + * @param filePath Path to existing data object + * @param pid Persistent or authority-based identifier + * @param sysmetaStream Stream to sysmeta content to store + * @return + * @throws IOException + * @throws NoSuchAlgorithmException + * @throws InterruptedException + */ + public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStream) + throws IOException, NoSuchAlgorithmException, InterruptedException { + // TODO Review flow + // TODO Add junit tests + + try { + InputStream fileStream = Files.newInputStream(filePath); + ObjectMetadata objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid); + fileHashStoreLinks.storeMetadata(sysmetaStream, pid); + return objInfo; + } catch (IOException ioe) { + throw ioe; + // TODO + } catch (NoSuchAlgorithmException nsae) { + throw nsae; + // TODO + } catch (InterruptedException ie) { + throw ie; + // TODO + } + } +} From 7cfa4f5f1ed041ed31f7847c0b5ac4ffd7691d9f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 10:15:36 -0700 Subject: [PATCH 428/553] Clean-up code in 'HashStoreConverter' and add missing javadocs --- .../HashStoreConverter.java | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 36feaa75..fb566c2f 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -16,8 +16,10 @@ public class HashStoreConverter { private FileHashStoreLinks fileHashStoreLinks; /** - * Properties to an existing HashStore are required to initialize HashStoreConverter. - * HashStoreConverter is a utility tool to assist clients to convert + * HashStoreConverter is a utility tool to assist with ingesting existing data objects and + * their respective system metadata into HashStore directory by creating hard links instead of + * duplicating a data object. Properties to an existing or desired HashStore are required to + * initialize HashStoreConverter. * * @param hashstoreProperties Properties object with the following keys: storePath, storeDepth, * storeWidth, storeAlgorithm, storeMetadataNamespace @@ -36,44 +38,52 @@ public HashStoreConverter(Properties hashstoreProperties) throw ioe; } catch (NoSuchAlgorithmException nsae) { - String errMsg = "Unexpected issue with an algorithm encountered: " + nsae.getMessage(); + String errMsg = "A supplied algorithm is not supported: " + nsae.getMessage(); logHashStoreConverter.error(errMsg); throw nsae; } } - // TODO Finish Javadocs /** - * Create a hard link in the specified hashstore for an existing data object. + * Create a hard link in the specified hashstore directoryfor an existing data object. * - * @param filePath Path to existing data object - * @param pid Persistent or authority-based identifier + * @param filePath Path to existing data object + * @param pid Persistent or authority-based identifier * @param sysmetaStream Stream to sysmeta content to store - * @return - * @throws IOException - * @throws NoSuchAlgorithmException - * @throws InterruptedException + * @return ObjectMetadata for the given pid + * @throws IOException An issue with calculating checksums or storing sysmeta + * @throws NoSuchAlgorithmException An algorithm defined is not supported + * @throws InterruptedException Issue with synchronizing storing metadata */ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStream) throws IOException, NoSuchAlgorithmException, InterruptedException { - // TODO Review flow - // TODO Add junit tests + logHashStoreConverter.info("Begin converting data object and sysmeta for pid: " + pid); - try { - InputStream fileStream = Files.newInputStream(filePath); + try (InputStream fileStream = Files.newInputStream(filePath)) { ObjectMetadata objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid); fileHashStoreLinks.storeMetadata(sysmetaStream, pid); return objInfo; + } catch (IOException ioe) { + String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); + logHashStoreConverter.error(errMsg); throw ioe; - // TODO + } catch (NoSuchAlgorithmException nsae) { + String errMsg = "A supplied algorithm is not supported: " + nsae.getMessage(); + logHashStoreConverter.error(errMsg); throw nsae; - // TODO + } catch (InterruptedException ie) { + String errMsg = + "Unexpected issue with synchronizing storing data objects or metadata: " + + ie.getMessage(); + logHashStoreConverter.error(errMsg); throw ie; - // TODO + + } finally { + sysmetaStream.close(); } } } From 641a73d021413849a695a66e59be125e73b88895 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 10:44:32 -0700 Subject: [PATCH 429/553] Add new 'HashstoreConverter' test class --- .../FileHashStoreLinksTest.java | 6 +- .../HashStoreConverterTest.java | 115 ++++++++++++++++++ 2 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 51bb1238..c16b2c77 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -20,15 +20,17 @@ import static org.junit.jupiter.api.Assertions.fail; import org.dataone.hashstore.testdata.TestDataHarness; +/** + * Test class for 'FileHashStoreLinks' + */ public class FileHashStoreLinksTest { - private static Path rootDirectory; private static Path objStringFull; private static Path objTmpStringFull; private static Path metadataStringFull; private static Path metadataTmpStringFull; private static final TestDataHarness testData = new TestDataHarness(); - private static FileHashStoreLinks fileHashStoreLinks; + private FileHashStoreLinks fileHashStoreLinks; /** * Initialize FileHashStore diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java new file mode 100644 index 00000000..b101fd83 --- /dev/null +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -0,0 +1,115 @@ +package org.dataone.hashstore.hashstoreconverter; + +import org.dataone.hashstore.testdata.TestDataHarness; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.NoSuchAlgorithmException; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Test class for HashStoreConverter + */ +public class HashStoreConverterTest { + private static Path rootDirectory; + private static Path objStringFull; + private static Path objTmpStringFull; + private static Path metadataStringFull; + private static Path metadataTmpStringFull; + private static final TestDataHarness testData = new TestDataHarness(); + private HashStoreConverter hashstoreConverter; + + /** + * Initialize FileHashStore + */ + @BeforeEach + public void initializeFileHashStore() { + Path root = tempFolder; + rootDirectory = root.resolve("hashstore"); + objStringFull = rootDirectory.resolve("objects"); + objTmpStringFull = rootDirectory.resolve("objects/tmp"); + metadataStringFull = rootDirectory.resolve("metadata"); + metadataTmpStringFull = rootDirectory.resolve("metadata/tmp"); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" + ); + + try { + hashstoreConverter = new HashStoreConverter(storeProperties); + + } catch (IOException e) { + e.printStackTrace(); + fail("IOException encountered: " + e.getMessage()); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + } + + /** + * Temporary folder for tests to run in + */ + @TempDir + public Path tempFolder; + + /** + * Check object store and tmp directories exist after initialization + */ + @Test + public void initObjDirectories() { + Path checkObjectStorePath = objStringFull; + assertTrue(Files.isDirectory(checkObjectStorePath)); + Path checkTmpPath = objTmpStringFull; + assertTrue(Files.isDirectory(checkTmpPath)); + } + + /** + * Check HashStoreConverter initializes with existing HashStore directory, does not throw + * exception + */ + @Test + public void hashStoreConverter_hashStoreExists() { + Path root = tempFolder; + rootDirectory = root.resolve("hashstore"); + objStringFull = rootDirectory.resolve("objects"); + objTmpStringFull = rootDirectory.resolve("objects/tmp"); + metadataStringFull = rootDirectory.resolve("metadata"); + metadataTmpStringFull = rootDirectory.resolve("metadata/tmp"); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" + ); + + try { + HashStoreConverter hashstoreConverterTwo = new HashStoreConverter(storeProperties); + + } catch (IOException e) { + e.printStackTrace(); + fail("IOException encountered: " + e.getMessage()); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + } + +} From 06a9eab0d392f219b0b322c60c7a2a6fdaa2bb83 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 11:07:32 -0700 Subject: [PATCH 430/553] Catch and swallow 'FileAlreadyExistsException' in 'storeHardLink' and add new junit tests --- .../FileHashStoreLinks.java | 11 +++++++-- .../FileHashStoreLinksTest.java | 23 +++++++++++++++++-- .../HashStoreConverterTest.java | 2 +- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 80fcc307..18bc667f 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -102,12 +102,19 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin } } // Finish the contract - Files.createLink(objHardLinkPath, filePath); + try { + Files.createLink(objHardLinkPath, filePath); + + } catch (FileAlreadyExistsException faee) { + logFileHashStoreLinks.warn( + "Data object already exists at: " + objHardLinkPath); + } + // This method is thread safe and synchronized tagObject(pid, objectCid); logFileHashStoreLinks.info( "Hard link has been created for pid:" + pid + " with cid: " + objectCid - + " has been tagged"); + + ", and has been tagged"); return new ObjectMetadata(pid, objectCid, Files.size(objHardLinkPath), hexDigests); diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index c16b2c77..e21733ff 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -36,7 +36,7 @@ public class FileHashStoreLinksTest { * Initialize FileHashStore */ @BeforeEach - public void initializeFileHashStore() { + public void initializeFileHashStoreLinks() { Path root = tempFolder; rootDirectory = root.resolve("hashstore"); objStringFull = rootDirectory.resolve("objects"); @@ -127,7 +127,7 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { } /** - * Check that store hard link creates hard link and returns the correct ObjectMetadata cid + * Check that storeHardLink creates hard link and returns the correct ObjectMetadata cid */ @Test public void storeHardLink() throws Exception { @@ -155,6 +155,25 @@ public void storeHardLink() throws Exception { } } + /** + * Check that storeHardLink does not throw exception when a hard link already exists + */ + @Test + public void storeHardLink_alreadyExists() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + assertTrue(Files.exists(testDataFile)); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid); + dataStream.close(); + InputStream dataStreamTwo = Files.newInputStream(testDataFile); + fileHashStoreLinks.storeHardLink(testDataFile, dataStreamTwo, pid+".test.pid"); + dataStreamTwo.close(); + } + } + /** * Confirm that generateChecksums calculates checksums as expected */ diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index b101fd83..398047cb 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -100,7 +100,7 @@ public void hashStoreConverter_hashStoreExists() { ); try { - HashStoreConverter hashstoreConverterTwo = new HashStoreConverter(storeProperties); + new HashStoreConverter(storeProperties); } catch (IOException e) { e.printStackTrace(); From 4f3e8fbfd7037c77eda56050eb08742aa12323be Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 11:21:05 -0700 Subject: [PATCH 431/553] Add new junit test for 'HashStoreConverter' convert method --- .../HashStoreConverterTest.java | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index 398047cb..2bd0ed21 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -1,16 +1,21 @@ package org.dataone.hashstore.hashstoreconverter; +import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; import java.security.NoSuchAlgorithmException; +import java.util.Map; import java.util.Properties; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -112,4 +117,41 @@ public void hashStoreConverter_hashStoreExists() { } } + /** + * Check that convert creates hard link, stores sysmeta and returns the correct ObjectMetadata + */ + @Test + public void convert() throws Exception { + for (String pid : testData.pidList) { + // Path to test harness data file + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + // Path to metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); + + ObjectMetadata objInfo = + hashstoreConverter.convert(testDataFile, pid, sysmetaStream); + sysmetaStream.close(); + + // Check checksums + Map hexDigests = objInfo.getHexDigests(); + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + assertEquals(sha256, objInfo.getCid()); + assertEquals(pid, objInfo.getPid()); + + // Metadata is stored directly through 'FileHashStore' + // Creation of hard links is confirmed via 'FileHashStoreLinks' + } + } + } From 51b3c5a3db69b31505371b9e3a4ae098fb905359 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 11:29:21 -0700 Subject: [PATCH 432/553] Revise 'HashStoreConverter' javadocs to improve clarity --- .../hashstoreconverter/HashStoreConverter.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index fb566c2f..aee32a8e 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -11,15 +11,18 @@ import java.security.NoSuchAlgorithmException; import java.util.Properties; +/** + * HashStoreConverter is a utility tool to assist with ingesting existing data objects and their + * respective system metadata into a HashStore by creating hard links instead of duplicating for + * the data object. + */ public class HashStoreConverter { private static final Log logHashStoreConverter = LogFactory.getLog(HashStoreConverter.class); private FileHashStoreLinks fileHashStoreLinks; /** - * HashStoreConverter is a utility tool to assist with ingesting existing data objects and - * their respective system metadata into HashStore directory by creating hard links instead of - * duplicating a data object. Properties to an existing or desired HashStore are required to - * initialize HashStoreConverter. + * Constructor to initialize HashStoreConverter. Properties to an existing or desired HashStore + * are required. * * @param hashstoreProperties Properties object with the following keys: storePath, storeDepth, * storeWidth, storeAlgorithm, storeMetadataNamespace @@ -46,7 +49,9 @@ public HashStoreConverter(Properties hashstoreProperties) } /** - * Create a hard link in the specified hashstore directoryfor an existing data object. + * Take an existing path to a data object, store it into a new or existing HashStore via a + * hard link (to save disk space), store the supplied system metadata and return the + * ObjectMetadata for the data object. * * @param filePath Path to existing data object * @param pid Persistent or authority-based identifier From d9747d7d3479e1ac22f2530335b0a794cfc1492e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 11:39:10 -0700 Subject: [PATCH 433/553] Update javadoc for 'HashStoreConverter' --- .../hashstore/hashstoreconverter/HashStoreConverter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index aee32a8e..c95a6b86 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -13,8 +13,8 @@ /** * HashStoreConverter is a utility tool to assist with ingesting existing data objects and their - * respective system metadata into a HashStore by creating hard links instead of duplicating for - * the data object. + * respective system metadata into a HashStore. Instead of duplicating data objects and writing + * ObjectMetadata. */ public class HashStoreConverter { private static final Log logHashStoreConverter = LogFactory.getLog(HashStoreConverter.class); From 4a02ffb75bd78f8b6f6f98e8e095c80b0a60aee3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 11:41:25 -0700 Subject: [PATCH 434/553] Remove unused imports --- .../hashstore/hashstoreconverter/HashStoreConverterTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index 2bd0ed21..bb2736dc 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -10,7 +10,6 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.attribute.BasicFileAttributes; import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.Properties; From 123d71427843fe52384d6427faec31dc7d580fa2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 14:40:40 -0700 Subject: [PATCH 435/553] Further revise javadocs --- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 6 ++++-- .../hashstore/hashstoreconverter/HashStoreConverter.java | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 18bc667f..c93b41c8 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -23,7 +23,9 @@ /** * FileHashStoreLinks is an extension of FileHashStore that provides the client with the ability - * to store a hard link instead of storing a data object. + * to store a hard link instead of storing a data object. This is desirable when a directory with + * data objects already exists to optimize disk usage, and is more performant since there is no + * write operation. */ public class FileHashStoreLinks extends FileHashStore { @@ -101,7 +103,7 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin + " - Skipping directory creation"); } } - // Finish the contract + try { Files.createLink(objHardLinkPath, filePath); diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index c95a6b86..1eff5a6f 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -33,7 +33,7 @@ public HashStoreConverter(Properties hashstoreProperties) throws IOException, NoSuchAlgorithmException { try { fileHashStoreLinks = new FileHashStoreLinks(hashstoreProperties); - logHashStoreConverter.info("FileHashStoreLinks initialized"); + logHashStoreConverter.info("HashStoreConverter initialized"); } catch (IOException ioe) { String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); From d102c246b67aec3cf5b13d222c9b429e1c439a05 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 29 Jul 2024 15:08:10 -0700 Subject: [PATCH 436/553] Cleanup and remove redundant junit tests, add missing javadocs --- .../FileHashStoreLinks.java | 10 ++++++++ .../HashStoreConverter.java | 5 ++-- .../FileHashStoreLinksTest.java | 6 +++++ .../HashStoreConverterTest.java | 24 ------------------- 4 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index c93b41c8..5cef0d68 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -35,6 +35,16 @@ public class FileHashStoreLinks extends FileHashStore { private final String OBJECT_STORE_ALGORITHM; private final Path OBJECT_STORE_DIRECTORY; + /** + * Constructor for FireHashStoreLinks. HashStore properties are required. + * + * @param hashstoreProperties Properties object with the following keys: storePath, storeDepth, + * storeWidth, storeAlgorithm, storeMetadataNamespace + * @throws IllegalArgumentException If there is an issue with one of the properties supplied + * @throws IOException An issue with reading or writing a hashstore.yaml + * configuration file + * @throws NoSuchAlgorithmException If an algorithm in the properties is not supported + */ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { super(hashstoreProperties); diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 1eff5a6f..870bc04b 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -13,8 +13,9 @@ /** * HashStoreConverter is a utility tool to assist with ingesting existing data objects and their - * respective system metadata into a HashStore. Instead of duplicating data objects and writing - * ObjectMetadata. + * respective system metadata into a HashStore. Instead of duplicating data objects (that already + * exists), HashStoreConverter provide a simple API to create a hard link to the data object with + * the expected HashStore object path. */ public class HashStoreConverter { private static final Log logHashStoreConverter = LogFactory.getLog(HashStoreConverter.class); diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index e21733ff..832e9d72 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -29,6 +29,9 @@ public class FileHashStoreLinksTest { private static Path objTmpStringFull; private static Path metadataStringFull; private static Path metadataTmpStringFull; + private static Path refStringFull; + private static Path refPidsStringFull; + private static Path refCidsStringFull; private static final TestDataHarness testData = new TestDataHarness(); private FileHashStoreLinks fileHashStoreLinks; @@ -43,6 +46,9 @@ public void initializeFileHashStoreLinks() { objTmpStringFull = rootDirectory.resolve("objects/tmp"); metadataStringFull = rootDirectory.resolve("metadata"); metadataTmpStringFull = rootDirectory.resolve("metadata/tmp"); + refStringFull = rootDirectory.resolve("refs"); + refPidsStringFull = rootDirectory.resolve("refs/pids"); + refCidsStringFull = rootDirectory.resolve("refs/cids"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index bb2736dc..c6e71926 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -15,7 +15,6 @@ import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; /** @@ -23,10 +22,6 @@ */ public class HashStoreConverterTest { private static Path rootDirectory; - private static Path objStringFull; - private static Path objTmpStringFull; - private static Path metadataStringFull; - private static Path metadataTmpStringFull; private static final TestDataHarness testData = new TestDataHarness(); private HashStoreConverter hashstoreConverter; @@ -37,10 +32,6 @@ public class HashStoreConverterTest { public void initializeFileHashStore() { Path root = tempFolder; rootDirectory = root.resolve("hashstore"); - objStringFull = rootDirectory.resolve("objects"); - objTmpStringFull = rootDirectory.resolve("objects/tmp"); - metadataStringFull = rootDirectory.resolve("metadata"); - metadataTmpStringFull = rootDirectory.resolve("metadata/tmp"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -70,17 +61,6 @@ public void initializeFileHashStore() { @TempDir public Path tempFolder; - /** - * Check object store and tmp directories exist after initialization - */ - @Test - public void initObjDirectories() { - Path checkObjectStorePath = objStringFull; - assertTrue(Files.isDirectory(checkObjectStorePath)); - Path checkTmpPath = objTmpStringFull; - assertTrue(Files.isDirectory(checkTmpPath)); - } - /** * Check HashStoreConverter initializes with existing HashStore directory, does not throw * exception @@ -89,10 +69,6 @@ public void initObjDirectories() { public void hashStoreConverter_hashStoreExists() { Path root = tempFolder; rootDirectory = root.resolve("hashstore"); - objStringFull = rootDirectory.resolve("objects"); - objTmpStringFull = rootDirectory.resolve("objects/tmp"); - metadataStringFull = rootDirectory.resolve("metadata"); - metadataTmpStringFull = rootDirectory.resolve("metadata/tmp"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); From 6f416b37b012a68db8d82587318528d6140d6ac5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jul 2024 15:28:08 -0700 Subject: [PATCH 437/553] Refactor 'convert' method to conditionally store the data object or sysmeta, which may not always be available --- .../HashStoreConverter.java | 64 +++++++++++++------ 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 870bc04b..ba1a2729 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -3,6 +3,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import java.io.IOException; import java.io.InputStream; @@ -54,9 +55,12 @@ public HashStoreConverter(Properties hashstoreProperties) * hard link (to save disk space), store the supplied system metadata and return the * ObjectMetadata for the data object. * + * A 'filePath' may be null, in which case a data object will not be stored. The + * 'sysMetaStream' may be null, in which case the sysmeta will not be stored. + * * @param filePath Path to existing data object * @param pid Persistent or authority-based identifier - * @param sysmetaStream Stream to sysmeta content to store + * @param sysmetaStream Stream to sysmeta content to store. * @return ObjectMetadata for the given pid * @throws IOException An issue with calculating checksums or storing sysmeta * @throws NoSuchAlgorithmException An algorithm defined is not supported @@ -65,31 +69,49 @@ public HashStoreConverter(Properties hashstoreProperties) public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStream) throws IOException, NoSuchAlgorithmException, InterruptedException { logHashStoreConverter.info("Begin converting data object and sysmeta for pid: " + pid); + FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); + boolean storeHardlink = filePath != null; + boolean storeSysmeta = sysmetaStream != null; + ObjectMetadata objInfo = null; - try (InputStream fileStream = Files.newInputStream(filePath)) { - ObjectMetadata objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid); - fileHashStoreLinks.storeMetadata(sysmetaStream, pid); - return objInfo; + if (storeHardlink) { + try (InputStream fileStream = Files.newInputStream(filePath)) { + objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid); + logHashStoreConverter.info("Stored data object for pid: " + pid); - } catch (IOException ioe) { - String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); - logHashStoreConverter.error(errMsg); - throw ioe; + } catch (IOException ioe) { + String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); + logHashStoreConverter.error(errMsg); + throw ioe; - } catch (NoSuchAlgorithmException nsae) { - String errMsg = "A supplied algorithm is not supported: " + nsae.getMessage(); - logHashStoreConverter.error(errMsg); - throw nsae; + } catch (NoSuchAlgorithmException nsae) { + String errMsg = "A supplied algorithm is not supported: " + nsae.getMessage(); + logHashStoreConverter.error(errMsg); + throw nsae; - } catch (InterruptedException ie) { - String errMsg = - "Unexpected issue with synchronizing storing data objects or metadata: " - + ie.getMessage(); - logHashStoreConverter.error(errMsg); - throw ie; + } catch (InterruptedException ie) { + String errMsg = + "Unexpected issue with synchronizing storing data objects or metadata: " + + ie.getMessage(); + logHashStoreConverter.error(errMsg); + throw ie; + } + } else { + String warnMsg = "Supplied filePath is null, not storing data object."; + logHashStoreConverter.warn(warnMsg); + } - } finally { - sysmetaStream.close(); + if (storeSysmeta) { + try { + fileHashStoreLinks.storeMetadata(sysmetaStream, pid); + } finally { + sysmetaStream.close(); + } + } else { + String warnMsg = "Supplied sysmeta Stream is null, not storing sysmeta for pid: " + pid; + logHashStoreConverter.warn(warnMsg); } + + return objInfo; } } From 82446b8bb57ddb99cf5293f9fdb28c5addd1c31d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jul 2024 15:43:58 -0700 Subject: [PATCH 438/553] Refactor 'convert' method to check for null sysmeta stream (it can never be null) and add junit test --- .../hashstoreconverter/HashStoreConverter.java | 18 +++++++----------- .../HashStoreConverterTest.java | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index ba1a2729..0bd4f254 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -69,10 +69,11 @@ public HashStoreConverter(Properties hashstoreProperties) public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStream) throws IOException, NoSuchAlgorithmException, InterruptedException { logHashStoreConverter.info("Begin converting data object and sysmeta for pid: " + pid); + FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream", "convert"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); - boolean storeHardlink = filePath != null; - boolean storeSysmeta = sysmetaStream != null; + ObjectMetadata objInfo = null; + boolean storeHardlink = filePath != null; if (storeHardlink) { try (InputStream fileStream = Files.newInputStream(filePath)) { @@ -101,15 +102,10 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre logHashStoreConverter.warn(warnMsg); } - if (storeSysmeta) { - try { - fileHashStoreLinks.storeMetadata(sysmetaStream, pid); - } finally { - sysmetaStream.close(); - } - } else { - String warnMsg = "Supplied sysmeta Stream is null, not storing sysmeta for pid: " + pid; - logHashStoreConverter.warn(warnMsg); + try { + fileHashStoreLinks.storeMetadata(sysmetaStream, pid); + } finally { + sysmetaStream.close(); } return objInfo; diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index c6e71926..049344a8 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -15,6 +15,7 @@ import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.fail; /** @@ -129,4 +130,19 @@ public void convert() throws Exception { } } + /** + * Check that convert throws exception when sysmeta stream is null + */ + @Test + public void convert_nullSysmetaStream() { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + InputStream sysmetaStream = null; + + assertThrows( + IllegalArgumentException.class, () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStream) + ); + } + } } From cb803d6359bb5936e8ca47f897e1d384c639fba5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jul 2024 16:14:26 -0700 Subject: [PATCH 439/553] Refactor 'convert' to store sysmeta first, which is a hard requirement, and add new junit tests --- .../HashStoreConverter.java | 21 +++++---- .../HashStoreConverterTest.java | 44 +++++++++++++++++++ 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 0bd4f254..184026cc 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -53,10 +53,8 @@ public HashStoreConverter(Properties hashstoreProperties) /** * Take an existing path to a data object, store it into a new or existing HashStore via a * hard link (to save disk space), store the supplied system metadata and return the - * ObjectMetadata for the data object. - * - * A 'filePath' may be null, in which case a data object will not be stored. The - * 'sysMetaStream' may be null, in which case the sysmeta will not be stored. + * ObjectMetadata for the data object. The 'sysMetaStream' given may never be null. + * A 'filePath' may be null, in which case a data object will not be stored. * * @param filePath Path to existing data object * @param pid Persistent or authority-based identifier @@ -72,9 +70,16 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream", "convert"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); + // Store the sysmeta first - this can never be null and is always required. + try { + fileHashStoreLinks.storeMetadata(sysmetaStream, pid); + } finally { + sysmetaStream.close(); + } + + // Now store the hard link, which is optional ObjectMetadata objInfo = null; boolean storeHardlink = filePath != null; - if (storeHardlink) { try (InputStream fileStream = Files.newInputStream(filePath)) { objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid); @@ -102,12 +107,6 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre logHashStoreConverter.warn(warnMsg); } - try { - fileHashStoreLinks.storeMetadata(sysmetaStream, pid); - } finally { - sysmetaStream.close(); - } - return objInfo; } } diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index 049344a8..fc39e2e9 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -1,6 +1,7 @@ package org.dataone.hashstore.hashstoreconverter; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -15,6 +16,7 @@ import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.fail; @@ -130,6 +132,48 @@ public void convert() throws Exception { } } + /** + * Check that convert throws 'HashStoreRefsAlreadyExistException' when called to store a + * data object with a pid that has already been accounted for + */ + @Test + public void convert_duplicatePid() throws Exception { + for (String pid : testData.pidList) { + // Path to test harness data file + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + // Path to metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); + hashstoreConverter.convert(testDataFile, pid, sysmetaStream); + + InputStream sysmetaStreamTwo = Files.newInputStream(testMetaDataFile); + assertThrows( + HashStoreRefsAlreadyExistException.class, + () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStreamTwo)); + } + } + + /** + * Confirm that convert still executes when filePath is null and stores the sysmeta + */ + @Test + public void convert_nullFilePath() throws Exception { + for (String pid : testData.pidList) { + // Path to test harness data file + String pidFormatted = pid.replace("/", "_"); + // Path to metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); + + ObjectMetadata objInfo = + hashstoreConverter.convert(null, pid, sysmetaStream); + sysmetaStream.close(); + + assertNull(objInfo); + } + } + /** * Check that convert throws exception when sysmeta stream is null */ From 0ddde8cdfc241b2a8251a0cce6634e44e261f6bb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 31 Jul 2024 16:44:29 -0700 Subject: [PATCH 440/553] Add missing guard rail to check for null pid in 'convert' method --- .../dataone/hashstore/hashstoreconverter/HashStoreConverter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 184026cc..c12a4246 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -68,6 +68,7 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre throws IOException, NoSuchAlgorithmException, InterruptedException { logHashStoreConverter.info("Begin converting data object and sysmeta for pid: " + pid); FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream", "convert"); + FileHashStoreUtility.ensureNotNull(pid, "pid", "convert"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); // Store the sysmeta first - this can never be null and is always required. From 802912ee6648edd85ce9be638d920e783808ab68 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Aug 2024 08:43:08 -0700 Subject: [PATCH 441/553] Revise 'convert' method by moving boolean directly into if statement --- .../hashstore/hashstoreconverter/HashStoreConverter.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index c12a4246..911de9a9 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -80,8 +80,7 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre // Now store the hard link, which is optional ObjectMetadata objInfo = null; - boolean storeHardlink = filePath != null; - if (storeHardlink) { + if (filePath != null) { try (InputStream fileStream = Files.newInputStream(filePath)) { objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid); logHashStoreConverter.info("Stored data object for pid: " + pid); From 9ed84dc606542dd21370c21691167cd70e1b158f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Aug 2024 08:58:01 -0700 Subject: [PATCH 442/553] Extract new public method 'createParentDirectories' from 'FileHashStoreLinks' into 'FileHashStoreUtility' --- .../filehashstore/FileHashStoreUtility.java | 22 +++++++++++++++++++ .../FileHashStoreLinks.java | 19 ++++------------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 26704362..9fe0c08f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -8,6 +8,7 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.nio.file.FileAlreadyExistsException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; @@ -104,6 +105,27 @@ public static String getPidHexDigest(String pid, String algorithm) return DatatypeConverter.printHexBinary(stringMessageDigest.digest()).toLowerCase(); } + + /** + * Create the parent directories for a given file path. + * + * @param desiredPath The path to your data object or file + * @throws IOException If there is an issue creating a directory + */ + public static void createParentDirectories(Path desiredPath) throws IOException { + File desiredPathParentDirs = new File(desiredPath.toFile().getParent()); + if (!desiredPathParentDirs.exists()) { + Path destinationDirectoryPath = desiredPathParentDirs.toPath(); + try { + Files.createDirectories(destinationDirectoryPath); + + } catch (FileAlreadyExistsException faee) { + logFHSU.warn("Directory already exists at: " + destinationDirectoryPath + + " - Skipping directory creation"); + } + } + } + /** * Checks whether a directory is empty or contains files. If a file is found, it returns true. * diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 5cef0d68..a6f8a26e 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -101,18 +101,7 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin Path objHardLinkPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); // Create parent directories to the hard link, otherwise // Files.createLink will throw a NoSuchFileException - File destinationDirectory = new File(objHardLinkPath.toFile().getParent()); - if (!destinationDirectory.exists()) { - Path destinationDirectoryPath = destinationDirectory.toPath(); - try { - Files.createDirectories(destinationDirectoryPath); - - } catch (FileAlreadyExistsException faee) { - logFileHashStoreLinks.warn( - "Directory already exists at: " + destinationDirectoryPath - + " - Skipping directory creation"); - } - } + FileHashStoreUtility.createParentDirectories(objHardLinkPath); try { Files.createLink(objHardLinkPath, filePath); @@ -165,9 +154,9 @@ protected Path getHashStoreLinksDataObjectPath(String pid) * @throws SecurityException Unable to write to tmpFile * @throws FileNotFoundException tmpFile cannot be found */ - protected Map generateChecksums(InputStream dataStream, - String additionalAlgorithm, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, SecurityException { + protected Map generateChecksums( + InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm) + throws NoSuchAlgorithmException, IOException, SecurityException { // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { From 115199de8a42cb2ac15e2c08ac0b716e7a799c19 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Aug 2024 10:28:43 -0700 Subject: [PATCH 443/553] Update HashStoreConverter's convert and FileHashStoreLinks' storeHardLink method signatures with 'checksum' and 'checksumAlgorithm', and revise junit tests --- .../FileHashStoreLinks.java | 27 +++++++++++----- .../HashStoreConverter.java | 13 ++++++-- .../FileHashStoreLinksTest.java | 9 ++++-- .../HashStoreConverterTest.java | 31 ++++++++++--------- 4 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index a6f8a26e..54bf39bd 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -3,6 +3,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.filehashstore.FileHashStore; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; @@ -74,25 +75,41 @@ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgument * @param filePath Path to the source file which a hard link will be created for * @param fileStream Stream to the source file to calculate checksums for * @param pid Persistent or authority-based identifier for tagging + * @param checksum Value of checksum + * @param checksumAlgorithm Ex. "SHA-256" * @return ObjectMetadata encapsulating information about the data file * @throws NoSuchAlgorithmException Issue with one of the hashing algorithms to calculate * @throws IOException An issue with reading from the given file stream * @throws InterruptedException Sync issue when tagging pid and cid */ - public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, String pid) + public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, String pid, + String checksum, String checksumAlgorithm) throws NoSuchAlgorithmException, IOException, InterruptedException { // Validate input parameters FileHashStoreUtility.ensureNotNull(filePath, "filePath", "storeHardLink"); FileHashStoreUtility.ensureNotNull(fileStream, "fileStream", "storeHardLink"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeHardLink"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "storeHardLink"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeHardLink"); + FileHashStoreUtility.checkForEmptyAndValidString(checksum, "checksum", "storeHardLink"); + validateAlgorithm(checksumAlgorithm); if (!Files.exists(filePath)) { String errMsg = "Given file path: " + filePath + " does not exist."; throw new FileNotFoundException(errMsg); } try { - Map hexDigests = generateChecksums(fileStream, null, null); + Map hexDigests = generateChecksums(fileStream, null, checksumAlgorithm); + String checksumToMatch = hexDigests.get(checksumAlgorithm); + // TODO: Add junit test + if (!checksum.equalsIgnoreCase(checksumToMatch)) { + String errMsg = "Checksum supplied: " + checksum + " does not match what has been" + + " calculated: " + checksumToMatch + " for pid: " + pid + " and checksum" + + " algorithm: " + checksumAlgorithm; + logFileHashStoreLinks.error(errMsg); + throw new NonMatchingChecksumException(errMsg); + } + // Gather the elements to form the permanent address String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( @@ -160,17 +177,11 @@ protected Map generateChecksums( // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyAndValidString( - additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" - ); validateAlgorithm(additionalAlgorithm); generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); } boolean generateCsAlgo = false; if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { - FileHashStoreUtility.checkForEmptyAndValidString( - checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" - ); validateAlgorithm(checksumAlgorithm); generateCsAlgo = shouldCalculateAlgorithm(checksumAlgorithm); } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 911de9a9..22dc7af8 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -59,17 +59,25 @@ public HashStoreConverter(Properties hashstoreProperties) * @param filePath Path to existing data object * @param pid Persistent or authority-based identifier * @param sysmetaStream Stream to sysmeta content to store. + * @param checksum Value of checksum + * @param checksumAlgorithm Ex. "SHA-256" * @return ObjectMetadata for the given pid * @throws IOException An issue with calculating checksums or storing sysmeta * @throws NoSuchAlgorithmException An algorithm defined is not supported * @throws InterruptedException Issue with synchronizing storing metadata */ - public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStream) + public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStream, + String checksum, String checksumAlgorithm) throws IOException, NoSuchAlgorithmException, InterruptedException { logHashStoreConverter.info("Begin converting data object and sysmeta for pid: " + pid); FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream", "convert"); FileHashStoreUtility.ensureNotNull(pid, "pid", "convert"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "convert"); + FileHashStoreUtility.checkForEmptyAndValidString(checksum, "checksum", "convert"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "convert"); + FileHashStoreUtility.checkForEmptyAndValidString( + checksumAlgorithm, "checksumAlgorithm", "convert"); // Store the sysmeta first - this can never be null and is always required. try { @@ -82,7 +90,8 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre ObjectMetadata objInfo = null; if (filePath != null) { try (InputStream fileStream = Files.newInputStream(filePath)) { - objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid); + objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid, checksum, + checksumAlgorithm); logHashStoreConverter.info("Stored data object for pid: " + pid); } catch (IOException ioe) { diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 832e9d72..2cc49d0d 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -138,13 +138,14 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { @Test public void storeHardLink() throws Exception { for (String pid : testData.pidList) { + String sha256 = testData.pidData.get(pid).get("sha256"); String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); assertTrue(Files.exists(testDataFile)); InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = - fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid); + fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid, sha256, "SHA-256"); dataStream.close(); // Check id (content identifier based on the store algorithm) @@ -167,15 +168,17 @@ public void storeHardLink() throws Exception { @Test public void storeHardLink_alreadyExists() throws Exception { for (String pid : testData.pidList) { + String sha256 = testData.pidData.get(pid).get("sha256"); String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); assertTrue(Files.exists(testDataFile)); InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid); + fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid, sha256, "SHA-256"); dataStream.close(); InputStream dataStreamTwo = Files.newInputStream(testDataFile); - fileHashStoreLinks.storeHardLink(testDataFile, dataStreamTwo, pid+".test.pid"); + fileHashStoreLinks.storeHardLink(testDataFile, dataStreamTwo, pid + ".test.pid", sha256, + "SHA-256"); dataStreamTwo.close(); } } diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index fc39e2e9..50cc47cd 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -101,6 +101,11 @@ public void hashStoreConverter_hashStoreExists() { @Test public void convert() throws Exception { for (String pid : testData.pidList) { + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); // Path to test harness data file String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -109,16 +114,11 @@ public void convert() throws Exception { InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); ObjectMetadata objInfo = - hashstoreConverter.convert(testDataFile, pid, sysmetaStream); + hashstoreConverter.convert(testDataFile, pid, sysmetaStream, sha256, "SHA-256"); sysmetaStream.close(); // Check checksums Map hexDigests = objInfo.getHexDigests(); - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 = testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); assertEquals(md5, hexDigests.get("MD5")); assertEquals(sha1, hexDigests.get("SHA-1")); assertEquals(sha256, hexDigests.get("SHA-256")); @@ -139,18 +139,19 @@ public void convert() throws Exception { @Test public void convert_duplicatePid() throws Exception { for (String pid : testData.pidList) { + String sha256 = testData.pidData.get(pid).get("sha256"); // Path to test harness data file String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); // Path to metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); - hashstoreConverter.convert(testDataFile, pid, sysmetaStream); + hashstoreConverter.convert(testDataFile, pid, sysmetaStream, sha256, "SHA-256"); InputStream sysmetaStreamTwo = Files.newInputStream(testMetaDataFile); - assertThrows( - HashStoreRefsAlreadyExistException.class, - () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStreamTwo)); + assertThrows(HashStoreRefsAlreadyExistException.class, + () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStreamTwo, + sha256, "SHA-256")); } } @@ -160,6 +161,7 @@ public void convert_duplicatePid() throws Exception { @Test public void convert_nullFilePath() throws Exception { for (String pid : testData.pidList) { + String sha256 = testData.pidData.get(pid).get("sha256"); // Path to test harness data file String pidFormatted = pid.replace("/", "_"); // Path to metadata file @@ -167,7 +169,7 @@ public void convert_nullFilePath() throws Exception { InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); ObjectMetadata objInfo = - hashstoreConverter.convert(null, pid, sysmetaStream); + hashstoreConverter.convert(null, pid, sysmetaStream, sha256, "SHA-256"); sysmetaStream.close(); assertNull(objInfo); @@ -180,13 +182,14 @@ public void convert_nullFilePath() throws Exception { @Test public void convert_nullSysmetaStream() { for (String pid : testData.pidList) { + String sha256 = testData.pidData.get(pid).get("sha256"); String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); InputStream sysmetaStream = null; - assertThrows( - IllegalArgumentException.class, () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStream) - ); + assertThrows(IllegalArgumentException.class, + () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStream, sha256, + "SHA-256")); } } } From 034af738ddd3397b2d9d068bce194e6ebe7f6366 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Aug 2024 11:23:21 -0700 Subject: [PATCH 444/553] Clean-up 'convert' method and add new junit tests --- .../FileHashStoreLinks.java | 3 +- .../HashStoreConverter.java | 11 ++- .../HashStoreConverterTest.java | 96 +++++++++++++++++++ 3 files changed, 103 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 54bf39bd..80820329 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -70,7 +70,7 @@ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgument } /** - * Creates a hard link to a data file and return a ObjectMetadata of the given data object. + * Store a hard link to an existing data object in HashStore. * * @param filePath Path to the source file which a hard link will be created for * @param fileStream Stream to the source file to calculate checksums for @@ -101,7 +101,6 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin try { Map hexDigests = generateChecksums(fileStream, null, checksumAlgorithm); String checksumToMatch = hexDigests.get(checksumAlgorithm); - // TODO: Add junit test if (!checksum.equalsIgnoreCase(checksumToMatch)) { String errMsg = "Checksum supplied: " + checksum + " does not match what has been" + " calculated: " + checksumToMatch + " for pid: " + pid + " and checksum" diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 22dc7af8..c6dfd57e 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -73,11 +73,6 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream", "convert"); FileHashStoreUtility.ensureNotNull(pid, "pid", "convert"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "convert"); - FileHashStoreUtility.checkForEmptyAndValidString(checksum, "checksum", "convert"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "convert"); - FileHashStoreUtility.checkForEmptyAndValidString( - checksumAlgorithm, "checksumAlgorithm", "convert"); // Store the sysmeta first - this can never be null and is always required. try { @@ -89,6 +84,12 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre // Now store the hard link, which is optional ObjectMetadata objInfo = null; if (filePath != null) { + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "convert"); + FileHashStoreUtility.checkForEmptyAndValidString(checksum, "checksum", "convert"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "convert"); + FileHashStoreUtility.checkForEmptyAndValidString( + checksumAlgorithm, "checksumAlgorithm", "convert"); + try (InputStream fileStream = Files.newInputStream(filePath)) { objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid, checksum, checksumAlgorithm); diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index 50cc47cd..c039168d 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -2,6 +2,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -132,6 +133,78 @@ public void convert() throws Exception { } } + /** + * Check that convert created additional checksum algorithm that is not included in the + * default list + */ + @Test + public void convert_checksumAlgorithmIncluded() throws Exception { + for (String pid : testData.pidList) { + String sha256 = testData.pidData.get(pid).get("sha256"); + // Path to test harness data file + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + // Path to metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); + + ObjectMetadata objInfo = + hashstoreConverter.convert(testDataFile, pid, sysmetaStream, sha256, "SHA-256"); + sysmetaStream.close(); + + // Check checksums + Map hexDigests = objInfo.getHexDigests(); + assertEquals(sha256, hexDigests.get("SHA-256")); + } + } + + /** + * Check that convert created additional checksum algorithm that is not included in the + * default list + */ + @Test + public void convert_checksumAlgorithmSupportedButNotFound() throws Exception { + for (String pid : testData.pidList) { + String md2 = testData.pidData.get(pid).get("md2"); + // Path to test harness data file + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + // Path to metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); + + ObjectMetadata objInfo = + hashstoreConverter.convert(testDataFile, pid, sysmetaStream, md2, "MD2"); + sysmetaStream.close(); + + // Check checksums + Map hexDigests = objInfo.getHexDigests(); + assertEquals(md2, hexDigests.get("MD2")); + } + } + + /** + * Check that convert created additional checksum algorithm that is not included in the + * default list + */ + @Test + public void convert_checksumAlgorithmNotSupported() throws Exception { + for (String pid : testData.pidList) { + String md2 = testData.pidData.get(pid).get("md2"); + // Path to test harness data file + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + // Path to metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); + + assertThrows(NoSuchAlgorithmException.class, + () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStream, + md2, "blake2s")); + sysmetaStream.close(); + } + } + /** * Check that convert throws 'HashStoreRefsAlreadyExistException' when called to store a * data object with a pid that has already been accounted for @@ -152,6 +225,7 @@ public void convert_duplicatePid() throws Exception { assertThrows(HashStoreRefsAlreadyExistException.class, () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStreamTwo, sha256, "SHA-256")); + sysmetaStreamTwo.close(); } } @@ -192,4 +266,26 @@ public void convert_nullSysmetaStream() { "SHA-256")); } } + + /** + * Check that convert throws exception when checksum doesn't match + */ + @Test + public void convert_nonMatchingChecksum() throws Exception { + for (String pid : testData.pidList) { + // Incorrect checksum to compare against + String md5 = testData.pidData.get(pid).get("md5"); + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + // Path to metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + InputStream sysmetaStream = Files.newInputStream(testMetaDataFile); + + assertThrows( + NonMatchingChecksumException.class, + () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStream, md5, + "SHA-256")); + sysmetaStream.close(); + } + } } From 9a748d449f1005bc9ddd69e2285dafedce2a00bf Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Aug 2024 11:30:34 -0700 Subject: [PATCH 445/553] Clean-up code for linting warnings, unused imports and redundant variables --- .../FileHashStoreLinks.java | 26 ++----------------- .../HashStoreConverter.java | 2 +- .../FileHashStoreLinksTest.java | 13 +++++----- .../HashStoreConverterTest.java | 3 +-- 4 files changed, 10 insertions(+), 34 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 80820329..b2edbad8 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -8,7 +8,6 @@ import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import javax.xml.bind.DatatypeConverter; -import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -99,7 +98,7 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin } try { - Map hexDigests = generateChecksums(fileStream, null, checksumAlgorithm); + Map hexDigests = generateChecksums(fileStream, checksumAlgorithm); String checksumToMatch = hexDigests.get(checksumAlgorithm); if (!checksum.equalsIgnoreCase(checksumToMatch)) { String errMsg = "Checksum supplied: " + checksum + " does not match what has been" @@ -162,8 +161,6 @@ protected Path getHashStoreLinksDataObjectPath(String pid) * * @param dataStream input stream of data to store * @param additionalAlgorithm additional algorithm to include in hex digest map - * @param checksumAlgorithm checksum algorithm to calculate hex digest for to verifying - * object * @return A map containing the hex digests of the default algorithms * @throws NoSuchAlgorithmException Unable to generate new instance of supplied algorithm * @throws IOException Issue with writing file from InputStream @@ -171,7 +168,7 @@ protected Path getHashStoreLinksDataObjectPath(String pid) * @throws FileNotFoundException tmpFile cannot be found */ protected Map generateChecksums( - InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm) + InputStream dataStream, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, SecurityException { // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; @@ -179,11 +176,6 @@ protected Map generateChecksums( validateAlgorithm(additionalAlgorithm); generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); } - boolean generateCsAlgo = false; - if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { - validateAlgorithm(checksumAlgorithm); - generateCsAlgo = shouldCalculateAlgorithm(checksumAlgorithm); - } MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); @@ -191,17 +183,11 @@ protected Map generateChecksums( MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); MessageDigest additionalAlgo = null; - MessageDigest checksumAlgo = null; if (generateAddAlgo) { logFileHashStoreLinks.debug( "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); } - if (generateCsAlgo) { - logFileHashStoreLinks.debug( - "Adding checksum algorithm to hex digest map, algorithm: " + checksumAlgorithm); - checksumAlgo = MessageDigest.getInstance(checksumAlgorithm); - } // Calculate hex digests try { @@ -216,9 +202,6 @@ protected Map generateChecksums( if (generateAddAlgo) { additionalAlgo.update(buffer, 0, bytesRead); } - if (generateCsAlgo) { - checksumAlgo.update(buffer, 0, bytesRead); - } } } catch (IOException ioe) { @@ -247,11 +230,6 @@ protected Map generateChecksums( .toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } - if (generateCsAlgo) { - String extraChecksumDigest = DatatypeConverter.printHexBinary(checksumAlgo.digest()) - .toLowerCase(); - hexDigests.put(checksumAlgorithm, extraChecksumDigest); - } return hexDigests; } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index c6dfd57e..2b8518a3 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -20,7 +20,7 @@ */ public class HashStoreConverter { private static final Log logHashStoreConverter = LogFactory.getLog(HashStoreConverter.class); - private FileHashStoreLinks fileHashStoreLinks; + private final FileHashStoreLinks fileHashStoreLinks; /** * Constructor to initialize HashStoreConverter. Properties to an existing or desired HashStore diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 2cc49d0d..718fdcfb 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -105,14 +105,11 @@ public void initMetadataDirectories() { */ @Test public void initRefsDirectories() { - Path refsPath = rootDirectory.resolve("refs"); - assertTrue(Files.isDirectory(refsPath)); + assertTrue(Files.isDirectory(refStringFull)); + assertTrue(Files.isDirectory(refPidsStringFull)); + assertTrue(Files.isDirectory(refCidsStringFull)); Path refsTmpPath = rootDirectory.resolve("refs/tmp"); assertTrue(Files.isDirectory(refsTmpPath)); - Path refsPidPath = rootDirectory.resolve("refs/pids"); - assertTrue(Files.isDirectory(refsPidPath)); - Path refsCidPath = rootDirectory.resolve("refs/cids"); - assertTrue(Files.isDirectory(refsCidPath)); } /** @@ -195,15 +192,17 @@ public void testGenerateChecksums() throws Exception { InputStream dataStream = Files.newInputStream(testDataFile); Map hexDigests = - fileHashStoreLinks.generateChecksums(dataStream, null, null); + fileHashStoreLinks.generateChecksums(dataStream, "MD2"); dataStream.close(); // Validate checksum values + String md2 = testData.pidData.get(pid).get("md2"); String md5 = testData.pidData.get(pid).get("md5"); String sha1 = testData.pidData.get(pid).get("sha1"); String sha256 = testData.pidData.get(pid).get("sha256"); String sha384 = testData.pidData.get(pid).get("sha384"); String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md2, hexDigests.get("MD2")); assertEquals(md5, hexDigests.get("MD5")); assertEquals(sha1, hexDigests.get("SHA-1")); assertEquals(sha256, hexDigests.get("SHA-256")); diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index c039168d..614b5ec4 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -259,10 +259,9 @@ public void convert_nullSysmetaStream() { String sha256 = testData.pidData.get(pid).get("sha256"); String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream sysmetaStream = null; assertThrows(IllegalArgumentException.class, - () -> hashstoreConverter.convert(testDataFile, pid, sysmetaStream, sha256, + () -> hashstoreConverter.convert(testDataFile, pid, null, sha256, "SHA-256")); } } From e045597a4c840d37e4553e8eff074440a408422e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 1 Aug 2024 11:46:12 -0700 Subject: [PATCH 446/553] Clean up javadocs --- .../hashstoreconverter/FileHashStoreLinks.java | 2 +- .../hashstoreconverter/HashStoreConverter.java | 10 +++++----- .../hashstoreconverter/HashStoreConverterTest.java | 10 ++++------ 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index b2edbad8..c2dd532b 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -69,7 +69,7 @@ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgument } /** - * Store a hard link to an existing data object in HashStore. + * Store a hard link to HashStore from an existing data object in the filesystem. * * @param filePath Path to the source file which a hard link will be created for * @param fileStream Stream to the source file to calculate checksums for diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 2b8518a3..08e14a00 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -15,8 +15,8 @@ /** * HashStoreConverter is a utility tool to assist with ingesting existing data objects and their * respective system metadata into a HashStore. Instead of duplicating data objects (that already - * exists), HashStoreConverter provide a simple API to create a hard link to the data object with - * the expected HashStore object path. + * exist), HashStoreConverter provides a simple API to create a hard link to a data object with + * its expected HashStore object path. */ public class HashStoreConverter { private static final Log logHashStoreConverter = LogFactory.getLog(HashStoreConverter.class); @@ -52,9 +52,9 @@ public HashStoreConverter(Properties hashstoreProperties) /** * Take an existing path to a data object, store it into a new or existing HashStore via a - * hard link (to save disk space), store the supplied system metadata and return the - * ObjectMetadata for the data object. The 'sysMetaStream' given may never be null. - * A 'filePath' may be null, in which case a data object will not be stored. + * hard link, store the supplied system metadata and return the ObjectMetadata for the data + * object. The 'sysMetaStream' given may never be null. A 'filePath' may be null, in which + * case a data object will not be stored. * * @param filePath Path to existing data object * @param pid Persistent or authority-based identifier diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index 614b5ec4..276c0b90 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -134,8 +134,7 @@ public void convert() throws Exception { } /** - * Check that convert created additional checksum algorithm that is not included in the - * default list + * Check that convert creates additional checksum algorithm that is included in the default list */ @Test public void convert_checksumAlgorithmIncluded() throws Exception { @@ -159,8 +158,8 @@ public void convert_checksumAlgorithmIncluded() throws Exception { } /** - * Check that convert created additional checksum algorithm that is not included in the - * default list + * Check that convert creates additional checksum that is not included in the default list but + * is supported. */ @Test public void convert_checksumAlgorithmSupportedButNotFound() throws Exception { @@ -184,8 +183,7 @@ public void convert_checksumAlgorithmSupportedButNotFound() throws Exception { } /** - * Check that convert created additional checksum algorithm that is not included in the - * default list + * Check that convert throws exception when supplied with an unsupported algorithm. */ @Test public void convert_checksumAlgorithmNotSupported() throws Exception { From ac8210426d6a9ae65e6c42c7ac41e4f11f842b1c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 2 Aug 2024 08:57:49 -0700 Subject: [PATCH 447/553] Revert 'convert' operation order back to storing hard link first before storing sysmeta --- .../HashStoreConverter.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 08e14a00..0e6d615a 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -53,8 +53,8 @@ public HashStoreConverter(Properties hashstoreProperties) /** * Take an existing path to a data object, store it into a new or existing HashStore via a * hard link, store the supplied system metadata and return the ObjectMetadata for the data - * object. The 'sysMetaStream' given may never be null. A 'filePath' may be null, in which - * case a data object will not be stored. + * object. A 'filePath' may be null, in which case a hard link will not be created, and only + * the sysmeta will be stored. * * @param filePath Path to existing data object * @param pid Persistent or authority-based identifier @@ -74,14 +74,7 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre FileHashStoreUtility.ensureNotNull(pid, "pid", "convert"); FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); - // Store the sysmeta first - this can never be null and is always required. - try { - fileHashStoreLinks.storeMetadata(sysmetaStream, pid); - } finally { - sysmetaStream.close(); - } - - // Now store the hard link, which is optional + // Store the hard link first if it's available ObjectMetadata objInfo = null; if (filePath != null) { FileHashStoreUtility.ensureNotNull(checksum, "checksum", "convert"); @@ -117,6 +110,13 @@ public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStre logHashStoreConverter.warn(warnMsg); } + // Now the sysmeta + try { + fileHashStoreLinks.storeMetadata(sysmetaStream, pid); + } finally { + sysmetaStream.close(); + } + return objInfo; } } From 41375e0916dbc3d153a89e95b60bfd5466331a6c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 9 Aug 2024 09:48:24 -0700 Subject: [PATCH 448/553] Update postgres version to 42.7.2 to resolve vulnerability warning --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 56b9668e..1b7a7342 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ org.postgresql postgresql - 42.4.3 + 42.7.2 From 1da579c487d7611b79cc592cc91c56726b578d93 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 9 Aug 2024 15:26:24 -0700 Subject: [PATCH 449/553] Update README.md for clarity --- README.md | 118 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 59335555..8a00810c 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ respective directories and utilizes an identifier-based API for interacting with HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of HashStore. -###### Public API Methods +### Public API Methods - storeObject - tagObject @@ -49,9 +49,9 @@ the expected usage of HashStore. - deleteMetadata - getHexDigest -For details, please see the HashStore interface (HashStore.java) +For details, please see the HashStore interface [HashStore.java](https://github.com/DataONEorg/hashstore-java/blob/main/src/main/java/org/dataone/hashstore/HashStore.java) -###### How do I create a HashStore? +### How do I create a HashStore? To create or interact with a HashStore, instantiate a HashStore object with the following set of properties: @@ -62,7 +62,7 @@ properties: - storeAlgorithm - storeMetadataNamespace -``` +```java String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; Path rootDirectory = tempFolder.resolve("metacat"); @@ -79,18 +79,62 @@ storeProperties.setProperty( HashStore hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); // Store an object -hashStore.storeObject(stream, pid) +hashStore.storeObject(stream, pid); // ... ``` -###### Working with objects (store, retrieve, delete) +### What does HashStore look like? + +```sh +# Example layout in HashStore with a single file stored along with its metadata and reference files. +# This uses a store depth of 3 (number of nested levels/directories - e.g. '/4d/19/81/' within +# 'objects', see below), with a width of 2 (number of characters used in directory name - e.g. "4d", +# "19" etc.) and "SHA-256" as its default store algorithm +## Notes: +## - Objects are stored using their content identifier as the file address +## - The reference file for each pid contains a single cid +## - The reference file for each cid contains multiple pids each on its own line +## - There are two metadata docs under the metadata directory for the pid (sysmeta, annotations) + +.../metacat/hashstore +├── hashstore.yaml +└── objects +| └── 4d +| └── 19 +| └── 81 +| └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c +└── metadata +| └── 0d +| └── 55 +| └── 55 +| └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e +| └── 323e0799524cec4c7e14d31289cefd884b563b5c052f154a066de5ec1e477da7 +| └── sha256(pid+formatId_annotations) +└── refs + ├── cids + | └── 4d + | └── 19 + | └── 81 + | └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c + └── pids + └── 0d + └── 55 + └── 55 + └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e +``` + +### Working with objects (store, retrieve, delete) In HashStore, objects are first saved as temporary files while their content identifiers are calculated. Once the default hash algorithm list and their hashes are generated, objects are stored in their permanent location using the store's algorithm's corresponding hash value, the store depth -and the store width. Lastly, reference files are created for the object so that they can be found -and retrieved given an identifier (ex. persistent identifier (pid)). Note: Objects are also stored -once and only once. +and the store width. Lastly, objects are 'tagged' with a given identifier (ex. persistent +identifier (pid)). This process produces reference files, which allow objects to be found and +retrieved with a given identifier. +- Note 1: An identifier can only be used once +- Note 2: Objects are stored once and only once using its content identifier (a checksum generated + from using a hashing algorithm). Clients that attempt to store duplicate objects will receive + the expected ObjectMetadata - with HashStore handling the de-duplication process under the hood. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an @@ -100,18 +144,18 @@ confirm that the object is what is expected. And to finalize the process (to mak discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: -``` +```java // All-in-one process which stores, validates and tags an object -objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) +objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize); // Manual Process // Store object -objectMetadata objInfo = storeObject(InputStream) +objectMetadata objInfo = storeObject(InputStream); // Validate object, if the parameters do not match, the data object associated with the objInfo // supplied will be deleted -- deleteInvalidObject(objInfo, checksum, checksumAlgorithn, objSize) +deleteInvalidObject(objInfo, checksum, checksumAlgorithn, objSize); // Tag object, makes the object discoverable (find, retrieve, delete) -tagObject(pid, cid) +tagObject(pid, cid); ``` **How do I retrieve an object if I have the pid?** @@ -132,7 +176,7 @@ tagObject(pid, cid) implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. -###### Working with metadata (store, retrieve, delete) +### Working with metadata (store, retrieve, delete) HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. All metadata documents related to a 'pid' are stored in a directory determined by calculating the hash of the @@ -155,7 +199,7 @@ that represents an object format for the metadata type (ex. `storeMetadata(strea which will delete the metadata object associated with the given pid. - To delete all metadata objects related to a given 'pid', call `deleteMetadata(String pid)` -###### What are HashStore reference files? +### What are HashStore reference files? HashStore assumes that every object to store has a respective identifier. This identifier is then used when storing, retrieving and deleting an object. In order to facilitate this process, we create @@ -186,46 +230,8 @@ HashStore (calling `storeObject(InputStream)`). - Cid (content identifier) reference files are created at the same time as pid reference files when storing an object with an identifier. - Cid reference files are located in HashStore's '/refs/cid' directory -- A cid reference file is a list of all the pids that reference a cid, delimited by a new line (" - \n") character - -###### What does HashStore look like? - -```sh -# Example layout in HashStore with a single file stored along with its metadata and reference files. -# This uses a store depth of 3, with a width of 2 and "SHA-256" as its default store algorithm -## Notes: -## - Objects are stored using their content identifier as the file address -## - The reference file for each pid contains a single cid -## - The reference file for each cid contains multiple pids each on its own line -## - There are two metadata docs under the metadata directory for the pid (sysmeta, annotations) - -.../metacat/hashstore -├── hashstore.yaml -└── objects -| └── 4d -| └── 19 -| └── 81 -| └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c -└── metadata -| └── 0d -| └── 55 -| └── 55 -| └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e -| └── 323e0799524cec4c7e14d31289cefd884b563b5c052f154a066de5ec1e477da7 -| └── sha256(pid+formatId_annotations) -└── refs - ├── cids - | └── 4d - | └── 19 - | └── 81 - | └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c - └── pids - └── 0d - └── 55 - └── 55 - └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e -``` +- A cid reference file is a list of all the pids that reference a cid, delimited by a new line ("\n") + character ## Development Build From 8b8ef2df5f607b83288568e12d398f2677e93bad Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 08:18:16 -0700 Subject: [PATCH 450/553] Rename 'deleteInvalidObject' method to 'deleteIfInvalidObject' and update affected code --- .../java/org/dataone/hashstore/HashStore.java | 2 +- .../filehashstore/FileHashStore.java | 2 +- .../FileHashStoreInterfaceTest.java | 127 ++++-------------- 3 files changed, 29 insertions(+), 102 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 2d917e9e..60aa3f60 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -119,7 +119,7 @@ void tagObject(String pid, String cid) throws IOException, * @throws IOException Issue with recalculating supported algo for * checksum not found */ - void deleteInvalidObject( + void deleteIfInvalidObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b56c6163..9e70c43a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -822,7 +822,7 @@ public void deleteObject(String pid) @Override - public void deleteInvalidObject( + public void deleteIfInvalidObject( ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 5b586ca0..10bdfd02 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -44,7 +44,6 @@ import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -821,10 +820,10 @@ public void tagObject_PidRefsFileExistsException() throws Exception { } /** - * Check that deleteInvalidObject does not throw exception with matching values + * Check that deleteIfInvalidObject does not throw exception with matching values */ @Test - public void deleteInvalidObject_correctValues() throws Exception { + public void deleteIfInvalidObject_correctValues() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -835,11 +834,11 @@ public void deleteInvalidObject_correctValues() throws Exception { String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get deleteInvalidObject args + // Get deleteIfInvalidObject args String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.deleteInvalidObject( + fileHashStore.deleteIfInvalidObject( objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); @@ -855,11 +854,11 @@ public void deleteInvalidObject_correctValues() throws Exception { } /** - * Check that deleteInvalidObject throws MissingHexDigestsException when objInfo hexDigests + * Check that deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests * is empty. */ @Test - public void deleteInvalidObject_objInfoEmptyHexDigests() { + public void deleteIfInvalidObject_objInfoEmptyHexDigests() { String id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; long size = 1999999; Map hexDigests = new HashMap<>(); @@ -868,15 +867,15 @@ public void deleteInvalidObject_objInfoEmptyHexDigests() { assertThrows( MissingHexDigestsException.class, - () -> fileHashStore.deleteInvalidObject(objInfo, id, "MD2", size)); + () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); } /** - * Check that deleteInvalidObject throws MissingHexDigestsException when objInfo hexDigests + * Check that deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests * is null. */ @Test - public void deleteInvalidObject_objInfoNullHexDigests() { + public void deleteIfInvalidObject_objInfoNullHexDigests() { String id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; long size = 1999999; Map hexDigests = null; @@ -884,15 +883,15 @@ public void deleteInvalidObject_objInfoNullHexDigests() { assertThrows( IllegalArgumentException.class, - () -> fileHashStore.deleteInvalidObject(objInfo, id, "MD2", size)); + () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); } /** - * Check that deleteInvalidObject calculates and verifies a checksum with a supported algorithm that is + * Check that deleteIfInvalidObject calculates and verifies a checksum with a supported algorithm that is * not included in the default list */ @Test - public void deleteInvalidObject_supportedAlgoNotInDefaultList() throws Exception { + public void deleteIfInvalidObject_supportedAlgoNotInDefaultList() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -901,11 +900,11 @@ public void deleteInvalidObject_supportedAlgoNotInDefaultList() throws Exception ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); dataStream.close(); - // Get deleteInvalidObject args + // Get deleteIfInvalidObject args String expectedChecksum = testData.pidData.get(pid).get("md2"); long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, "MD2", expectedSize); + fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, "MD2", expectedSize); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -920,11 +919,11 @@ public void deleteInvalidObject_supportedAlgoNotInDefaultList() throws Exception } /** - * Check that deleteInvalidObject calculates throws exception when given a checksumAlgorithm that is + * Check that deleteIfInvalidObject calculates throws exception when given a checksumAlgorithm that is * not supported */ @Test - public void deleteInvalidObject_unsupportedAlgo() throws Exception { + public void deleteIfInvalidObject_unsupportedAlgo() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -935,7 +934,7 @@ public void deleteInvalidObject_unsupportedAlgo() throws Exception { assertThrows( UnsupportedHashAlgorithmException.class, - () -> fileHashStore.deleteInvalidObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); + () -> fileHashStore.deleteIfInvalidObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -950,10 +949,10 @@ public void deleteInvalidObject_unsupportedAlgo() throws Exception { } /** - * Check that deleteInvalidObject throws exception when non-matching size value provided + * Check that deleteIfInvalidObject throws exception when non-matching size value provided */ @Test - public void deleteInvalidObject_mismatchedSize() throws Exception { + public void deleteIfInvalidObject_mismatchedSize() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -964,14 +963,14 @@ public void deleteInvalidObject_mismatchedSize() throws Exception { String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get deleteInvalidObject args + // Get deleteIfInvalidObject args String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; assertThrows( NonMatchingObjSizeException.class, - () -> fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize)); + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); @@ -986,10 +985,10 @@ public void deleteInvalidObject_mismatchedSize() throws Exception { } /** - * Check that deleteInvalidObject throws exception with non-matching checksum value + * Check that deleteIfInvalidObject throws exception with non-matching checksum value */ @Test - public void deleteInvalidObject_mismatchedChecksum() throws Exception { + public void deleteIfInvalidObject_mismatchedChecksum() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1000,86 +999,14 @@ public void deleteInvalidObject_mismatchedChecksum() throws Exception { String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get deleteInvalidObject args + // Get deleteIfInvalidObject args String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); assertThrows( NonMatchingChecksumException.class, - () -> fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize)); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that deleteInvalidObject throws exception when non-matching size value provided - */ - @Test - public void deleteInvalidObject_mismatchedSize_deleteInvalidObject_true() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get deleteInvalidObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = 123456789; - - assertThrows( - NonMatchingObjSizeException.class, - () -> fileHashStore.deleteInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize)); - - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); - } - } - - /** - * Check that deleteInvalidObject throws exception with non-matching checksum value - */ - @Test - public void deleteInvalidObject_mismatchedChecksum_deleteInvalidObject_true() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get deleteInvalidObject args - String expectedChecksum = "intentionallyWrongValue"; - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - assertThrows(NonMatchingChecksumException.class, () -> fileHashStore.deleteInvalidObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - )); + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); From a306271f41eb1cd4d0ff7297e4c8439819460ef5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 08:22:06 -0700 Subject: [PATCH 451/553] Update .idea's .gitignore and 'README.md' --- .idea/.gitignore | 1 + .idea/inspectionProfiles/Project_Default.xml | 147 ------------------- README.md | 2 +- 3 files changed, 2 insertions(+), 148 deletions(-) delete mode 100644 .idea/inspectionProfiles/Project_Default.xml diff --git a/.idea/.gitignore b/.idea/.gitignore index 26d33521..4de73123 100644 --- a/.idea/.gitignore +++ b/.idea/.gitignore @@ -1,3 +1,4 @@ # Default ignored files /shelf/ /workspace.xml +/inspectionProfiles diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index fd7d3768..00000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,147 +0,0 @@ - - - - \ No newline at end of file diff --git a/README.md b/README.md index 8a00810c..af5f32e2 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ retrieved with a given identifier. By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identifier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. -The client is then expected to call `deleteInvalidObject` when the relevant metadata is available to +The client is then expected to call `deleteIfInvalidObject` when the relevant metadata is available to confirm that the object is what is expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``. In summary, there are two expected paths to store an object: From 96235b892f3e8d101e44de722aff8437e3387322 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 09:17:25 -0700 Subject: [PATCH 452/553] Revise javadoc usage of backticks to '{@code ... }' --- .../java/org/dataone/hashstore/HashStore.java | 68 +++++++++---------- .../filehashstore/FileHashStore.java | 27 ++++---- 2 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 60aa3f60..22d5c083 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -14,42 +14,42 @@ * HashStore is a content-addressable file management system that utilizes the content identifier of * an object to address files. The system stores both objects, references (refs) and metadata in its * respective directories and provides an API for interacting with the store. HashStore storage - * classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected - * usage of the system. + * classes (like {@code FileHashStore}) must implement the HashStore interface to ensure the + * expected usage of the system. */ public interface HashStore { /** - * The `storeObject` method is responsible for the atomic storage of objects to disk using a - * given InputStream. Upon successful storage, the method returns a (ObjectMetadata) object - * containing relevant file information, such as the file's id (which can be used to locate - * the object on disk), the file's size, and a hex digest dict of algorithms and checksums. - * Storing an object with `store_object` also tags an object (creating references) which - * allow the object to be discoverable. - * - * `storeObject` also ensures that an object is stored only once by synchronizing multiple - * calls and rejecting calls to store duplicate objects. Note, calling `storeObject` without - * a pid is a possibility, but should only store the object without tagging the object. It - * is then the caller's responsibility to finalize the process by calling `tagObject` after - * verifying the correct object is stored. - * + * The {@code storeObject} method is responsible for the atomic storage of objects to + * disk using a given InputStream. Upon successful storage, the method returns a + * (ObjectMetadata) object containing relevant file information, such as the file's id + * (which can be used to locate the object on disk), the file's size, and a hex digest + * dict of algorithms and checksums. Storing an object with {@code store_object} also + * tags an object (creating references) which allow the object to be discoverable. + * + * {@code storeObject} also ensures that an object is stored only once by synchronizing + * multiple calls and rejecting calls to store duplicate objects. Note, calling {@code + * storeObject} without a pid is a possibility, but should only store the object without + * tagging the object. It is then the caller's responsibility to finalize the process by + * calling {@code tagObject} after verifying the correct object is stored. + * * The file's id is determined by calculating the object's content identifier based on the * store's default algorithm, which is also used as the permanent address of the file. The * file's identifier is then sharded using the store's configured depth and width, delimited * by '/' and concatenated to produce the final permanent address and is stored in the - * `./[storePath]/objects/` directory. - * + * {@code ./[storePath]/objects/} directory. + * * By default, the hex digest map includes the following hash algorithms: MD5, SHA-1, * SHA-256, SHA-384, SHA-512 - which are the most commonly used algorithms in dataset * submissions to DataONE and the Arctic Data Center. If an additional algorithm is - * provided, the `storeObject` method checks if it is supported and adds it to the hex + * provided, the {@code storeObject} method checks if it is supported and adds it to the hex * digests dict along with its corresponding hex digest. An algorithm is considered - * "supported" if it is recognized as a valid hash algorithm in - * `java.security.MessageDigest` class. - * + * "supported" if it is recognized as a valid hash algorithm in {@code java.security + * .MessageDigest} class. + * * Similarly, if a file size and/or checksum & checksumAlgorithm value are provided, - * `storeObject` validates the object to ensure it matches the given arguments before moving - * the file to its permanent address. - * + * {@code storeObject} validates the object to ensure it matches the given arguments + * before moving the file to its permanent address. + * * @param object Input stream to file * @param pid Authority-based identifier * @param additionalAlgorithm Additional hex digest to include in hexDigests @@ -126,11 +126,11 @@ void deleteIfInvalidObject( IOException; /** - * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a - * persistent identifier (`pid`) and metadata format (`formatId`). All metadata documents - * for a given pid will be stored in the directory (under ../metadata) that is determined - * by calculating the hash of the given pid, with the document name being the hash of the - * metadata format (`formatId`). + * Adds/updates metadata (ex. {@code sysmeta}) to the HashStore by using a given + * InputStream, a persistent identifier ({@code pid}) and metadata format ({@code + * formatId}). All metadata documents for a given pid will be stored in the directory + * (under ../metadata) that is determined by calculating the hash of the given pid, with + * the document name being the hash of the metadata format ({@code formatId}). * * Note, multiple calls to store the same metadata content will all be accepted, but is not * guaranteed to execute sequentially. @@ -155,7 +155,7 @@ String storeMetadata(InputStream metadata, String pid, String formatId) * @see #storeMetadata(InputStream, String, String) * * If the '(InputStream metadata, String pid)' signature is used, the metadata format - * stored will default to `sysmeta`. + * stored will default to {@code sysmeta}. */ String storeMetadata(InputStream metadata, String pid) throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, @@ -194,8 +194,8 @@ InputStream retrieveMetadata(String pid, String formatId) /** * @see #retrieveMetadata(String, String) - * - * If `retrieveMetadata` is called with signature (String pid), the metadata + * + * If {@code retrieveMetadata} is called with signature (String pid), the metadata * document retrieved will be the given pid's 'sysmeta' */ InputStream retrieveMetadata(String pid) throws IllegalArgumentException, @@ -218,8 +218,8 @@ void deleteObject(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException; /** - * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given - * persistent identifier and its respective metadata namespace. + * Deletes a metadata document (ex. {@code sysmeta}) permanently from HashStore using a + * given persistent identifier and its respective metadata namespace. * * @param pid Authority-based identifier * @param formatId Metadata namespace/format diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 9e70c43a..86ad5071 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -208,19 +208,20 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep /** * Determines whether FileHashStore can instantiate by validating a set of arguments and - * throwing exceptions. If HashStore configuration file (`hashstore.yaml`) exists, it will - * retrieve its properties and compare them with the given values; and if there is a - * mismatch, an exception will be thrown. If not, it will look to see if any relevant - * HashStore directories exist (i.e. '/objects', '/metadata', '/refs') in the given store - * path and throw an exception if any of those directories exist. + * throwing exceptions. If HashStore configuration file ({@code hashstore.yaml}) exists, it will + * retrieve its properties and compare them with the given values; and if there is a mismatch, + * an exception will be thrown. If not, it will look to see if any relevant HashStore + * directories exist (i.e. '/objects', '/metadata', '/refs') in the given store path and throw + * an exception if any of those directories exist. * * @param storePath Path where HashStore will store objects * @param storeDepth Depth of directories * @param storeWidth Width of directories * @param storeAlgorithm Algorithm to use when calculating object addresses - * @param storeMetadataNamespace Default metadata namespace (`formatId`) + * @param storeMetadataNamespace Default metadata namespace ({@code formatId}) * @throws NoSuchAlgorithmException If algorithm supplied is not supported - * @throws IOException If `hashstore.yaml` config file cannot be retrieved/opened + * @throws IOException If {@code hashstore.yaml} config file cannot be + * retrieved/opened * @throws IllegalArgumentException If depth or width is less than 0 * @throws IllegalStateException If dirs/objects exist, but HashStore config is missing */ @@ -295,7 +296,7 @@ protected void verifyHashStoreProperties( * * @param storePath Path to root of store * @return HashMap of the properties - * @throws IOException If `hashstore.yaml` doesn't exist + * @throws IOException If {@code hashstore.yaml} doesn't exist */ protected HashMap loadHashStoreYaml(Path storePath) throws IOException { Path hashStoreYamlPath = storePath.resolve(HASHSTORE_YAML); @@ -335,7 +336,7 @@ protected HashMap loadHashStoreYaml(Path storePath) throws IOExc * Write a 'hashstore.yaml' file to STORE_ROOT * * @param yamlString Content of the HashStore configuration - * @throws IOException If unable to write `hashstore.yaml` + * @throws IOException If unable to write {@code hashstore.yaml} */ protected void writeHashStoreYaml(String yamlString) throws IOException { Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); @@ -1987,7 +1988,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the - * default store namespace as defined by `hashstore.yaml` + * default store namespace as defined by {@code hashstore.yaml}. * * @param metadata InputStream to metadata * @param pid Authority-based identifier @@ -2316,9 +2317,9 @@ private static void releaseObjectLockedCids(String cid) { } /** - * Synchronize the pid tagging process since `tagObject` is a Public API method that can be - * called directly. This is used in the scenario when the client is missing metadata but must - * store the data object first. + * Synchronize the pid tagging process since {@code tagObject} is a Public API method that + * can be called directly. This is used in the scenario when the client is missing metadata + * but must store the data object first. * * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid From 4d382a5ce13b32109b19ddfc378adf14ea656a09 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 09:45:35 -0700 Subject: [PATCH 453/553] Update maven and idea settings java target to 17 from 1.8 --- .idea/misc.xml | 2 +- pom.xml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.idea/misc.xml b/.idea/misc.xml index d5cd6143..67e1e611 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -8,5 +8,5 @@ - + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 1b7a7342..f2dc363a 100644 --- a/pom.xml +++ b/pom.xml @@ -71,8 +71,8 @@ maven-compiler-plugin 3.8.1 - 8 - 8 + 17 + 17 From 1f33e7d3efa5003cf10d90558763285ab7af3808 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 09:45:50 -0700 Subject: [PATCH 454/553] Add new junit test to check for when pid is simply a new line --- .../filehashstore/FileHashStoreProtectedTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index edde872b..a9f8ec5c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -2145,6 +2145,18 @@ public void fileHashStoreUtility_checkForEmptyAndValidString() { "storeObject")); } + /** + * Confirm getExpectedPath throws exception when requesting path for an object that does not + * exist + */ + @Test + public void fileHashStoreUtility_checkForEmptyAndValidString_newLine() { + assertThrows( + IllegalArgumentException.class, + () -> FileHashStoreUtility.checkForEmptyAndValidString("\n", "pid", + "storeObject")); + } + /** * Confirm that renamePathForDeletion adds '_delete' to the given path */ From 4770050e82e66e19261ae41d2f25d5045d522458 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 10:22:00 -0700 Subject: [PATCH 455/553] Refactor FileHashStoreUtility 'checkNotNegativeOrZero' to 'checkPositive' and update affected code --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- .../hashstore/filehashstore/FileHashStoreUtility.java | 4 ++-- src/test/java/org/dataone/hashstore/HashStoreRunnable.java | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 86ad5071..be62c44a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -422,7 +422,7 @@ public ObjectMetadata storeObject( validateAlgorithm(checksumAlgorithm); } if (objSize != -1) { - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "storeObject"); + FileHashStoreUtility.checkPositive(objSize, "storeObject"); } try { @@ -838,7 +838,7 @@ public void deleteIfInvalidObject( } FileHashStoreUtility.ensureNotNull(checksum, "checksum", "deleteInvalidObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "deleteInvalidObject"); - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "deleteInvalidObject"); + FileHashStoreUtility.checkPositive(objSize, "deleteInvalidObject"); String objCid = objectInfo.getCid(); long objInfoRetrievedSize = objectInfo.getSize(); @@ -1179,7 +1179,7 @@ protected ObjectMetadata putObject( validateAlgorithm(additionalAlgorithm); } if (objSize != -1) { - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "putObject"); + FileHashStoreUtility.checkPositive(objSize, "putObject"); } // Generate tmp file and write to it diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 26704362..60ee3de8 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -259,9 +259,9 @@ public static boolean isValidString(String string) { * @param method Calling method * @throws IllegalArgumentException If longInt is less than or equal */ - public static void checkNotNegativeOrZero(long longInt, String method) + public static void checkPositive(long longInt, String method) throws IllegalArgumentException { - if (longInt < 0 || longInt == 0) { + if (longInt <= 0) { String errMsg = "FileHashStoreUtility.checkNotNegative - Calling Method: " + method + "(): objSize cannot be less than or equal to 0."; throw new IllegalArgumentException(errMsg); diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 20f04ac4..5eb43fc6 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -26,7 +26,7 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream o String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", "HashStoreServiceRequestConstructor"); - FileHashStoreUtility.checkNotNegativeOrZero(publicAPIMethod, "HashStoreServiceRequestConstructor"); + FileHashStoreUtility.checkPositive(publicAPIMethod, "HashStoreServiceRequestConstructor"); this.hashstore = hashstore; this.publicAPIMethod = publicAPIMethod; this.objStream = objStream; @@ -36,7 +36,7 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream o public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", "HashStoreServiceRequestConstructor"); - FileHashStoreUtility.checkNotNegativeOrZero(publicAPIMethod, "HashStoreServiceRequestConstructor"); + FileHashStoreUtility.checkPositive(publicAPIMethod, "HashStoreServiceRequestConstructor"); this.hashstore = hashstore; this.publicAPIMethod = publicAPIMethod; this.pid = pid; From 2fea9ff9fa5e5ab2cceea77aa01d98ed3d4e5e70 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 10:24:23 -0700 Subject: [PATCH 456/553] Rename logging variable in 'HashStoreRunnable' --- .../java/org/dataone/hashstore/HashStoreRunnable.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 5eb43fc6..c7932a4e 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -14,7 +14,7 @@ * provided by the Executor service. */ public class HashStoreRunnable implements Runnable { - private static final Log logHashStoreRunnable = LogFactory.getLog(HashStoreRunnable.class); + private static final Log log = LogFactory.getLog(HashStoreRunnable.class); public static final int storeObject = 1; public static final int deleteObject = 2; private final HashStore hashstore; @@ -43,7 +43,7 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { } public void run() { - logHashStoreRunnable.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); + log.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); try { switch (publicAPIMethod) { case storeObject: @@ -53,7 +53,7 @@ public void run() { String errMsg = "HashStoreRunnable ~ UnexpectedError - storeObject: " + e.getCause(); System.out.println(errMsg); - logHashStoreRunnable.error(errMsg); + log.error(errMsg); throw new HashStoreServiceException(errMsg); } objStream.close(); @@ -65,13 +65,13 @@ public void run() { String errMsg = "HashStoreRunnable ~ UnexpectedError - deleteObject: " + e.getCause(); System.out.println(errMsg); - logHashStoreRunnable.error(errMsg); + log.error(errMsg); throw new HashStoreServiceException(errMsg); } break; } } catch (HashStoreServiceException | IOException hse) { - logHashStoreRunnable.error( + log.error( "HashStoreServiceRequest ~ Unexpected Error: " + hse.getMessage()); } } From e327e916eac0a4f3d3ba949535728aa7506a4de5 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 10:28:19 -0700 Subject: [PATCH 457/553] Clean up 'FileHashStoreUtility' logging and exception statements --- .../filehashstore/FileHashStoreUtility.java | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 60ee3de8..7d0b8227 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -28,7 +28,7 @@ */ public class FileHashStoreUtility { - private static final Log logFHSU = LogFactory.getLog(FileHashStoreUtility.class); + private static final Log log = LogFactory.getLog(FileHashStoreUtility.class); /** * Checks whether a given object is null and throws an exception if so @@ -41,8 +41,7 @@ public class FileHashStoreUtility { public static void ensureNotNull(Object object, String argument, String method) throws IllegalArgumentException { if (object == null) { - String errMsg = "FileHashStoreUtility.ensureNotNull - Calling Method: " + method - + "(): " + argument + " cannot be null."; + String errMsg = "Calling Method: " + method + "(): " + argument + " cannot be null."; throw new IllegalArgumentException(errMsg); } } @@ -68,9 +67,7 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm } } catch (IOException ioe) { - String errMsg = - "FileHashStoreUtility.calculateHexDigest - Unexpected IOException encountered: " - + ioe.getMessage(); + String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); throw new IOException(errMsg); } finally { @@ -152,8 +149,7 @@ public static List getFilesFromDir(Path directory) throws IOException { public static Path renamePathForDeletion(Path pathToRename) throws IOException { ensureNotNull(pathToRename, "pathToRename", "renamePathForDeletion"); if (!Files.exists(pathToRename)) { - String errMsg = "FileHashStoreUtility.renamePathForDeletion - Given path to file: " - + pathToRename + " does not exist."; + String errMsg = "Given path to file: " + pathToRename + " does not exist."; throw new FileNotFoundException(errMsg); } Path parentPath = pathToRename.getParent(); @@ -174,8 +170,7 @@ public static Path renamePathForDeletion(Path pathToRename) throws IOException { public static void renamePathForRestoration(Path pathToRename) throws IOException { ensureNotNull(pathToRename, "pathToRename", "renamePathForRestoration"); if (!Files.exists(pathToRename)) { - String errMsg = "FileHashStoreUtility.renamePathForRestoration - Given path to file: " - + pathToRename + " does not exist."; + String errMsg = "Given path to file: " + pathToRename + " does not exist."; throw new FileNotFoundException(errMsg); } Path parentPath = pathToRename.getParent(); @@ -202,7 +197,7 @@ public static void deleteListItems(Collection deleteList) { String warnMsg = "Attempted to delete metadata document: " + deleteItem + " but failed." + " Additional Details: " + ge.getMessage(); - logFHSU.warn(warnMsg); + log.warn(warnMsg); } } @@ -221,15 +216,14 @@ public static void deleteListItems(Collection deleteList) { */ public static void checkForEmptyAndValidString(String string, String argument, String method) throws IllegalArgumentException { - ensureNotNull(string, "string", "checkForEmptyString"); + ensureNotNull(string, "string", "checkForEmptyAndValidString"); if (string.trim().isEmpty()) { - String errMsg = "FileHashStoreUtility.checkForEmptyString - Calling Method: " + method - + "(): " + argument + " cannot be empty."; + String errMsg = "Calling Method: " + method + "(): " + argument + " cannot be empty."; throw new IllegalArgumentException(errMsg); } if (!isValidString(string)) { - String errMsg = "FileHashStoreUtility.checkForEmptyString - Calling Method: " + method - + "(): " + argument + " contains empty white spaces, tabs or newlines."; + String errMsg = "Calling Method: " + method + "(): " + argument + + " contains empty white spaces, tabs or newlines."; throw new IllegalArgumentException(errMsg); } } @@ -262,8 +256,8 @@ public static boolean isValidString(String string) { public static void checkPositive(long longInt, String method) throws IllegalArgumentException { if (longInt <= 0) { - String errMsg = "FileHashStoreUtility.checkNotNegative - Calling Method: " + method - + "(): objSize cannot be less than or equal to 0."; + String errMsg = + "Calling Method: " + method + "(): objSize cannot be less than or equal to 0."; throw new IllegalArgumentException(errMsg); } } From a1870ccb499e86415cac87f6972cdd84771bea8f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 10:36:43 -0700 Subject: [PATCH 458/553] Update maven.yml workflow to use JDK 17 --- .github/workflows/maven.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 253c70fc..3941404b 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -21,10 +21,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: - java-version: '11' + java-version: '17' distribution: 'temurin' cache: maven - name: Build with Maven From b58b26cce38cc04d804c8455dbfe2eaad2272693 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 13:50:34 -0700 Subject: [PATCH 459/553] Add missing javadocs in 'HashStoreRunnable' class and missing javadocs --- .../dataone/hashstore/HashStoreRunnable.java | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index c7932a4e..5db0a6fc 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -22,28 +22,50 @@ public class HashStoreRunnable implements Runnable { private final String pid; private InputStream objStream; + /** + * Constructor for HashStoreRunnable to store a data object with a given pid + * + * @param hashstore HashStore object to interact with + * @param publicAPIMethod Integer representing action/Public API method (ex. 1 for storeObject) + * @param objStream Stream to data object + * @param pid Persistent or authority-based identifier + */ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", - "HashStoreServiceRequestConstructor"); - FileHashStoreUtility.checkPositive(publicAPIMethod, "HashStoreServiceRequestConstructor"); + "HashStoreRunnableConstructor ~ HashStore object is" + + " null."); + FileHashStoreUtility.checkPositive( + publicAPIMethod, "HashStoreRunnableConstructor ~ Must" + " supply an integer."); this.hashstore = hashstore; this.publicAPIMethod = publicAPIMethod; this.objStream = objStream; this.pid = pid; } + /** + * Constructor for HashStoreRunnable where only a pid is necessary (ex. to delete an object). + * + * @param hashstore HashStore object to interact with + * @param publicAPIMethod Integer representing action/Public API method (ex. 2 for deleteObject) + * @param pid Persistent or authority-based identifier + */ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", - "HashStoreServiceRequestConstructor"); - FileHashStoreUtility.checkPositive(publicAPIMethod, "HashStoreServiceRequestConstructor"); + "HashStoreRunnableConstructor ~ HashStore object is" + + " null."); + FileHashStoreUtility.checkPositive( + publicAPIMethod, "HashStoreRunnableConstructor ~ Must" + " supply an integer."); this.hashstore = hashstore; this.publicAPIMethod = publicAPIMethod; this.pid = pid; } + /** + * Executes a HashStore action (ex. storeObject, deleteObject) + */ public void run() { - log.debug("HashStoreServiceRequest - Called to: " + publicAPIMethod); + log.debug("HashStoreRunnable - Called to: " + publicAPIMethod); try { switch (publicAPIMethod) { case storeObject: @@ -72,7 +94,7 @@ public void run() { } } catch (HashStoreServiceException | IOException hse) { log.error( - "HashStoreServiceRequest ~ Unexpected Error: " + hse.getMessage()); + "HashStoreRunnable ~ Unexpected Error: " + hse.getMessage()); } } } From 9e75b3055a42d3f173694c880b32c68d314c3487 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 15:17:21 -0700 Subject: [PATCH 460/553] Refactor 'FileHashStoreProtected' junit tests to create InputStreams in try statements for automatic resource management --- .../FileHashStoreProtectedTest.java | 817 +++++++++--------- 1 file changed, 398 insertions(+), 419 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index a9f8ec5c..f12e26b8 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -103,14 +103,14 @@ public void findObject_cid() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - Map objInfoMap = fileHashStore.findObject(pid); - assertEquals(objInfoMap.get("cid"), objInfo.getCid()); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + + Map objInfoMap = fileHashStore.findObject(pid); + assertEquals(objInfoMap.get("cid"), objInfo.getCid()); + } } } @@ -123,23 +123,23 @@ public void findObject_cidPath() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - Map objInfoMap = fileHashStore.findObject(pid); - String objectPath = objInfoMap.get("cid_object_path"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + Map objInfoMap = fileHashStore.findObject(pid); + String objectPath = objInfoMap.get("cid_object_path"); - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); - assertEquals(objectPath, realPath.toString()); + assertEquals(objectPath, realPath.toString()); + } } } @@ -152,21 +152,21 @@ public void findObject_refsPaths() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); - Map objInfoMap = fileHashStore.findObject(pid); - String cidRefsPath = objInfoMap.get("cid_refs_path"); - String pidRefsPath = objInfoMap.get("pid_refs_path"); + Map objInfoMap = fileHashStore.findObject(pid); + String cidRefsPath = objInfoMap.get("cid_refs_path"); + String pidRefsPath = objInfoMap.get("pid_refs_path"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), "cid"); - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), "cid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - assertEquals(cidRefsPath, cidRefsFilePath.toString()); - assertEquals(pidRefsPath, pidRefsFilePath.toString()); + assertEquals(cidRefsPath, cidRefsFilePath.toString()); + assertEquals(pidRefsPath, pidRefsFilePath.toString()); + } } } @@ -178,30 +178,24 @@ public void findObject_sysmetaPath_exists() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - - // Store Object - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - // Store Metadata Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); - System.out.println(metadataPath); + try (InputStream dataStream = Files.newInputStream(testDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - Map objInfoMap = fileHashStore.findObject(pid); - String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + // Store Metadata + fileHashStore.storeMetadata(metadataStream, pid); - String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path sysmetaPath = fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); - System.out.println(sysmetaPath); + Map objInfoMap = fileHashStore.findObject(pid); + String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); - assertEquals(objInfoSysmetaPath, sysmetaPath.toString()); + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path sysmetaPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + + assertEquals(objInfoSysmetaPath, sysmetaPath.toString()); + } } } @@ -214,17 +208,16 @@ public void findObject_sysmetaPath_doesNotExist() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); - Map objInfoMap = fileHashStore.findObject(pid); - String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + Map objInfoMap = fileHashStore.findObject(pid); + String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); - assertEquals(objInfoSysmetaPath, "Does not exist"); + assertEquals(objInfoSysmetaPath, "Does not exist"); + } } } @@ -292,13 +285,13 @@ public void putObject_testHarness_id() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); - // Check id (sha-256 hex digest of the ab_id, aka object_cid) - String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, address.getCid()); + // Check id (sha-256 hex digest of the ab_id, aka object_cid) + String objContentId = testData.pidData.get(pid).get("sha256"); + assertEquals(objContentId, address.getCid()); + } } } @@ -311,13 +304,15 @@ public void putObject_objSize() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.getSize()); + } + - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); } } @@ -330,23 +325,23 @@ public void putObject_testHarness_hexDigests() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); - dataStream.close(); - - Map hexDigests = address.getHexDigests(); - - // Validate checksum values - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 = testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); - assertEquals(md5, hexDigests.get("MD5")); - assertEquals(sha1, hexDigests.get("SHA-1")); - assertEquals(sha256, hexDigests.get("SHA-256")); - assertEquals(sha384, hexDigests.get("SHA-384")); - assertEquals(sha512, hexDigests.get("SHA-512")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + + Map hexDigests = address.getHexDigests(); + + // Validate checksum values + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + } } } @@ -361,18 +356,18 @@ public void putObject_validateChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, checksumCorrect, "MD2", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, checksumCorrect, "MD2", -1); - String objCid = address.getCid(); - // Get relative path - String objCidShardString = FileHashStoreUtility.getHierarchicalPathString(3, 2, objCid); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path objCidAbsPath = storePath.resolve("objects/" + objCidShardString); + String objCid = address.getCid(); + // Get relative path + String objCidShardString = FileHashStoreUtility.getHierarchicalPathString(3, 2, objCid); + // Get absolute path + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + Path objCidAbsPath = storePath.resolve("objects/" + objCidShardString); - assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(objCidAbsPath)); + } } /** @@ -386,12 +381,12 @@ public void putObject_additionalAlgo_correctChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, "MD2", null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, "MD2", null, null, -1); - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(checksumCorrect, md2); + String md2 = testData.pidData.get(pid).get("md2"); + assertEquals(checksumCorrect, md2); + } } /** @@ -406,9 +401,9 @@ public void putObject_incorrectChecksumValue() { String checksumIncorrect = "1c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", -1); + } }); } @@ -422,9 +417,9 @@ public void putObject_emptyChecksumValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, " ", "MD2", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, " ", "MD2", -1); + } }); } @@ -438,9 +433,9 @@ public void putObject_nullChecksumValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, "MD2", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, null, "MD2", -1); + } }); } @@ -454,9 +449,9 @@ public void putObject_emptyChecksumAlgorithmValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", " ", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, "abc", " ", -1); + } }); } @@ -469,9 +464,10 @@ public void putObject_nullChecksumAlgorithmValue() { // Get test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", null, -1); - dataStream.close(); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, "abc", null, -1); + } }); } @@ -486,12 +482,12 @@ public void putObject_objSizeCorrect() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, objectSize); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, objectSize); - // Check id (sha-256 hex digest of the ab_id (pid)) - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + assertEquals(objectSize, objInfo.getSize()); + } } } @@ -505,13 +501,13 @@ public void putObject_objSizeIncorrect() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, 1000); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, 1000); - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.getSize()); + } }); } } @@ -526,15 +522,16 @@ public void putObject_duplicateObject() throws Exception { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, null, null, -1); + } + // Try duplicate upload String pidTwo = pid + ".test"; - InputStream dataStreamTwo = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStreamTwo, pidTwo, null, null, null, -1); - dataStreamTwo.close(); + try (InputStream dataStreamTwo = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStreamTwo, pidTwo, null, null, null, -1); + } // Confirm there are no files in 'objects/tmp' directory Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -552,9 +549,9 @@ public void putObject_invalidAlgorithm() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, "SM2", null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, "SM2", null, null, -1); + } }); } @@ -568,9 +565,9 @@ public void putObject_emptyAlgorithm() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, " ", null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, " ", null, null, -1); + } }); } @@ -812,22 +809,22 @@ public void writeToTmpFileAndGenerateChecksums() throws Exception { // Get test file Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, null); - dataStream.close(); - - // Validate checksum values - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 = testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); - assertEquals(md5, hexDigests.get("MD5")); - assertEquals(sha1, hexDigests.get("SHA-1")); - assertEquals(sha256, hexDigests.get("SHA-256")); - assertEquals(sha384, hexDigests.get("SHA-384")); - assertEquals(sha512, hexDigests.get("SHA-512")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, null); + + // Validate checksum values + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + } } } @@ -846,9 +843,9 @@ public void writeToTmpFileAndGenerateChecksums_tmpFileSize() throws Exception { // Extra algo to calculate - MD2 String addAlgo = "MD2"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, null); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, null); + } long testDataFileSize = Files.size(testDataFile); long tmpFileSize = Files.size(newTmpFile.toPath()); @@ -871,15 +868,14 @@ public void writeToTmpFileAndGenerateChecksums_addAlgo() throws Exception { // Extra algo to calculate - MD2 String addAlgo = "MD2"; - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, - null); - dataStream.close(); - - // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(md2, hexDigests.get("MD2")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, + null); + // Validate checksum values + String md2 = testData.pidData.get(pid).get("md2"); + assertEquals(md2, hexDigests.get("MD2")); + } } } @@ -898,15 +894,14 @@ public void writeToTmpFileAndGenerateChecksums_checksumAlgo() throws Exception { // Extra algo to calculate - MD2 String checksumAlgo = "SHA-512/224"; - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, - checksumAlgo); - dataStream.close(); - - // Validate checksum values - String sha512224 = testData.pidData.get(pid).get("sha512-224"); - assertEquals(sha512224, hexDigests.get("SHA-512/224")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, + checksumAlgo); + // Validate checksum values + String sha512224 = testData.pidData.get(pid).get("sha512-224"); + assertEquals(sha512224, hexDigests.get("SHA-512/224")); + } } } @@ -926,17 +921,16 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce String addAlgo = "MD2"; String checksumAlgo = "SHA-512/224"; - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, - checksumAlgo); - dataStream.close(); - - // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); - String sha512224 = testData.pidData.get(pid).get("sha512-224"); - assertEquals(md2, hexDigests.get("MD2")); - assertEquals(sha512224, hexDigests.get("SHA-512/224")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, + checksumAlgo); + // Validate checksum values + String md2 = testData.pidData.get(pid).get("md2"); + String sha512224 = testData.pidData.get(pid).get("sha512-224"); + assertEquals(md2, hexDigests.get("MD2")); + assertEquals(sha512224, hexDigests.get("SHA-512/224")); + } } } @@ -956,10 +950,10 @@ public void writeToTmpFileAndGenerateChecksums_invalidAlgo() { // Extra algo to calculate - MD2 String addAlgo = "SM2"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, addAlgo, null); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.writeToTmpFileAndGenerateChecksums( + newTmpFile, dataStream, addAlgo, null); + } }); } } @@ -1039,23 +1033,23 @@ public void deleteObjectByCid() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - // Store object only - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - String cid = objInfo.getCid(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + // Store object only + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + String cid = objInfo.getCid(); - // Try deleting the object - fileHashStore.deleteObjectByCid(cid); + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); - // Get permanent address of the actual cid - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + // Get permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); - Path objRealPath = storePath.resolve("objects").resolve(objShardString); - assertFalse(Files.exists(objRealPath)); + Path objRealPath = storePath.resolve("objects").resolve(objShardString); + assertFalse(Files.exists(objRealPath)); + } } } @@ -1069,17 +1063,17 @@ public void deleteObjectByCid_cidRefsFileContainsPids() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); - String cid = objInfo.getCid(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.getCid(); - // Try deleting the object - fileHashStore.deleteObjectByCid(cid); + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); - // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); - assertTrue(Files.exists(objRealPath)); + // Get permanent address of the actual cid + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objRealPath)); + } } } @@ -1652,15 +1646,15 @@ public void putMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataPath = fileHashStore.putMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String metadataPath = fileHashStore.putMetadata(metadataStream, pid, null); - // Calculate absolute path - String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = - fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); - assertEquals(metadataPath, metadataPidExpectedPath.toString()); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); + } } } @@ -1687,10 +1681,9 @@ public void putMetadata_pidNull() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.putMetadata(metadataStream, null, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.putMetadata(metadataStream, null, null); + } }); } } @@ -1707,10 +1700,9 @@ public void putMetadata_pidEmpty() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.putMetadata(metadataStream, "", null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.putMetadata(metadataStream, "", null); + } }); } } @@ -1727,10 +1719,9 @@ public void putMetadata_pidEmptySpaces() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.putMetadata(metadataStream, " ", null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.putMetadata(metadataStream, " ", null); + } }); } } @@ -1747,10 +1738,10 @@ public void writeToTmpMetadataFile() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); - metadataStream.close(); - assertTrue(metadataWritten); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + assertTrue(metadataWritten); + } } } @@ -1766,15 +1757,15 @@ public void writeToTmpMetadataFile_tmpFileSize() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); - metadataStream.close(); - assertTrue(metadataWritten); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + assertTrue(metadataWritten); - long tmpMetadataFileSize = Files.size(newTmpFile.toPath()); - long testMetadataFileSize = Files.size(testMetaDataFile); - assertTrue(tmpMetadataFileSize > 0); - assertEquals(tmpMetadataFileSize, testMetadataFileSize); + long tmpMetadataFileSize = Files.size(newTmpFile.toPath()); + long testMetadataFileSize = Files.size(testMetaDataFile); + assertTrue(tmpMetadataFileSize > 0); + assertEquals(tmpMetadataFileSize, testMetadataFileSize); + } } } @@ -1786,48 +1777,36 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { for (String pid : testData.pidList) { File newTmpFile = generateTemporaryFile(); String pidFormatted = pid.replace("/", "_"); - // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - // Write it to the tmpFile - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); - metadataStream.close(); // Create InputStream to tmp File - InputStream metadataStoredStream; - try { - metadataStoredStream = Files.newInputStream(newTmpFile.toPath()); - - } catch (Exception e) { - e.printStackTrace(); - throw e; - - } + try (InputStream metadataStoredStream = Files.newInputStream(newTmpFile.toPath()); + InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + // Write it to the tmpFile + + fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + // Calculate checksum of metadata content + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = metadataStoredStream.read(buffer)) != -1) { + sha256.update(buffer, 0, bytesRead); + } + + } catch (IOException ioe) { + ioe.printStackTrace(); + throw ioe; - // Calculate checksum of metadata content - MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = metadataStoredStream.read(buffer)) != -1) { - sha256.update(buffer, 0, bytesRead); } - metadataStoredStream.close(); - - } catch (IOException ioe) { - ioe.printStackTrace(); - throw ioe; + String sha256Digest = + DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha256MetadataDigestFromTestData = + testData.pidData.get(pid).get("metadata_cid_sha256"); + assertEquals(sha256Digest, sha256MetadataDigestFromTestData); } - - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha256MetadataDigestFromTestData = - testData.pidData.get(pid).get("metadata_cid_sha256"); - assertEquals(sha256Digest, sha256MetadataDigestFromTestData); - - // Close stream - metadataStoredStream.close(); } } @@ -1842,39 +1821,38 @@ public void syncRenameMetadataDocForDeletion_renamesAsExpected() throws Exceptio // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metaStream = Files.newInputStream(testMetaDataFile); - String pathToMetadata = fileHashStore.putMetadata(metaStream, pid, null); - String pathToMetadataTwo = fileHashStore.putMetadata(metaStream, pid, "ns.test.1"); - String pathToMetadataThree = - fileHashStore.putMetadata(metaStream, pid, "ns.test" + ".3"); - metaStream.close(); - - // Confirm that metadata documents are present - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, pidHexDigest - ); - Path expectedPidMetadataDirectory = - storePath.resolve("metadata").resolve(pidRelativePath); - List metadataDocPaths = - FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); - - assertEquals(3, metadataDocPaths.size()); - - Collection deleteList = - fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths); - - Collection renamedDocStrings = new ArrayList<>(); - for (Path renamedDoc : deleteList) { - renamedDocStrings.add(renamedDoc.toString()); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); + String pathToMetadataTwo = fileHashStore.putMetadata(metadataStream, pid, "ns.test.1"); + String pathToMetadataThree = fileHashStore.putMetadata(metadataStream, pid, "ns.test" + ".3"); + + // Confirm that metadata documents are present + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, pidHexDigest + ); + Path expectedPidMetadataDirectory = + storePath.resolve("metadata").resolve(pidRelativePath); + List metadataDocPaths = + FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); + + assertEquals(3, metadataDocPaths.size()); + + Collection deleteList = + fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths); + + Collection renamedDocStrings = new ArrayList<>(); + for (Path renamedDoc : deleteList) { + renamedDocStrings.add(renamedDoc.toString()); + } + assertTrue(renamedDocStrings.contains(pathToMetadata + "_delete")); + assertTrue(renamedDocStrings.contains(pathToMetadataTwo + "_delete")); + assertTrue(renamedDocStrings.contains(pathToMetadataThree + "_delete")); } - assertTrue(renamedDocStrings.contains(pathToMetadata + "_delete")); - assertTrue(renamedDocStrings.contains(pathToMetadataTwo + "_delete")); - assertTrue(renamedDocStrings.contains(pathToMetadataThree + "_delete")); } } @@ -1908,18 +1886,19 @@ public void isStringInRefsFile_pidFound() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } String pidTwo = pid + ".test"; - InputStream dataStreamDup = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); - dataStreamDup.close(); - String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); + try (InputStream dataStreamDup = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); + + String cid = objInfo.getCid(); + Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); + } } } @@ -1932,13 +1911,13 @@ public void isStringInRefsFile_pidNotFound() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertFalse(fileHashStore.isStringInRefsFile("pid.not.found", absCidRefsPath)); + String cid = objInfo.getCid(); + Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + assertFalse(fileHashStore.isStringInRefsFile("pid.not.found", absCidRefsPath)); + } } } @@ -1951,21 +1930,21 @@ public void getHashStoreDataObjectPath() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); - String cid = objInfo.getCid(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.getCid(); - // Manually form the permanent address of the actual cid - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); - Path calculatedObjRealPath = storePath.resolve("objects").resolve(objShardString); + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + Path calculatedObjRealPath = storePath.resolve("objects").resolve(objShardString); - Path expectedObjCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path expectedObjCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - assertEquals(expectedObjCidAbsPath, calculatedObjRealPath); + assertEquals(expectedObjCidAbsPath, calculatedObjRealPath); + } } } @@ -1980,33 +1959,33 @@ public void getHashStoreMetadataPath() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid); - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - String storeFormatId = fhsProperties.getProperty("storeMetadataNamespace"); - String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + String storeFormatId = fhsProperties.getProperty("storeMetadataNamespace"); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // Document ID - String hashId = FileHashStoreUtility.getPidHexDigest(pid + storeFormatId, storeAlgo); + // Document ID + String hashId = FileHashStoreUtility.getPidHexDigest(pid + storeFormatId, storeAlgo); - // Metadata directory of the given pid - String metadataPidDirId = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String metadataPidDirIdSharded = - FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, - metadataPidDirId); + // Metadata directory of the given pid + String metadataPidDirId = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String metadataPidDirIdSharded = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidDirId); - // Complete path - Path calculatedMetadataRealPath = - storePath.resolve("metadata").resolve(metadataPidDirIdSharded).resolve(hashId); + // Complete path + Path calculatedMetadataRealPath = + storePath.resolve("metadata").resolve(metadataPidDirIdSharded).resolve(hashId); - Path expectedMetadataPidPath = - fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); + Path expectedMetadataPidPath = + fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); - assertEquals(expectedMetadataPidPath, calculatedMetadataRealPath); + assertEquals(expectedMetadataPidPath, calculatedMetadataRealPath); + } } } @@ -2021,14 +2000,14 @@ public void getHashStoreMetadataInputStream() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); - String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream metadataCidInputStream = fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId); - assertNotNull(metadataCidInputStream); + InputStream metadataCidInputStream = fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId); + assertNotNull(metadataCidInputStream); + } } } @@ -2056,27 +2035,27 @@ public void getHashStoreRefsPath_pid() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - // Manually form the permanent address of the actual cid - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); - // Pid refs file - String metadataPidHash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String metadataPidHashSharded = - FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, - metadataPidHash); - Path calculatedPidRefsRealPath = - storePath.resolve("refs/pids").resolve(metadataPidHashSharded); + // Pid refs file + String metadataPidHash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String metadataPidHashSharded = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidHash); + Path calculatedPidRefsRealPath = + storePath.resolve("refs/pids").resolve(metadataPidHashSharded); - Path expectedPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path expectedPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - assertEquals(expectedPidRefsPath, calculatedPidRefsRealPath); + assertEquals(expectedPidRefsPath, calculatedPidRefsRealPath); + } } } @@ -2089,23 +2068,23 @@ public void getHashStoreRefsPath_cid() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); - String cid = objInfo.getCid(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.getCid(); - // Manually form the permanent address of the actual cid - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // Cid refs file - String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); - Path calculatedCidRefsRealPath = storePath.resolve("refs/cids").resolve(objShardString); + // Cid refs file + String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + Path calculatedCidRefsRealPath = storePath.resolve("refs/cids").resolve(objShardString); - Path expectedCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path expectedCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertEquals(expectedCidRefsPath, calculatedCidRefsRealPath); + assertEquals(expectedCidRefsPath, calculatedCidRefsRealPath); + } } } @@ -2168,15 +2147,15 @@ public void fileHashStoreUtility_renamePathForDeletion() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); - Path metadataPath = Paths.get(pathToMetadata); - FileHashStoreUtility.renamePathForDeletion(metadataPath); + Path metadataPath = Paths.get(pathToMetadata); + FileHashStoreUtility.renamePathForDeletion(metadataPath); - Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); - assertTrue(Files.exists(expectedMetadataPathRenamed)); + Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); + assertTrue(Files.exists(expectedMetadataPathRenamed)); + } } } @@ -2191,20 +2170,20 @@ public void fileHashStoreUtility_renamePathForRestoration() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); - Path metadataPath = Paths.get(pathToMetadata); - FileHashStoreUtility.renamePathForDeletion(metadataPath); + Path metadataPath = Paths.get(pathToMetadata); + FileHashStoreUtility.renamePathForDeletion(metadataPath); - Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); - assertFalse(Files.exists(metadataPath)); - assertTrue(Files.exists(expectedMetadataPathRenamed)); + Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); + assertFalse(Files.exists(metadataPath)); + assertTrue(Files.exists(expectedMetadataPathRenamed)); - FileHashStoreUtility.renamePathForRestoration(expectedMetadataPathRenamed); - assertFalse(Files.exists(expectedMetadataPathRenamed)); - assertTrue(Files.exists(metadataPath)); + FileHashStoreUtility.renamePathForRestoration(expectedMetadataPathRenamed); + assertFalse(Files.exists(expectedMetadataPathRenamed)); + assertTrue(Files.exists(metadataPath)); + } } } } From 58e73ab75cfcd0b09bcba25d637742574197db12 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 15:26:49 -0700 Subject: [PATCH 461/553] Refactor 'FileHashStoreInit' junit tests to create InputStreams in try statements for automatic resource management --- .../hashstore/filehashstore/FileHashStoreInitTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java index eb0bdae1..445a2988 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java @@ -433,9 +433,9 @@ public void testExistingHashStoreConfiguration_missingYaml() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - secondHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + secondHashStore.storeObject(dataStream, pid, null, null, null, -1); + } } // Delete configuration From 252cefa500444ae22ea05f890d667a1ace2e12e2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 16:11:38 -0700 Subject: [PATCH 462/553] Refactor 'FileHashStoreClient' junit tests to create InputStreams in try statements for automatic resource management --- .../hashstore/HashStoreClientTest.java | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 96f0cb58..312aff78 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -275,9 +275,10 @@ public void client_retrieveObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + hashStore.storeObject(dataStream, pid, null, null, null, -1); + } // Call client String optRetrieveObject = "-retrieveobject"; @@ -311,9 +312,10 @@ public void client_retrieveMetadata() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - hashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + hashStore.storeMetadata(metadataStream, pid); + } // Call client String optRetrieveMetadata = "-retrievemetadata"; @@ -350,9 +352,10 @@ public void client_deleteObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + hashStore.storeObject(dataStream, pid, null, null, null, -1);; + } // Call client String optDeleteObject = "-deleteobject"; @@ -390,9 +393,10 @@ public void client_deleteMetadata() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - hashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + hashStore.storeMetadata(metadataStream, pid); + } // Call client String optDeleteMetadata = "-deletemetadata"; @@ -433,9 +437,10 @@ public void client_getHexDigest() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + hashStore.storeObject(dataStream, pid, null, null, null, -1); + } // Call client String optGetChecksum = "-getchecksum"; From d961d35b7ba6c954e2dc58cedaf33e7432bb72ba Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 16:13:51 -0700 Subject: [PATCH 463/553] Refactor 'HashStore' junit tests to create InputStreams in try statements for automatic resource management --- .../org/dataone/hashstore/HashStoreClientTest.java | 2 +- .../java/org/dataone/hashstore/HashStoreTest.java | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 312aff78..15bb683c 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -354,7 +354,7 @@ public void client_deleteObjects() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - hashStore.storeObject(dataStream, pid, null, null, null, -1);; + hashStore.storeObject(dataStream, pid, null, null, null, -1); } // Call client diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 92607ab0..fbb3249e 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -126,13 +126,13 @@ public void hashStore_storeObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); - // Check id (sha-256 hex digest of the ab_id, aka object_cid) - String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, objInfo.getCid()); + // Check id (sha-256 hex digest of the ab_id, aka object_cid) + String objContentId = testData.pidData.get(pid).get("sha256"); + assertEquals(objContentId, objInfo.getCid()); + } } } From 5542a9cbc326a38d73c796e799dfa5b67abb56fa Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 16:20:27 -0700 Subject: [PATCH 464/553] Refactor 'FileHashStoreUtility' class' calculateHexDigest method to use try statement on the given stream for automatic resource management --- .../hashstore/filehashstore/FileHashStoreUtility.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 7d0b8227..07323ea1 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -58,7 +58,7 @@ public static void ensureNotNull(Object object, String argument, String method) public static String calculateHexDigest(InputStream dataStream, String algorithm) throws IOException, NoSuchAlgorithmException { MessageDigest mdObject = MessageDigest.getInstance(algorithm); - try { + try (dataStream) { byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { @@ -70,9 +70,6 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); throw new IOException(errMsg); - } finally { - // Close dataStream - dataStream.close(); } // mdObjectHexDigest return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); From 1c61165423d44a808a60bd9887517bcf9335c075 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 16:27:30 -0700 Subject: [PATCH 465/553] Refactor 'FileHashStore' to use try statements for automatic resource management and 'buildHashStoreYamlString' as a text block for clarity --- .../filehashstore/FileHashStore.java | 82 +++++++++---------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index be62c44a..db8d7a88 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -366,34 +366,39 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { protected String buildHashStoreYamlString( int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace ) { - return String.format( - "# Default configuration variables for HashStore\n\n" - + "############### Directory Structure ###############\n" - + "# Desired amount of directories when sharding an object to " - + "form the permanent address\n" - + "store_depth: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP " + "NEW HASHSTORE\n" - + "# Width of directories created when sharding an object to " - + "form the permanent address\n" - + "store_width: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP " + "NEW HASHSTORE\n" - + "# Example:\n" + "# Below, objects are shown listed in directories that are # " - + "levels deep (DIR_DEPTH=3),\n" - + "# with each directory consisting of 2 characters " + "(DIR_WIDTH=2).\n" - + "# /var/filehashstore/objects\n" + "# ├── 7f\n" + "# │ └── 5c\n" - + "# │ └── c1\n" + "# │ └── " - + "8f0b04e812a3b4c8f686ce34e6fec558804bf61e54b176742a7f6368d6\n\n" - + "############### Format of the Metadata ###############\n" - + "store_metadata_namespace: \"%s\"\n" - + "############### Hash Algorithms ###############\n" - + "# Hash algorithm to use when calculating object's hex digest " - + "for the permanent address\n" + "store_algorithm: \"%s\"\n" - + "############### Hash Algorithms ###############\n" - + "# Hash algorithm to use when calculating object's hex digest " - + "for the permanent address\n" - + "# The default algorithm list includes the hash algorithms " - + "calculated when storing an\n" - + "# object to disk and returned to the caller after successful " + "storage.\n" - + "store_default_algo_list:\n" + "- \"MD5\"\n" + "- \"SHA-1\"\n" + "- \"SHA-256\"\n" - + "- \"SHA-384\"\n" + "- \"SHA-512\"\n", storeDepth, storeWidth, storeMetadataNamespace, storeAlgorithm + return String.format(""" + # Default configuration variables for HashStore + + ############### Directory Structure ############### + # Desired amount of directories when sharding an object to form the permanent address + store_depth: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP NEW HASHSTORE + # Width of directories created when sharding an object to form the permanent address + store_width: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP NEW HASHSTORE + # Example: + # Below, objects are shown listed in directories that are # levels deep (DIR_DEPTH=3), + # with each directory consisting of 2 characters (DIR_WIDTH=2). + # /var/filehashstore/objects + # ├── 7f + # │ └── 5c + # │ └── c1 + # │ └── 8f0b04e812a3b4c8f686ce34e6fec558804bf61e54b176742a7f6368d6 + + ############### Format of the Metadata ############### + store_metadata_namespace: "%s" + ############### Hash Algorithms ############### + # Hash algorithm to use when calculating object's hex digest for the permanent address + store_algorithm: "%s" + ############### Hash Algorithms ############### + # Hash algorithm to use when calculating object's hex digest for the permanent address + # The default algorithm list includes the hash algorithms calculated when storing an + # object to disk and returned to the caller after successful storage. + store_default_algo_list: + - "MD5" + - "SHA-1" + - "SHA-256" + - "SHA-384" + - "SHA-512" + """, storeDepth, storeWidth, storeMetadataNamespace, storeAlgorithm ); } @@ -425,14 +430,11 @@ public ObjectMetadata storeObject( FileHashStoreUtility.checkPositive(objSize, "storeObject"); } - try { + try (object) { return syncPutObject( - object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize - ); - } finally { - // Close stream - object.close(); + object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize); } + // Close stream } /** @@ -522,12 +524,10 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce // call 'deleteInvalidObject' (optional) to check that the object is valid, and then // 'tagObject' (required) to create the reference files needed to associate the // respective pids/cids. - try { + try (object) { return putObject(object, "HashStoreNoPid", null, null, null, -1); - } finally { - // Close stream - object.close(); } + // Close stream } @@ -592,12 +592,10 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) checkedFormatId = formatId; } - try { + try (metadata) { return syncPutMetadata(metadata, pid, checkedFormatId); - } finally { - // Close stream - metadata.close(); } + // Close stream } /** From 6591f4429d1c30e792461d0d52fd0e790c2d2273 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 12 Aug 2024 17:02:40 -0700 Subject: [PATCH 466/553] Refactor 'FileHashStoreInterface' junit tests to create InputStreams in try statements for automatic resource management where relevant --- .../FileHashStoreInterfaceTest.java | 1059 ++++++++--------- 1 file changed, 513 insertions(+), 546 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 10bdfd02..bc724d8c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -101,16 +101,16 @@ public void storeObject() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); - // Check id (content identifier based on the store algorithm) - String objectCid = testData.pidData.get(pid).get("sha256"); - assertEquals(objectCid, objInfo.getCid()); - assertEquals(pid, objInfo.getPid()); + // Check id (content identifier based on the store algorithm) + String objectCid = testData.pidData.get(pid).get("sha256"); + assertEquals(objectCid, objInfo.getCid()); + assertEquals(pid, objInfo.getPid()); + } } } @@ -123,15 +123,17 @@ public void storeObject_objSize() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + + // Check the object size + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.getSize()); + } + - // Check the object size - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); } } @@ -144,25 +146,25 @@ public void storeObject_hexDigests() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - Map hexDigests = objInfo.getHexDigests(); - - // Validate checksum values - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 = testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); - assertEquals(md5, hexDigests.get("MD5")); - assertEquals(sha1, hexDigests.get("SHA-1")); - assertEquals(sha256, hexDigests.get("SHA-256")); - assertEquals(sha384, hexDigests.get("SHA-384")); - assertEquals(sha512, hexDigests.get("SHA-512")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + + Map hexDigests = objInfo.getHexDigests(); + + // Validate checksum values + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + } } } @@ -187,9 +189,9 @@ public void storeObject_nullPid() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, null, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, null, null, null, null, -1); + } }); } } @@ -204,9 +206,9 @@ public void storeObject_emptyPid() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, "", null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, "", null, null, null, -1); + } }); } } @@ -221,9 +223,9 @@ public void storeObject_pidWithNewLine() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, "dou.test.1\n", null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, "dou.test.1\n", null, null, null, -1); + } }); } } @@ -238,9 +240,9 @@ public void storeObject_pidWithTab() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, "dou.test.1\t", null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, "dou.test.1\t", null, null, null, -1); + } }); } } @@ -255,9 +257,9 @@ public void storeObject_zeroObjSize() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + } }); } } @@ -272,20 +274,20 @@ public void storeObject_overloadInputStreamOnly() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - Map hexDigests = objInfo.getHexDigests(); - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - String cid = objInfo.getCid(); + Map hexDigests = objInfo.getHexDigests(); + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + String cid = objInfo.getCid(); - assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); + assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); - assertThrows(FileNotFoundException.class, () -> fileHashStore.findObject(pid)); + assertThrows(FileNotFoundException.class, () -> fileHashStore.findObject(pid)); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertFalse(Files.exists(cidRefsFilePath)); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + assertFalse(Files.exists(cidRefsFilePath)); + } } } @@ -300,12 +302,12 @@ public void storeObject_validateChecksumValue() throws Exception { String checksumCorrect = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); - Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - assertTrue(Files.exists(objCidAbsPath)); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objCidAbsPath)); + } } /** @@ -319,12 +321,12 @@ public void storeObject_correctChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, "MD2", null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, "MD2", null, null, -1); - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(checksumCorrect, md2); + String md2 = testData.pidData.get(pid).get("md2"); + assertEquals(checksumCorrect, md2); + } } /** @@ -340,9 +342,9 @@ public void storeObject_incorrectChecksumValue() { String checksumIncorrect = "aaf9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", -1); + } }); } @@ -358,9 +360,9 @@ public void storeObject_emptyChecksumValue() { String checksumEmpty = ""; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", -1); + } }); } @@ -374,9 +376,9 @@ public void storeObject_nullChecksumValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", -1); + } }); } @@ -390,14 +392,14 @@ public void storeObject_objSizeCorrect() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, objectSize - ); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, objectSize + ); - // Check id (sha-256 hex digest of the ab_id (pid)) - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + assertEquals(objectSize, objInfo.getSize()); + } } } @@ -411,15 +413,15 @@ public void storeObject_objSizeIncorrect() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 1000 - ); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, 1000 + ); - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.getSize()); + } }); } } @@ -434,9 +436,9 @@ public void storeObject_invalidAlgorithm() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, "SM2", null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, "SM2", null, null, -1); + } }); } @@ -450,21 +452,20 @@ public void storeObject_duplicate() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile); + InputStream dataStreamDup = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String pidTwo = pid + ".test"; - InputStream dataStreamDup = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStreamDup, pidTwo, null, null, null, -1 - ); - dataStreamDup.close(); + String pidTwo = pid + ".test"; + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStreamDup, pidTwo, null, null, null, -1 + ); - String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertTrue(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); - assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); + String cid = objInfo.getCid(); + Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + assertTrue(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); + assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); + } } } @@ -478,10 +479,10 @@ public void storeObject_largeSparseFile() throws Exception { long fileSize = 1024L * 1024L * 1024L; // 1GB // Get tmp directory to initially store test file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path testFilePath = storePath.resolve("random_file.bin"); + Path testDataFile = storePath.resolve("random_file.bin"); // Generate a random file with the specified size - try (FileOutputStream fileOutputStream = new FileOutputStream(testFilePath.toString())) { + try (FileOutputStream fileOutputStream = new FileOutputStream(testDataFile.toString())) { FileChannel fileChannel = fileOutputStream.getChannel(); FileLock lock = fileChannel.lock(); fileChannel.position(fileSize - 1); @@ -492,13 +493,13 @@ public void storeObject_largeSparseFile() throws Exception { throw ioe; } - InputStream dataStream = Files.newInputStream(testFilePath); - String pid = "dou.sparsefile.1"; - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + String pid = "dou.sparsefile.1"; + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - assertTrue(Files.exists(objCidAbsPath)); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objCidAbsPath)); + } } @@ -511,10 +512,10 @@ public void storeObject_interruptProcess() throws Exception { long fileSize = 1024L * 1024L * 1024L; // 1GB // Get tmp directory to initially store test file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path testFilePath = storePath.resolve("random_file.bin"); + Path testDataFile = storePath.resolve("random_file.bin"); // Generate a random file with the specified size - try (FileOutputStream fileOutputStream = new FileOutputStream(testFilePath.toString())) { + try (FileOutputStream fileOutputStream = new FileOutputStream(testDataFile.toString())) { FileChannel fileChannel = fileOutputStream.getChannel(); FileLock lock = fileChannel.lock(); fileChannel.position(fileSize - 1); @@ -526,11 +527,10 @@ public void storeObject_interruptProcess() throws Exception { } Thread toInterrupt = new Thread(() -> { - try { - InputStream dataStream = Files.newInputStream(testFilePath); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { String pid = "dou.sparsefile.1"; fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + } catch (IOException | NoSuchAlgorithmException | InterruptedException ioe) { ioe.printStackTrace(); } @@ -571,12 +571,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { // Submit 5 futures to the thread pool, each calling storeObject Future future1 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -592,12 +590,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } }); Future future2 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -613,12 +609,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } }); Future future3 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -634,12 +628,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } }); Future future4 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -655,12 +647,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } }); Future future5 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - dataStream.close(); if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -828,28 +818,28 @@ public void deleteIfInvalidObject_correctValues() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get deleteIfInvalidObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + // Get deleteIfInvalidObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.deleteIfInvalidObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize); + fileHashStore.deleteIfInvalidObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } } } @@ -878,7 +868,6 @@ public void deleteIfInvalidObject_objInfoEmptyHexDigests() { public void deleteIfInvalidObject_objInfoNullHexDigests() { String id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; long size = 1999999; - Map hexDigests = null; ObjectMetadata objInfo = new ObjectMetadata(null, id, size, null); assertThrows( @@ -896,25 +885,25 @@ public void deleteIfInvalidObject_supportedAlgoNotInDefaultList() throws Excepti String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - // Get deleteIfInvalidObject args - String expectedChecksum = testData.pidData.get(pid).get("md2"); - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + // Get deleteIfInvalidObject args + String expectedChecksum = testData.pidData.get(pid).get("md2"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, "MD2", expectedSize); + fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, "MD2", expectedSize); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } } } @@ -928,23 +917,23 @@ public void deleteIfInvalidObject_unsupportedAlgo() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - assertThrows( - UnsupportedHashAlgorithmException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertThrows( + UnsupportedHashAlgorithmException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } } } @@ -957,30 +946,30 @@ public void deleteIfInvalidObject_mismatchedSize() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get deleteIfInvalidObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = 123456789; + // Get deleteIfInvalidObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = 123456789; - assertThrows( - NonMatchingObjSizeException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize)); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertThrows( + NonMatchingObjSizeException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } } } @@ -993,30 +982,30 @@ public void deleteIfInvalidObject_mismatchedChecksum() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - // Get deleteIfInvalidObject args - String expectedChecksum = "intentionallyWrongValue"; - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + // Get deleteIfInvalidObject args + String expectedChecksum = "intentionallyWrongValue"; + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertThrows( - NonMatchingChecksumException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize)); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() - ); - // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertThrows( + NonMatchingChecksumException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, + expectedSize)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, objInfo.getCid() + ); + // Real path to the data object + assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( + "objects").resolve(objRelativePath))); + } } } @@ -1031,17 +1020,18 @@ public void storeMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + metadataStream.close(); - // Calculate absolute path - Path metadataPidExpectedPath = - fileHashStore.getHashStoreMetadataPath(pid, testFormatId); + // Calculate absolute path + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, testFormatId); - assertEquals(metadataPidExpectedPath.toString(), metadataPath); - assertTrue(Files.exists(metadataPidExpectedPath)); + assertEquals(metadataPidExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataPidExpectedPath)); + } } } @@ -1052,21 +1042,19 @@ public void storeMetadata() throws Exception { public void storeMetadata_defaultFormatId_overload() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); - - // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); - // Calculate absolute path - String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataPidExpectedPath = - fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); - assertEquals(metadataPidExpectedPath.toString(), metadataPath); - assertTrue(Files.exists(metadataPidExpectedPath)); + assertEquals(metadataPidExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataPidExpectedPath)); + } } } @@ -1081,22 +1069,22 @@ public void storeMetadata_pidHashIsDirectory() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid); - String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String metadataPidhash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, metadataPidhash - ); - Path expectedPidMetadataDirectory = rootDirectory.resolve("metadata").resolve( - pidMetadataDirectory - ); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String metadataPidhash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, metadataPidhash + ); + Path expectedPidMetadataDirectory = rootDirectory.resolve("metadata").resolve( + pidMetadataDirectory + ); - assertTrue(Files.isDirectory(expectedPidMetadataDirectory)); + assertTrue(Files.isDirectory(expectedPidMetadataDirectory)); + } } } @@ -1111,26 +1099,24 @@ public void storeMetadata_multipleFormatIds() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); - metadataStream.close(); - - InputStream metadataStreamDup = Files.newInputStream(testMetaDataFile); - String metadataDefaultPath = fileHashStore.storeMetadata(metadataStreamDup, pid); - metadataStreamDup.close(); - - // Calculate absolute path - Path metadataTestFormatIdExpectedPath = - fileHashStore.getHashStoreMetadataPath(pid, testFormatId); - String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataDefaultExpectedPath = - fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); - - assertEquals(metadataTestFormatIdExpectedPath.toString(), metadataPath); - assertTrue(Files.exists(metadataTestFormatIdExpectedPath)); - assertEquals(metadataDefaultExpectedPath.toString(), metadataDefaultPath); - assertTrue(Files.exists(metadataDefaultExpectedPath)); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamDup = Files.newInputStream(testMetaDataFile)) { + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + String metadataDefaultPath = fileHashStore.storeMetadata(metadataStreamDup, pid); + + // Calculate absolute path + Path metadataTestFormatIdExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, testFormatId); + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataDefaultExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + + assertEquals(metadataTestFormatIdExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataTestFormatIdExpectedPath)); + assertEquals(metadataDefaultExpectedPath.toString(), metadataDefaultPath); + assertTrue(Files.exists(metadataDefaultExpectedPath)); + } } } @@ -1145,13 +1131,13 @@ public void storeMetadata_fileSize() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, null); - long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(Paths.get(metadataPath)); - assertEquals(writtenMetadataFile, originalMetadataFie); + long writtenMetadataFile = Files.size(testMetaDataFile); + long originalMetadataFie = Files.size(Paths.get(metadataPath)); + assertEquals(writtenMetadataFile, originalMetadataFie); + } } } @@ -1179,10 +1165,9 @@ public void storeMetadata_pidNull() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.storeMetadata(metadataStream, null, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, null, null); + } }); } } @@ -1199,10 +1184,9 @@ public void storeMetadata_pidEmpty() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.storeMetadata(metadataStream, "", null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, "", null); + } }); } } @@ -1219,10 +1203,9 @@ public void storeMetadata_pidEmptySpaces() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.storeMetadata(metadataStream, " ", null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, " ", null); + } }); } } @@ -1250,11 +1233,9 @@ public void storeMetadata_metadataLockedIds() throws Exception { // Submit 3 threads, each calling storeMetadata Future future1 = executorService.submit(() -> { - try { + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; - InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); - metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); Path metadataPidExpectedPath = @@ -1265,11 +1246,9 @@ public void storeMetadata_metadataLockedIds() throws Exception { } }); Future future2 = executorService.submit(() -> { - try { + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; - InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); - metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); Path metadataPidExpectedPath = @@ -1280,11 +1259,9 @@ public void storeMetadata_metadataLockedIds() throws Exception { } }); Future future3 = executorService.submit(() -> { - try { + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; - InputStream metadataStream = Files.newInputStream(testMetaDataFile); String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); - metadataStream.close(); // Calculate absolute path String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); Path metadataPidExpectedPath = @@ -1326,14 +1303,14 @@ public void retrieveObject() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - // Retrieve object - InputStream objectCidInputStream = fileHashStore.retrieveObject(pid); - assertNotNull(objectCidInputStream); - objectCidInputStream.close(); + // Retrieve object + try (InputStream objectCidInputStream = fileHashStore.retrieveObject(pid)) { + assertNotNull(objectCidInputStream); + } + } } } @@ -1390,43 +1367,38 @@ public void retrieveObject_verifyContent() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } + // Retrieve object - InputStream objectCidInputStream; - try { - objectCidInputStream = fileHashStore.retrieveObject(pid); + try (InputStream objectCidInputStream = fileHashStore.retrieveObject(pid)) { + // Read content and compare it to the SHA-256 checksum from TestDataHarness + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = objectCidInputStream.read(buffer)) != -1) { + sha256.update(buffer, 0, bytesRead); + } + + } catch (IOException ioe) { + ioe.printStackTrace(); + throw ioe; + + } + + // Get hex digest + String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); + assertEquals(sha256Digest, sha256DigestFromTestData); } catch (Exception e) { e.printStackTrace(); throw e; } - - // Read content and compare it to the SHA-256 checksum from TestDataHarness - MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = objectCidInputStream.read(buffer)) != -1) { - sha256.update(buffer, 0, bytesRead); - } - - } catch (IOException ioe) { - ioe.printStackTrace(); - throw ioe; - - } finally { - // Close stream - objectCidInputStream.close(); - } - - // Get hex digest - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); - assertEquals(sha256Digest, sha256DigestFromTestData); } } @@ -1441,14 +1413,17 @@ public void retrieveMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + } String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, storeFormatId); - assertNotNull(metadataCidInputStream); + try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, + storeFormatId)) { + assertNotNull(metadataCidInputStream); + } + } } @@ -1463,12 +1438,13 @@ public void retrieveMetadata_overload() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + } - InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid); - assertNotNull(metadataCidInputStream); + try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid)) { + assertNotNull(metadataCidInputStream); + } } } @@ -1566,48 +1542,42 @@ public void retrieveMetadata_verifyContent() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + } String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); // Retrieve object - InputStream metadataCidInputStream; - try { - metadataCidInputStream = fileHashStore.retrieveMetadata(pid, storeFormatId); + try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, storeFormatId)) { + // Read content and compare it to the SHA-256 checksum from TestDataHarness + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = metadataCidInputStream.read(buffer)) != -1) { + sha256.update(buffer, 0, bytesRead); + } + + } catch (IOException ioe) { + ioe.printStackTrace(); + throw ioe; + + } + + // Get hex digest + String sha256MetadataDigest = DatatypeConverter.printHexBinary(sha256.digest()) + .toLowerCase(); + String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( + "metadata_cid_sha256" + ); + assertEquals(sha256MetadataDigest, sha256MetadataDigestFromTestData); } catch (Exception e) { e.printStackTrace(); throw e; } - - // Read content and compare it to the SHA-256 checksum from TestDataHarness - MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = metadataCidInputStream.read(buffer)) != -1) { - sha256.update(buffer, 0, bytesRead); - } - - } catch (IOException ioe) { - ioe.printStackTrace(); - throw ioe; - - } finally { - // Close stream - metadataCidInputStream.close(); - } - - // Get hex digest - String sha256MetadataDigest = DatatypeConverter.printHexBinary(sha256.digest()) - .toLowerCase(); - String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( - "metadata_cid_sha256" - ); - assertEquals(sha256MetadataDigest, sha256MetadataDigestFromTestData); } } @@ -1620,36 +1590,36 @@ public void deleteObject_dataObjAndMetadataDocs() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } // Get metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPathString = fileHashStore.storeMetadata( - metadataStream, pid, testFormatId - ); - metadataStream.close(); - InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile); - String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStreamTwo, pid); - Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path metadataPath = Paths.get(metadataPathString); - Path metadataDefaultPath = Paths.get(metadataDefaultPathString); - metadataStreamTwo.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile)) { + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPathString = fileHashStore.storeMetadata( + metadataStream, pid, testFormatId + ); - // Confirm expected documents exist - assertTrue(Files.exists(metadataPath)); - assertTrue(Files.exists(metadataDefaultPath)); - assertTrue(Files.exists(objCidAbsPath)); + String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStreamTwo, pid); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path metadataPath = Paths.get(metadataPathString); + Path metadataDefaultPath = Paths.get(metadataDefaultPathString); - fileHashStore.deleteObject(pid); + // Confirm expected documents exist + assertTrue(Files.exists(metadataPath)); + assertTrue(Files.exists(metadataDefaultPath)); + assertTrue(Files.exists(objCidAbsPath)); - // Check documents have been deleted - assertFalse(Files.exists(metadataPath)); - assertFalse(Files.exists(metadataDefaultPath)); - assertFalse(Files.exists(objCidAbsPath)); + fileHashStore.deleteObject(pid); + + // Check documents have been deleted + assertFalse(Files.exists(metadataPath)); + assertFalse(Files.exists(metadataDefaultPath)); + assertFalse(Files.exists(objCidAbsPath)); + } } } @@ -1664,9 +1634,9 @@ public void deleteObject_stringPidNoMetadataDocs() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } // Get metadata file Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -1691,9 +1661,9 @@ public void deleteObject_objectDeleted() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); fileHashStore.deleteObject(pid); @@ -1720,19 +1690,19 @@ public void deleteObject_referencesDeleted() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - String cid = objInfo.getCid(); - - // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.deleteObject(pid); - assertFalse(Files.exists(absPathPidRefsPath)); - assertFalse(Files.exists(absPathCidRefsPath)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); + + // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.deleteObject(pid); + assertFalse(Files.exists(absPathPidRefsPath)); + assertFalse(Files.exists(absPathCidRefsPath)); + } } } @@ -1747,23 +1717,23 @@ public void deleteObject_cidRefsFileNotEmptyObjectExistsStill() throws Exception String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - String pidExtra = "dou.test" + pid; - String cid = objInfo.getCid(); - fileHashStore.tagObject(pidExtra, cid); - - Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.deleteObject(pid); - - assertFalse(Files.exists(absPathPidRefsPath)); - assertTrue(Files.exists(objCidAbsPath)); - assertTrue(Files.exists(absPathCidRefsPath)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String pidExtra = "dou.test" + pid; + String cid = objInfo.getCid(); + fileHashStore.tagObject(pidExtra, cid); + + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.deleteObject(pid); + + assertFalse(Files.exists(absPathPidRefsPath)); + assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(absPathCidRefsPath)); + } } } @@ -1777,32 +1747,32 @@ public void deleteObject_pidOrphan() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - String cid = objInfo.getCid(); - String pidExtra = "dou.test" + pid; - Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); - - // Manually change the pid found in the cid refs file - Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath, "add"); - // Create an orphaned pid refs file - fileHashStore.updateRefsFile(pid, absPathCidRefsPath, "remove"); - - fileHashStore.deleteObject(pid); - - // Confirm cid refs file still exists - assertTrue(Files.exists(absPathCidRefsPath)); - // Confirm the original (and now orphaned) pid refs file is deleted - Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - assertFalse(Files.exists(absPathPidRefsPath)); - // Confirm the object has not been deleted - assertTrue(Files.exists(objRealPath)); - // Confirm the cid refs file still exists - assertTrue(Files.exists(absPathCidRefsPath)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); + String pidExtra = "dou.test" + pid; + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); + + // Manually change the pid found in the cid refs file + Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath, "add"); + // Create an orphaned pid refs file + fileHashStore.updateRefsFile(pid, absPathCidRefsPath, "remove"); + + fileHashStore.deleteObject(pid); + + // Confirm cid refs file still exists + assertTrue(Files.exists(absPathCidRefsPath)); + // Confirm the original (and now orphaned) pid refs file is deleted + Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + assertFalse(Files.exists(absPathPidRefsPath)); + // Confirm the object has not been deleted + assertTrue(Files.exists(objRealPath)); + // Confirm the cid refs file still exists + assertTrue(Files.exists(absPathCidRefsPath)); + } } } @@ -1871,22 +1841,22 @@ public void deleteObjectByCid() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - dataStream.close(); - String cid = objInfo.getCid(); - - fileHashStore.deleteObjectByCid(cid); - - // Get permanent address of the actual cid - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getCid(); - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, actualCid - ); - Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - assertFalse(Files.exists(objectStoreDirectory)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + String cid = objInfo.getCid(); + + fileHashStore.deleteObjectByCid(cid); + + // Get permanent address of the actual cid + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String actualCid = objInfo.getCid(); + String cidShardString = FileHashStoreUtility.getHierarchicalPathString( + storeDepth, storeWidth, actualCid + ); + Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); + assertFalse(Files.exists(objectStoreDirectory)); + } } } @@ -1900,21 +1870,21 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - String cid = objInfo.getCid(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + String cid = objInfo.getCid(); - fileHashStore.deleteObjectByCid(cid); + fileHashStore.deleteObjectByCid(cid); - // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); - assertTrue(Files.exists(objRealPath)); - // Confirm cid refs file still exists - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - assertTrue(Files.exists(cidRefsPath)); + // Get permanent address of the actual cid + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objRealPath)); + // Confirm cid refs file still exists + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + assertTrue(Files.exists(cidRefsPath)); + } } } @@ -1982,24 +1952,24 @@ public void deleteMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); - metadataStream.close(); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); - String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - fileHashStore.deleteMetadata(pid, storeFormatId); + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + fileHashStore.deleteMetadata(pid, storeFormatId); - // Check that file doesn't exist - Path metadataCidPath = fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); - assertFalse(Files.exists(metadataCidPath)); + // Check that file doesn't exist + Path metadataCidPath = fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); + assertFalse(Files.exists(metadataCidPath)); - // Check that parent directories are not deleted - assertTrue(Files.exists(metadataCidPath.getParent())); + // Check that parent directories are not deleted + assertTrue(Files.exists(metadataCidPath.getParent())); - // Check that metadata directory still exists - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path storeObjectPath = storePath.resolve("metadata"); - assertTrue(Files.exists(storeObjectPath)); + // Check that metadata directory still exists + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + Path storeObjectPath = storePath.resolve("metadata"); + assertTrue(Files.exists(storeObjectPath)); + } } } @@ -2016,16 +1986,13 @@ public void deleteMetadata_overload() throws Exception { String formatIdTwo = "ns.type.2"; String formatIdThree = "ns.type.3"; - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); - metadataStream.close(); - InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStreamTwo, pid, formatIdTwo); - metadataStreamTwo.close(); - InputStream metadataStreamThree = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStreamThree, pid, formatIdThree); - metadataStreamThree.close(); - + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamThree = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + fileHashStore.storeMetadata(metadataStreamTwo, pid, formatIdTwo); + fileHashStore.storeMetadata(metadataStreamThree, pid, formatIdThree); + } fileHashStore.deleteMetadata(pid); @@ -2135,18 +2102,18 @@ public void getHexDigest() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - dataStream.close(); - - // Then get the checksum - String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); - String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); - String objSha256Checksum = objInfo.getHexDigests().get("SHA-256"); - assertEquals(pidHexDigest, sha256DigestFromTestData); - assertEquals(pidHexDigest, objSha256Checksum); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject( + dataStream, pid, null, null, null, -1 + ); + + // Then get the checksum + String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); + String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); + String objSha256Checksum = objInfo.getHexDigests().get("SHA-256"); + assertEquals(pidHexDigest, sha256DigestFromTestData); + assertEquals(pidHexDigest, objSha256Checksum); + } } } @@ -2204,11 +2171,11 @@ public void getHexDigest_badAlgo() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - dataStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - fileHashStore.getHexDigest(pid, "BLAKE2S"); + fileHashStore.getHexDigest(pid, "BLAKE2S"); + } }); } } From dd9a2a7932603b9b763de8f1f767d29e3452070f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 08:41:58 -0700 Subject: [PATCH 467/553] Add new enum 'HashStoreRefUpdateTypes' and refactor affected code in FileHashStore (related to updateRefsFile) --- .../filehashstore/FileHashStore.java | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index db8d7a88..c9eed4f2 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -112,6 +112,21 @@ enum HashStoreProperties { storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace } + enum HashStoreRefUpdateTypes { + + add("add"), remove("remove"); + + final String refUpdateType; + + HashStoreRefUpdateTypes(String updateType) { + refUpdateType = updateType; + } + + public String getName() { + return refUpdateType; + } + } + /** * Constructor to initialize FileHashStore, properties are required. FileHashStore is not * responsible for ensuring that the given store path is accurate. Upon initialization, if @@ -740,7 +755,7 @@ public void deleteObject(String pid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); // Begin deletion process - updateRefsFile(pid, absCidRefsPath, "remove"); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); if (Files.size(absCidRefsPath) == 0) { Path objRealPath = getHashStoreDataObjectPath(pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); @@ -785,7 +800,7 @@ public void deleteObject(String pid) Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); - updateRefsFile(pid, absCidRefsPath, "remove"); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } @@ -1689,7 +1704,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file if (!isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, "add"); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add.getName()); } // Get the pid refs file and verify tagging process File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); @@ -1753,7 +1768,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); // Begin deletion process - updateRefsFile(pid, absCidRefsPath, "remove"); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } else { @@ -1793,7 +1808,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); - updateRefsFile(pid, absCidRefsPath, "remove"); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } @@ -1826,7 +1841,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, "remove"); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); String errMsg = "Pid refs file not found, removed pid found in cid refs file: " + absCidRefsPath; logFileHashStore.warn(errMsg); @@ -1943,7 +1958,7 @@ protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOExce * * @param ref Authority-based or persistent identifier * @param absRefsPath Path to the refs file to update - * @param updateType "add" or "remove" + * @param updateType enum HashStoreRefUpdateType (add, remove) * @throws IOException Issue with updating or accessing a refs file */ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) @@ -1960,7 +1975,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) ); FileLock ignored = channel.lock()) { Collection lines = new ArrayList<>(Files.readAllLines(absRefsPath)); - if (updateType.equals("add")) { + if (updateType.equals(HashStoreRefUpdateTypes.add.getName())) { lines.add(ref); Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); @@ -1968,7 +1983,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) "Ref: " + ref + " has been added to refs file: " + absRefsPath); } - if (updateType.equals("remove")) { + if (updateType.equals(HashStoreRefUpdateTypes.remove.getName())) { lines.remove(ref); Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); From 56afebc53656af4825fbd1ca00c62545de40a415 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 09:52:35 -0700 Subject: [PATCH 468/553] Refactor 'updateRefsFile' and 'getHashStoreRefsPath' to require enum parameter in signature, and refactor affected code and junit tests --- .../filehashstore/FileHashStore.java | 100 +++++++------- .../FileHashStoreInterfaceTest.java | 72 ++++++---- .../FileHashStoreProtectedTest.java | 128 ++++++++++-------- 3 files changed, 172 insertions(+), 128 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c9eed4f2..cb7e186c 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -751,11 +751,11 @@ public void deleteObject(String pid) try { // Proceed with comprehensive deletion - cid exists, nothing out of place - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); // Begin deletion process - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { Path objRealPath = getHashStoreDataObjectPath(pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); @@ -779,7 +779,7 @@ public void deleteObject(String pid) } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file and related metadata documents - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); @@ -791,7 +791,7 @@ public void deleteObject(String pid) } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when the pid and cid refs file exists, // but the actual object being referenced by the pid does not exist - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); try { @@ -799,8 +799,8 @@ public void deleteObject(String pid) synchronizeObjectLockedCids(cidRead); Path absCidRefsPath = - getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); + getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } @@ -820,7 +820,7 @@ public void deleteObject(String pid) } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists // but the pid is not found in the cid refs file. - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); FileHashStoreUtility.deleteListItems(deleteList); deleteMetadata(pid); @@ -1084,11 +1084,11 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "findObject"); // Get path of the pid references file - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); if (Files.exists(absPidRefsPath)) { String cid = new String(Files.readAllBytes(absPidRefsPath)); - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); // Throw exception if the cid refs file doesn't exist if (!Files.exists(absCidRefsPath)) { @@ -1621,7 +1621,7 @@ protected void deleteObjectByCid(String cid) throws IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug("Called to delete data object with cid: " + cid); // Get expected path of the cid refs file & permanent address of the actual cid - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid); Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); @@ -1657,8 +1657,8 @@ protected void deleteObjectByCid(String cid) */ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgorithmException, IOException { - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Confirm that reference files are where they are expected to be @@ -1686,9 +1686,8 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo return; } else { // Check if the retrieved cid refs file exists and pid is referenced - Path retrievedAbsCidRefsPath = getHashStoreRefsPath( - retrievedCid, HashStoreIdTypes.cid.getName() - ); + Path retrievedAbsCidRefsPath = + getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath )) { // This pid is accounted for and tagged as expected. @@ -1704,7 +1703,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file if (!isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add.getName()); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); } // Get the pid refs file and verify tagging process File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); @@ -1764,11 +1763,11 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, try { // Get paths to reference files to work on - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); // Begin deletion process - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } else { @@ -1788,7 +1787,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); @@ -1799,7 +1798,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when the pid and cid refs file exists, // but the actual object being referenced by the pid does not exist - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); try { @@ -1807,8 +1806,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, synchronizeObjectLockedCids(cidRead); Path absCidRefsPath = - getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid.getName()); - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); + getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } @@ -1828,7 +1827,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, // but the pid is not found in the cid refs file. // Rename pid refs file for deletion - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid.getName()); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); @@ -1839,9 +1838,9 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, // `findObject` throws this exception if the pid refs file is not found // Check to see if pid is in the `cid refs file`and attempt to remove it Path absCidRefsPath = - getHashStoreRefsPath(cid, HashStoreIdTypes.cid.getName()); + getHashStoreRefsPath(cid, HashStoreIdTypes.cid); if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove.getName()); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); String errMsg = "Pid refs file not found, removed pid found in cid refs file: " + absCidRefsPath; logFileHashStore.warn(errMsg); @@ -1908,7 +1907,7 @@ protected void verifyHashStoreRefsFiles( * where it belongs otherwise it will be removed during garbage collection. * * @param ref Authority-based or persistent identifier to write - * @param refType Type of reference 'pid', 'cid' or 'sysmeta' + * @param refType Type of reference 'pid' or 'cid' * @throws IOException Failure to write refs file * @return File object with single reference */ @@ -1958,10 +1957,10 @@ protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOExce * * @param ref Authority-based or persistent identifier * @param absRefsPath Path to the refs file to update - * @param updateType enum HashStoreRefUpdateType (add, remove) + * @param updateType {@link HashStoreRefUpdateTypes} * @throws IOException Issue with updating or accessing a refs file */ - protected void updateRefsFile(String ref, Path absRefsPath, String updateType) + protected void updateRefsFile(String ref, Path absRefsPath, HashStoreRefUpdateTypes updateType) throws IOException { // This update process is atomic, so we first write the updated content // into a temporary file before overwriting it. @@ -1975,7 +1974,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) ); FileLock ignored = channel.lock()) { Collection lines = new ArrayList<>(Files.readAllLines(absRefsPath)); - if (updateType.equals(HashStoreRefUpdateTypes.add.getName())) { + if (updateType.equals(HashStoreRefUpdateTypes.add)) { lines.add(ref); Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); @@ -1983,7 +1982,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, String updateType) "Ref: " + ref + " has been added to refs file: " + absRefsPath); } - if (updateType.equals(HashStoreRefUpdateTypes.remove.getName())) { + if (updateType.equals(HashStoreRefUpdateTypes.remove)) { lines.remove(ref); Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); @@ -2182,25 +2181,32 @@ protected InputStream getHashStoreMetadataInputStream(String pid, String formatI * Get the absolute path to a HashStore pid or cid ref file * * @param abpcId Authority-based identifier, persistent identifier or content identifier - * @param refType "cid" or "pid + * @param refType {@link HashStoreIdTypes} * @return Path to the requested refs file * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported */ - protected Path getHashStoreRefsPath(String abpcId, String refType) + protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) throws NoSuchAlgorithmException { - Path realPath; - if (refType.equalsIgnoreCase(HashStoreIdTypes.pid.getName())) { - String hashedId = FileHashStoreUtility.getPidHexDigest(abpcId, OBJECT_STORE_ALGORITHM); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashedId - ); - realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); - } else if (refType.equalsIgnoreCase(HashStoreIdTypes.cid.getName())) { - String cidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, abpcId - ); - realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); - } else { + Path realPath = null; + + switch (refType) { + case pid -> { + String hashedId = + FileHashStoreUtility.getPidHexDigest(abpcId, OBJECT_STORE_ALGORITHM); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + hashedId); + realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); + } + case cid -> { + String cidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + abpcId); + realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); + realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); + } + } + if (realPath == null) { String errMsg = "formatId must be 'pid' or 'cid'"; logFileHashStore.error(errMsg); throw new IllegalArgumentException(errMsg); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index bc724d8c..898fc389 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -285,7 +285,8 @@ public void storeObject_overloadInputStreamOnly() throws Exception { assertThrows(FileNotFoundException.class, () -> fileHashStore.findObject(pid)); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertFalse(Files.exists(cidRefsFilePath)); } } @@ -462,7 +463,8 @@ public void storeObject_duplicate() throws Exception { ); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); } @@ -578,8 +580,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -597,8 +601,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -616,8 +622,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -635,8 +643,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -654,8 +664,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { if (objInfo != null) { String cid = objInfo.getCid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); @@ -707,7 +719,8 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { // Check cid refs file that every pid is found String cidSha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cidSha256DigestFromTestData, "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cidSha256DigestFromTestData, + FileHashStore.HashStoreIdTypes.cid); Collection stringSet = new HashSet<>(pidModifiedList); List lines = Files.readAllLines(cidRefsFilePath); boolean allFoundPidsFound = true; @@ -1697,8 +1710,10 @@ public void deleteObject_referencesDeleted() throws Exception { String cid = objInfo.getCid(); // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); fileHashStore.deleteObject(pid); assertFalse(Files.exists(absPathPidRefsPath)); assertFalse(Files.exists(absPathCidRefsPath)); @@ -1726,8 +1741,10 @@ public void deleteObject_cidRefsFileNotEmptyObjectExistsStill() throws Exception fileHashStore.tagObject(pidExtra, cid); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); - Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); fileHashStore.deleteObject(pid); assertFalse(Files.exists(absPathPidRefsPath)); @@ -1756,17 +1773,21 @@ public void deleteObject_pidOrphan() throws Exception { Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); // Manually change the pid found in the cid refs file - Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath, "add"); + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath, + FileHashStore.HashStoreRefUpdateTypes.add); // Create an orphaned pid refs file - fileHashStore.updateRefsFile(pid, absPathCidRefsPath, "remove"); + fileHashStore.updateRefsFile( + pid, absPathCidRefsPath, FileHashStore.HashStoreRefUpdateTypes.remove); fileHashStore.deleteObject(pid); // Confirm cid refs file still exists assertTrue(Files.exists(absPathCidRefsPath)); // Confirm the original (and now orphaned) pid refs file is deleted - Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertFalse(Files.exists(absPathPidRefsPath)); // Confirm the object has not been deleted assertTrue(Files.exists(objRealPath)); @@ -1824,8 +1845,10 @@ public void deleteObject_orphanRefsFiles() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path absPathCidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path absPathPidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); fileHashStore.deleteObject(pid); assertFalse(Files.exists(absPathCidRefsPath)); @@ -1882,7 +1905,8 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); assertTrue(Files.exists(objRealPath)); // Confirm cid refs file still exists - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(cidRefsPath)); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index f12e26b8..56774358 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -161,8 +161,9 @@ public void findObject_refsPaths() throws Exception { String cidRefsPath = objInfoMap.get("cid_refs_path"); String pidRefsPath = objInfoMap.get("pid_refs_path"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), "cid"); - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), FileHashStore.HashStoreIdTypes.cid); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, + FileHashStore.HashStoreIdTypes.pid); assertEquals(cidRefsPath, cidRefsFilePath.toString()); assertEquals(pidRefsPath, pidRefsFilePath.toString()); @@ -244,7 +245,7 @@ public void findObject_cidRefsFileNotFound() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); Files.delete(cidRefsPath); assertThrows(OrphanPidRefsFileException.class, () -> fileHashStore.findObject(pid)); @@ -261,8 +262,8 @@ public void findObject_cidRefsFileMissingPid() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pid, cidRefsPath, "remove"); + Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile(pid, cidRefsPath, FileHashStore.HashStoreRefUpdateTypes.remove); assertThrows(PidNotFoundInCidRefsFileException.class, () -> fileHashStore.findObject(pid)); } @@ -1088,9 +1089,9 @@ public void storeHashStoreRefsFiles() throws Exception { // Confirm refs files exist Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertTrue(Files.exists(absCidRefsPath)); assertTrue(Files.exists(absPidRefsPath)); @@ -1116,7 +1117,8 @@ public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { String cid = "abcdef123456789"; fileHashStore.storeHashStoreRefsFiles(pid, cid); - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, + FileHashStore.HashStoreIdTypes.pid); assertTrue(Files.exists(pidRefsFilePath)); String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); @@ -1133,7 +1135,7 @@ public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { String cid = "abcdef123456789"; fileHashStore.storeHashStoreRefsFiles(pid, cid); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(cidRefsFilePath)); String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); @@ -1197,7 +1199,7 @@ public void storeHashStoreRefsFiles_pidRefsOrphanedFile() // Create orphaned pid refs file Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); File pidRefsTmpFile = fileHashStore.writeRefsFile( cidForOrphanPidRef, FileHashStore.HashStoreIdTypes.pid.getName() ); @@ -1230,11 +1232,12 @@ public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); // Confirm missing pid refs file has been created - Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, "pid"); + Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, + FileHashStore.HashStoreIdTypes.pid); assertTrue(Files.exists(pidAdditionalRefsFilePath)); // Check cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( pidAdditional, cidRefsFilePath ); @@ -1264,9 +1267,9 @@ public void unTagObject() throws Exception { // Confirm refs files do not exist Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertFalse(Files.exists(absCidRefsPath)); assertFalse(Files.exists(absPidRefsPath)); } @@ -1291,9 +1294,9 @@ public void unTagObject_cidWithMultiplePidReferences() throws Exception { // Confirm refs files state Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertFalse(Files.exists(absPidRefsPath)); assertTrue(Files.exists(absCidRefsPath)); @@ -1320,7 +1323,7 @@ public void unTagObject_orphanPidRefsFile() throws Exception { // Delete cid refs file to create orphaned pid refs file Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); Files.delete(absCidRefsPath); assertFalse(Files.exists(absCidRefsPath)); @@ -1328,7 +1331,7 @@ public void unTagObject_orphanPidRefsFile() throws Exception { // Confirm pid refs is deleted Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertFalse(Files.exists(absPidRefsPath)); // Confirm number of reference files @@ -1370,14 +1373,14 @@ public void unTagObject_missingPidRefsFile() throws Exception { // Delete pid refs to create scenario Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid.getName()); + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); Files.delete(absPidRefsPath); assertFalse(Files.exists(absPidRefsPath)); fileHashStore.unTagObject(pid, cid); Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid.getName()); + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertFalse(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); } @@ -1391,8 +1394,10 @@ public void verifyHashStoreRefFiles() throws Exception { fileHashStore.tagObject(pid, cid); // Create a pid refs file with the incorrect cid - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsFilePath); } @@ -1406,8 +1411,10 @@ public void verifyHashStoreRefFiles_fileNotFound() throws Exception { String cid = "abcdef123456789"; // Create a pid refs file with the incorrect cid - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertThrows(FileNotFoundException.class, () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, @@ -1429,7 +1436,7 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertThrows( CidNotFoundInPidRefsFileException.class, @@ -1452,7 +1459,8 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); // Get path of the pid refs file - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertThrows( PidNotFoundInCidRefsFileException.class, @@ -1505,7 +1513,8 @@ public void isStringInRefsFile_cidRefsMultipleVals() throws Exception { fileHashStore.tagObject("dou.test.2", cid); fileHashStore.tagObject("dou.test.3", cid); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(fileHashStore.isStringInRefsFile("dou.test.1", cidRefsFilePath)); assertFalse(fileHashStore.isStringInRefsFile("wont.be.found", cidRefsFilePath)); @@ -1521,10 +1530,12 @@ public void updateRefsFile_add() throws Exception { fileHashStore.tagObject(pid, cid); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); String pidAdditional = "dou.test.2"; - fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "add"); + fileHashStore.updateRefsFile( + pidAdditional, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.add); List lines = Files.readAllLines(cidRefsFilePath); boolean pidOriginal_foundInCidRefFiles = false; @@ -1552,8 +1563,10 @@ public void updateRefsFile_remove() throws Exception { String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); assertFalse(fileHashStore.isStringInRefsFile(pid, cidRefsFilePath)); } @@ -1568,10 +1581,13 @@ public void updateRefsFile_removeMultiplePids() throws Exception { fileHashStore.tagObject(pid, cid); String pidAdditional = "dou.test.2"; fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); - fileHashStore.updateRefsFile(pidAdditional, cidRefsFilePath, "remove"); + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + fileHashStore.updateRefsFile( + pidAdditional, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); assertTrue(Files.exists(cidRefsFilePath)); assertEquals(0, Files.size(cidRefsFilePath)); @@ -1588,8 +1604,10 @@ public void updateRefsFile_cidRefsPidNotFound() throws Exception { fileHashStore.tagObject(pid, cid); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile("dou.test.2", cidRefsFilePath, "remove"); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + "dou.test.2", cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); List lines = Files.readAllLines(cidRefsFilePath); boolean pidOriginal_foundInCidRefFiles = false; @@ -1615,8 +1633,10 @@ public void updateRefsFile_cidRefsEmpty() throws Exception { fileHashStore.tagObject(pid, cid); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, "cid"); - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); List lines = Files.readAllLines(cidRefsFilePath); boolean pidOriginal_foundInCidRefFiles = false; @@ -1631,7 +1651,7 @@ public void updateRefsFile_cidRefsEmpty() throws Exception { assertEquals(0, pidsFound); // Confirm that no exception is thrown and that the cid refs still exists - fileHashStore.updateRefsFile(pid, cidRefsFilePath, "remove"); + fileHashStore.updateRefsFile(pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); assertTrue(Files.exists(cidRefsFilePath)); } @@ -1896,7 +1916,8 @@ public void isStringInRefsFile_pidFound() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); } } @@ -1915,7 +1936,8 @@ public void isStringInRefsFile_pidNotFound() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertFalse(fileHashStore.isStringInRefsFile("pid.not.found", absCidRefsPath)); } } @@ -2052,7 +2074,8 @@ public void getHashStoreRefsPath_pid() throws Exception { Path calculatedPidRefsRealPath = storePath.resolve("refs/pids").resolve(metadataPidHashSharded); - Path expectedPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, "pid"); + Path expectedPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertEquals(expectedPidRefsPath, calculatedPidRefsRealPath); } @@ -2078,28 +2101,19 @@ public void getHashStoreRefsPath_cid() throws Exception { int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // Cid refs file - String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); - Path calculatedCidRefsRealPath = storePath.resolve("refs/cids").resolve(objShardString); + String objShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + Path calculatedCidRefsRealPath = + storePath.resolve("refs/cids").resolve(objShardString); - Path expectedCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, "cid"); + Path expectedCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertEquals(expectedCidRefsPath, calculatedCidRefsRealPath); } } } - /** - * Confirm getHashStoreRefsPath throws exception when requesting the path to a refs file with a - * formatId arg that is not "cid" or "pid" - */ - @Test - public void getHashStoreRefsPath_incorrectRefsType() { - assertThrows(IllegalArgumentException.class, () -> { - String cid = "testcid"; - fileHashStore.getHashStoreRefsPath(cid, "not_cid_or_pid"); - }); - } - /** * Confirm getHashStoreDataObjectPath throws exception when requesting path for an object * that does not exist From 38ebd81b762d20b3de9dcd9ce692b11a25cf2c42 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 09:56:00 -0700 Subject: [PATCH 469/553] Rename 'checkForEmptyAndValidString' method to 'checkForNotEmptyAndValidString' and update affected code --- .../filehashstore/FileHashStore.java | 66 +++++++++---------- .../filehashstore/FileHashStoreUtility.java | 8 +-- .../FileHashStoreProtectedTest.java | 8 +-- 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index cb7e186c..b920dd93 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -254,7 +254,7 @@ protected void verifyHashStoreProperties( FileHashStoreUtility.ensureNotNull( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); @@ -429,15 +429,15 @@ public ObjectMetadata storeObject( // Validate input parameters FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "storeObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "storeObject"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( additionalAlgorithm, "additionalAlgorithm", "storeObject"); validateAlgorithm(additionalAlgorithm); } if (checksumAlgorithm != null) { - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( checksumAlgorithm, "checksumAlgorithm", "storeObject"); validateAlgorithm(checksumAlgorithm); } @@ -553,8 +553,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "tagObject"); FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "tagObject"); - FileHashStoreUtility.checkForEmptyAndValidString(cid, "cid", "tagObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "tagObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid", "tagObject"); try { synchronizeObjectLockedCids(cid); @@ -596,14 +596,14 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "storeMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "storeMetadata"); // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "storeMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "storeMetadata"); checkedFormatId = formatId; } @@ -666,7 +666,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, logFileHashStore.debug("Retrieving InputStream to data object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "retrieveObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "retrieveObject"); // Check to see if object exists Path objRealPath = getHashStoreDataObjectPath(pid); @@ -701,9 +701,9 @@ public InputStream retrieveMetadata(String pid, String formatId) "Retrieving metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "retrieveMetadata"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "retrieveMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "retrieveMetadata"); return getHashStoreMetadataInputStream(pid, formatId); } @@ -718,7 +718,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, "Retrieving metadata for pid: " + pid + " with default metadata namespace: "); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "retrieveMetadata"); return getHashStoreMetadataInputStream(pid, DEFAULT_METADATA_NAMESPACE); } @@ -730,7 +730,7 @@ public void deleteObject(String pid) logFileHashStore.debug("Deleting object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "id", "deleteObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "id", "deleteObject"); Collection deleteList = new ArrayList<>(); try { @@ -918,9 +918,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx "Deleting metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "deleteMetadata"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "deleteMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "deleteMetadata"); // Get the path to the metadata document and add it to a list Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); @@ -944,7 +944,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOExcept NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug("Deleting all metadata documents for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "deleteMetadata"); // Get the path to the pid metadata document directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); @@ -1026,7 +1026,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug("Calculating hex digest for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "getHexDigest"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "getHexDigest"); validateAlgorithm(algorithm); // Find the content identifier @@ -1081,7 +1081,7 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("Finding object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "findObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "findObject"); // Get path of the pid references file Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); @@ -1186,7 +1186,7 @@ protected ObjectMetadata putObject( boolean compareChecksum = verifyChecksumParameters(checksum, checksumAlgorithm); // Validate additional algorithm if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( additionalAlgorithm, "additionalAlgorithm", "putObject" ); validateAlgorithm(additionalAlgorithm); @@ -1346,7 +1346,7 @@ protected void validateTmpObject( protected boolean validateAlgorithm(String algorithm) throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "validateAlgorithm"); - FileHashStoreUtility.checkForEmptyAndValidString(algorithm, "algorithm", "validateAlgorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm", "validateAlgorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); if (!algorithmSupported) { @@ -1368,7 +1368,7 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio */ protected boolean shouldCalculateAlgorithm(String algorithm) { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "shouldCalculateAlgorithm"); - FileHashStoreUtility.checkForEmptyAndValidString(algorithm, "algorithm", "shouldCalculateAlgorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm", "shouldCalculateAlgorithm"); boolean shouldCalculateAlgorithm = true; for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { if (algorithm.equals(defAlgo.getName())) { @@ -1391,12 +1391,12 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor throws NoSuchAlgorithmException { // First ensure algorithm is compatible and values are valid if they aren't null if (checksumAlgorithm != null) { - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters"); validateAlgorithm(checksumAlgorithm); } if (checksum != null) { - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( checksum, "checksum", "verifyChecksumParameters"); } // If checksum is supplied, checksumAlgorithm cannot be empty @@ -1404,7 +1404,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor FileHashStoreUtility.ensureNotNull( checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters" ); - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( checksumAlgorithm, "algorithm", "verifyChecksumParameters" ); } @@ -1417,7 +1417,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor FileHashStoreUtility.ensureNotNull( checksum, "checksum", "verifyChecksumParameters" ); - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( checksum, "checksum", "verifyChecksumParameters" ); } @@ -1448,7 +1448,7 @@ protected Map writeToTmpFileAndGenerateChecksums( // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(additionalAlgorithm); @@ -1456,7 +1456,7 @@ protected Map writeToTmpFileAndGenerateChecksums( } boolean generateCsAlgo = false; if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { - FileHashStoreUtility.checkForEmptyAndValidString( + FileHashStoreUtility.checkForNotEmptyAndValidString( checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" ); validateAlgorithm(checksumAlgorithm); @@ -1561,7 +1561,7 @@ protected void move(File source, File target, String entity) throws IOException, "Moving " + entity + ", from source: " + source + ", to target: " + target); // Validate input parameters FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); - FileHashStoreUtility.checkForEmptyAndValidString(entity, "entity", "move"); + FileHashStoreUtility.checkForNotEmptyAndValidString(entity, "entity", "move"); if (entity.equals("object") && target.exists()) { String errMsg = "File already exists for target: " + target; logFileHashStore.warn(errMsg); @@ -1744,9 +1744,9 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, NoSuchAlgorithmException, IOException { // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "unTagObject"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "unTagObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "unTagObject"); FileHashStoreUtility.ensureNotNull(cid, "cid", "unTagObject"); - FileHashStoreUtility.checkForEmptyAndValidString(cid, "cid", "unTagObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid", "unTagObject"); Collection deleteList = new ArrayList<>(); @@ -1907,7 +1907,7 @@ protected void verifyHashStoreRefsFiles( * where it belongs otherwise it will be removed during garbage collection. * * @param ref Authority-based or persistent identifier to write - * @param refType Type of reference 'pid' or 'cid' + * @param refType Type of reference 'pid' or 'cid' to include in the log * @throws IOException Failure to write refs file * @return File object with single reference */ @@ -2017,14 +2017,14 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "putMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "putMetadata"); // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForEmptyAndValidString(formatId, "formatId", "putMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "putMetadata"); checkedFormatId = formatId; } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 07323ea1..e25b47a8 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -87,9 +87,9 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm public static String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, IllegalArgumentException { FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "getPidHexDigest"); FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyAndValidString(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm", "getPidHexDigest"); MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); byte[] bytes = pid.getBytes(StandardCharsets.UTF_8); @@ -211,9 +211,9 @@ public static void deleteListItems(Collection deleteList) { * @param method Calling method * @throws IllegalArgumentException If the string is empty or contains illegal characters */ - public static void checkForEmptyAndValidString(String string, String argument, String method) + public static void checkForNotEmptyAndValidString(String string, String argument, String method) throws IllegalArgumentException { - ensureNotNull(string, "string", "checkForEmptyAndValidString"); + ensureNotNull(string, "string", "checkForNotEmptyAndValidString"); if (string.trim().isEmpty()) { String errMsg = "Calling Method: " + method + "(): " + argument + " cannot be empty."; throw new IllegalArgumentException(errMsg); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 56774358..a4c46a78 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -2134,8 +2134,8 @@ public void getHashStoreDataObjectPath_fileNotFound() { public void fileHashStoreUtility_checkForEmptyAndValidString() { assertThrows( IllegalArgumentException.class, - () -> FileHashStoreUtility.checkForEmptyAndValidString("dou.test.1\n", "pid", - "storeObject")); + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("dou.test.1\n", "pid", + "storeObject")); } /** @@ -2146,8 +2146,8 @@ public void fileHashStoreUtility_checkForEmptyAndValidString() { public void fileHashStoreUtility_checkForEmptyAndValidString_newLine() { assertThrows( IllegalArgumentException.class, - () -> FileHashStoreUtility.checkForEmptyAndValidString("\n", "pid", - "storeObject")); + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("\n", "pid", + "storeObject")); } /** From d1d25592255ca8b90b00a746db4784ad70a0e4b9 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 09:57:12 -0700 Subject: [PATCH 470/553] Apply autoformatting to 'syncPutObject' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b920dd93..0b361d07 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -457,8 +457,8 @@ public ObjectMetadata storeObject( */ private ObjectMetadata syncPutObject( InputStream object, String pid, String additionalAlgorithm, String checksum, - String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, PidRefsFileExistsException, IOException, RuntimeException, + String checksumAlgorithm, long objSize) + throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException { try { // Lock pid for thread safety, transaction control and atomic writing From da143f3a147b15a31076308d6b5f83c38bd1df32 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 09:58:17 -0700 Subject: [PATCH 471/553] Apply autoformatting to 'deleteMetadata' --- .../hashstore/filehashstore/FileHashStore.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 0b361d07..432e1de2 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -940,17 +940,18 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx * Overload method for deleteMetadata with default metadata namespace */ @Override - public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException { + public void deleteMetadata(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException { logFileHashStore.debug("Deleting all metadata documents for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "deleteMetadata"); // Get the path to the pid metadata document directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidHexDigest - ); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + pidHexDigest); Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); // Add all metadata docs found in the metadata doc directory to a list to iterate over List metadataDocPaths = From 53092434690265e9580379e593afb1beef2bd290 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 09:59:19 -0700 Subject: [PATCH 472/553] Apply autoformatting to 'findObject' --- .../hashstore/filehashstore/FileHashStore.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 432e1de2..868e2ebf 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1077,9 +1077,9 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the * expected pid is not found in the cid refs file. */ - protected Map findObject(String pid) throws NoSuchAlgorithmException, - IOException, - OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { + protected Map findObject(String pid) + throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, + PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("Finding object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "findObject"); @@ -1102,9 +1102,9 @@ protected Map findObject(String pid) throws NoSuchAlgorithmExcep if (isStringInRefsFile(pid, absCidRefsPath)) { logFileHashStore.info("cid (" + cid + ") found for pid: " + pid); - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + cid); Path realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); if (Files.exists(realPath)) { Map objInfoMap = new HashMap<>(); From d842a595b2406742ba4bd65ed7d6fd493403b13f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 10:00:05 -0700 Subject: [PATCH 473/553] Apply autoformatting to 'putObject' --- .../filehashstore/FileHashStore.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 868e2ebf..34017986 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1178,8 +1178,8 @@ protected Map findObject(String pid) */ protected ObjectMetadata putObject( InputStream object, String pid, String additionalAlgorithm, String checksum, - String checksumAlgorithm, long objSize - ) throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, + String checksumAlgorithm, long objSize) + throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, PidRefsFileExistsException, IllegalArgumentException, NullPointerException, AtomicMoveNotSupportedException, InterruptedException { logFileHashStore.debug("Begin writing data object for pid: " + pid); @@ -1188,8 +1188,7 @@ protected ObjectMetadata putObject( // Validate additional algorithm if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { FileHashStoreUtility.checkForNotEmptyAndValidString( - additionalAlgorithm, "additionalAlgorithm", "putObject" - ); + additionalAlgorithm, "additionalAlgorithm", "putObject"); validateAlgorithm(additionalAlgorithm); } if (objSize != -1) { @@ -1200,15 +1199,14 @@ protected ObjectMetadata putObject( File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); Map hexDigests; try { - hexDigests = writeToTmpFileAndGenerateChecksums( - tmpFile, object, additionalAlgorithm, checksumAlgorithm - ); + hexDigests = writeToTmpFileAndGenerateChecksums(tmpFile, object, additionalAlgorithm, + checksumAlgorithm); } catch (Exception ge) { // If the process to write to the tmpFile is interrupted for any reason, // we will delete the tmpFile. Files.delete(tmpFile.toPath()); - String errMsg = - "Unexpected Exception while storing object for pid: " + pid + ". " + ge.getMessage(); + String errMsg = "Unexpected Exception while storing object for pid: " + pid + ". " + + ge.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -1219,9 +1217,9 @@ protected ObjectMetadata putObject( // Gather the elements to form the permanent address String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + objectCid); Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); try { @@ -1242,8 +1240,8 @@ protected ObjectMetadata putObject( } } catch (Exception e) { String errMsg = - "Unexpected exception when moving object with cid: " + objectCid + " for pid:" - + pid + ". Additional Details: " + e.getMessage(); + "Unexpected exception when moving object with cid: " + objectCid + " for pid:" + pid + + ". Additional Details: " + e.getMessage(); logFileHashStore.error(errMsg); throw e; } finally { From 01811074254208e7402d02ca70823e0398900d58 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 10:01:35 -0700 Subject: [PATCH 474/553] Apply autoformatting to 'getHashStoreMetadataPath' --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 34017986..057c3aaf 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2126,16 +2126,14 @@ protected Path getHashStoreMetadataPath(String abpId, String formatId) throws NoSuchAlgorithmException { // Get the pid metadata directory String hashedId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); - String pidMetadataDirRelPath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashedId - ); + String pidMetadataDirRelPath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + hashedId); // The file name for the metadata document is the hash of the supplied 'pid + 'formatId' String metadataDocHash = FileHashStoreUtility.getPidHexDigest(abpId + formatId, OBJECT_STORE_ALGORITHM); // Real path to metadata doc - return METADATA_STORE_DIRECTORY.resolve(pidMetadataDirRelPath).resolve( - metadataDocHash - ); + return METADATA_STORE_DIRECTORY.resolve(pidMetadataDirRelPath).resolve(metadataDocHash); } /** From b3d0ea1f38e49eec65477a8560ea08f795d89d0f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 10:08:20 -0700 Subject: [PATCH 475/553] Remove redundant code in 'getHashStoreRefsPath' method --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 057c3aaf..532a1696 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2200,14 +2200,8 @@ protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, abpcId); realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); - realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } } - if (realPath == null) { - String errMsg = "formatId must be 'pid' or 'cid'"; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - } return realPath; } From b88430373e1959905986e7a58ee1dfb58eb697e3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 10:22:00 -0700 Subject: [PATCH 476/553] Add default case to 'getHashStoreRefsPath' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 532a1696..858089b2 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2201,6 +2201,9 @@ protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) abpcId); realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } + default -> + throw new IllegalArgumentException("Ref type must be a type of HashStoreIdTypes " + + "(pid or cid)"); } return realPath; } From 906ea735ee41591467a4a25ed8874d522a9f2a47 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 14:43:35 -0700 Subject: [PATCH 477/553] Refactor 'ObjectMetadata' to be a record instead of a custom class, and revise all junit tests and affected code --- .../dataone/hashstore/HashStoreClient.java | 2 +- .../org/dataone/hashstore/ObjectMetadata.java | 130 +++++++++--------- .../filehashstore/FileHashStore.java | 17 ++- .../org/dataone/hashstore/HashStoreTest.java | 2 +- .../dataone/hashstore/ObjectMetadataTest.java | 13 +- .../FileHashStoreInterfaceTest.java | 52 +++---- .../FileHashStoreProtectedTest.java | 30 ++-- 7 files changed, 124 insertions(+), 122 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index b62cd0c5..b5c0aadd 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -173,7 +173,7 @@ public static void main(String[] args) throws Exception { ); pidObjStream.close(); System.out.println("Object Info for pid (" + pid + "):"); - System.out.println(objInfo.getHexDigests()); + System.out.println(objInfo.hexDigests()); } else if (cmd.hasOption("storemetadata")) { String pid = cmd.getOptionValue("pid"); diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 8ae7fdf2..39a1af96 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -9,68 +9,72 @@ * client code can easily obtain metadata of a store object in HashStore without needing to know the * underlying file system details. */ -public class ObjectMetadata { - private String pid; - private final String cid; - private final long size; - private final Map hexDigests; +public record ObjectMetadata(String pid, String cid, long size, Map hexDigests) { - /** - * Creates a new instance of ObjectMetadata with the given properties. - * - * @param pid Authority based or persistent identifer, null by default - * @param cid Unique identifier for the file - * @param size Size of stored file - * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the - * file - */ - public ObjectMetadata(String pid, String cid, long size, Map hexDigests) { - this.pid = pid; - this.cid = cid; - this.size = size; - this.hexDigests = hexDigests; - } - - /** - * Get the persistent identifier - * - * @return pid - */ - public String getPid() { - return pid; - } - - /** - * Set the persistent identifier - */ - public void setPid(String pid) { - this.pid = pid; - } - - /** - * Return the cid (content identifier) - * - * @return cid - */ - public String getCid() { - return cid; - } - - /** - * Return the size - * - * @return size - */ - public long getSize() { - return size; - } - - /** - * Return a map of hex digests (checksums) - * - * @return hexDigests - */ - public Map getHexDigests() { - return hexDigests; - } } +// TODO Delete unused code +//public class ObjectMetadata { +// private String pid; +// private final String cid; +// private final long size; +// private final Map hexDigests; +// +// /** +// * Creates a new instance of ObjectMetadata with the given properties. +// * +// * @param pid Authority based or persistent identifer, null by default +// * @param cid Unique identifier for the file +// * @param size Size of stored file +// * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the +// * file +// */ +// public ObjectMetadata(String pid, String cid, long size, Map hexDigests) { +// this.pid = pid; +// this.cid = cid; +// this.size = size; +// this.hexDigests = hexDigests; +// } +// +// /** +// * Get the persistent identifier +// * +// * @return pid +// */ +// public String getPid() { +// return pid; +// } +// +// /** +// * Set the persistent identifier +// */ +// public void setPid(String pid) { +// this.pid = pid; +// } +// +// /** +// * Return the cid (content identifier) +// * +// * @return cid +// */ +// public String getCid() { +// return cid; +// } +// +// /** +// * Return the size +// * +// * @return size +// */ +// public long getSize() { +// return size; +// } +// +// /** +// * Return a map of hex digests (checksums) +// * +// * @return hexDigests +// */ +// public Map getHexDigests() { +// return hexDigests; +// } +//} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 858089b2..3f5609ea 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -483,9 +483,8 @@ private ObjectMetadata syncPutObject( object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize ); // Tag object - String cid = objInfo.getCid(); + String cid = objInfo.cid(); tagObject(pid, cid); - objInfo.setPid(pid); logFileHashStore.info( "Object stored for pid: " + pid + " at " + getHashStoreDataObjectPath(pid)); return objInfo; @@ -841,21 +840,21 @@ public void deleteIfInvalidObject( throws NonMatchingObjSizeException, NonMatchingChecksumException, UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, IOException { - logFileHashStore.debug("Verifying data object for cid: " + objectInfo.getCid()); + logFileHashStore.debug("Verifying data object for cid: " + objectInfo.cid()); // Validate input parameters FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "deleteInvalidObject"); FileHashStoreUtility.ensureNotNull( - objectInfo.getHexDigests(), "objectInfo.getHexDigests()", "deleteInvalidObject"); - if (objectInfo.getHexDigests().isEmpty()) { + objectInfo.hexDigests(), "objectInfo.getHexDigests()", "deleteInvalidObject"); + if (objectInfo.hexDigests().isEmpty()) { throw new MissingHexDigestsException("Missing hexDigests in supplied ObjectMetadata"); } FileHashStoreUtility.ensureNotNull(checksum, "checksum", "deleteInvalidObject"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "deleteInvalidObject"); FileHashStoreUtility.checkPositive(objSize, "deleteInvalidObject"); - String objCid = objectInfo.getCid(); - long objInfoRetrievedSize = objectInfo.getSize(); - Map hexDigests = objectInfo.getHexDigests(); + String objCid = objectInfo.cid(); + long objInfoRetrievedSize = objectInfo.size(); + Map hexDigests = objectInfo.hexDigests(); String digestFromHexDigests = hexDigests.get(checksumAlgorithm); // Confirm that requested checksum to verify against is available @@ -2184,7 +2183,7 @@ protected InputStream getHashStoreMetadataInputStream(String pid, String formatI */ protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) throws NoSuchAlgorithmException { - Path realPath = null; + Path realPath; switch (refType) { case pid -> { diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index fbb3249e..caad3b85 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -131,7 +131,7 @@ public void hashStore_storeObjects() throws Exception { // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, objInfo.getCid()); + assertEquals(objContentId, objInfo.cid()); } } } diff --git a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java index e946107b..1f4614c6 100644 --- a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java @@ -56,7 +56,7 @@ public void testObjectMetadata() { @Test public void testObjectMetadataGetPid() { ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); - String pid = objInfo.getPid(); + String pid = objInfo.pid(); assertNull(pid); } @@ -65,11 +65,10 @@ public void testObjectMetadataGetPid() { */ @Test public void testObjectMetadataSetPid() { - ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); String pidToSet = "dou.test.1"; - objInfo.setPid(pidToSet); + ObjectMetadata objInfo = new ObjectMetadata(pidToSet, id, size, hexDigests); - String pidFromObjectMetadata = objInfo.getPid(); + String pidFromObjectMetadata = objInfo.pid(); assertEquals(pidFromObjectMetadata, pidToSet); } @@ -79,7 +78,7 @@ public void testObjectMetadataSetPid() { @Test public void testObjectMetadataGetId() { ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); - String objId = objInfo.getCid(); + String objId = objInfo.cid(); assertEquals(objId, id); } @@ -89,7 +88,7 @@ public void testObjectMetadataGetId() { @Test public void testHashAddressGetSize() { ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); - long objSize = objInfo.getSize(); + long objSize = objInfo.size(); assertEquals(objSize, size); } @@ -99,7 +98,7 @@ public void testHashAddressGetSize() { @Test public void testObjectMetadataGetHexDigests() { ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); - Map objInfoMap = objInfo.getHexDigests(); + Map objInfoMap = objInfo.hexDigests(); assertEquals(objInfoMap, hexDigests); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 898fc389..c7778a09 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -108,8 +108,8 @@ public void storeObject() throws Exception { // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); - assertEquals(objectCid, objInfo.getCid()); - assertEquals(pid, objInfo.getPid()); + assertEquals(objectCid, objInfo.cid()); + assertEquals(pid, objInfo.pid()); } } } @@ -130,7 +130,7 @@ public void storeObject_objSize() throws Exception { // Check the object size long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + assertEquals(objectSize, objInfo.size()); } @@ -151,7 +151,7 @@ public void storeObject_hexDigests() throws Exception { dataStream, pid, null, null, null, -1 ); - Map hexDigests = objInfo.getHexDigests(); + Map hexDigests = objInfo.hexDigests(); // Validate checksum values String md5 = testData.pidData.get(pid).get("md5"); @@ -277,9 +277,9 @@ public void storeObject_overloadInputStreamOnly() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - Map hexDigests = objInfo.getHexDigests(); + Map hexDigests = objInfo.hexDigests(); String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); @@ -399,7 +399,7 @@ public void storeObject_objSizeCorrect() throws Exception { ); // Check id (sha-256 hex digest of the ab_id (pid)) - assertEquals(objectSize, objInfo.getSize()); + assertEquals(objectSize, objInfo.size()); } } } @@ -421,7 +421,7 @@ public void storeObject_objSizeIncorrect() { // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + assertEquals(objectSize, objInfo.size()); } }); } @@ -462,7 +462,7 @@ public void storeObject_duplicate() throws Exception { dataStreamDup, pidTwo, null, null, null, -1 ); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); @@ -578,7 +578,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); @@ -599,7 +599,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); @@ -620,7 +620,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); @@ -641,7 +641,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); @@ -662,7 +662,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { dataStream, pid, null, null, null, -1 ); if (objInfo != null) { - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path pidRefsPath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); @@ -847,7 +847,7 @@ public void deleteIfInvalidObject_correctValues() throws Exception { int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() + storeDepth, storeWidth, objInfo.cid() ); // Real path to the data object assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( @@ -911,7 +911,7 @@ public void deleteIfInvalidObject_supportedAlgoNotInDefaultList() throws Excepti int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() + storeDepth, storeWidth, objInfo.cid() ); // Real path to the data object assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( @@ -941,7 +941,7 @@ public void deleteIfInvalidObject_unsupportedAlgo() throws Exception { int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() + storeDepth, storeWidth, objInfo.cid() ); // Real path to the data object assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( @@ -977,7 +977,7 @@ public void deleteIfInvalidObject_mismatchedSize() throws Exception { int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() + storeDepth, storeWidth, objInfo.cid() ); // Real path to the data object assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( @@ -1013,7 +1013,7 @@ public void deleteIfInvalidObject_mismatchedChecksum() throws Exception { int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() + storeDepth, storeWidth, objInfo.cid() ); // Real path to the data object assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( @@ -1707,7 +1707,7 @@ public void deleteObject_referencesDeleted() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); Path absPathPidRefsPath = @@ -1737,7 +1737,7 @@ public void deleteObject_cidRefsFileNotEmptyObjectExistsStill() throws Exception dataStream, pid, null, null, null, -1 ); String pidExtra = "dou.test" + pid; - String cid = objInfo.getCid(); + String cid = objInfo.cid(); fileHashStore.tagObject(pidExtra, cid); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -1768,7 +1768,7 @@ public void deleteObject_pidOrphan() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); String pidExtra = "dou.test" + pid; Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -1866,14 +1866,14 @@ public void deleteObjectByCid() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); fileHashStore.deleteObjectByCid(cid); // Get permanent address of the actual cid int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getCid(); + String actualCid = objInfo.cid(); String cidShardString = FileHashStoreUtility.getHierarchicalPathString( storeDepth, storeWidth, actualCid ); @@ -1897,7 +1897,7 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { ObjectMetadata objInfo = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); fileHashStore.deleteObjectByCid(cid); @@ -2134,7 +2134,7 @@ public void getHexDigest() throws Exception { // Then get the checksum String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); - String objSha256Checksum = objInfo.getHexDigests().get("SHA-256"); + String objSha256Checksum = objInfo.hexDigests().get("SHA-256"); assertEquals(pidHexDigest, sha256DigestFromTestData); assertEquals(pidHexDigest, objSha256Checksum); } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index a4c46a78..06249e7e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -109,7 +109,7 @@ public void findObject_cid() throws Exception { ); Map objInfoMap = fileHashStore.findObject(pid); - assertEquals(objInfoMap.get("cid"), objInfo.getCid()); + assertEquals(objInfoMap.get("cid"), objInfo.cid()); } } } @@ -134,7 +134,7 @@ public void findObject_cidPath() throws Exception { String objectPath = objInfoMap.get("cid_object_path"); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.getCid() + storeDepth, storeWidth, objInfo.cid() ); Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); @@ -161,7 +161,7 @@ public void findObject_refsPaths() throws Exception { String cidRefsPath = objInfoMap.get("cid_refs_path"); String pidRefsPath = objInfoMap.get("pid_refs_path"); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.getCid(), FileHashStore.HashStoreIdTypes.cid); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.cid(), FileHashStore.HashStoreIdTypes.cid); Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); @@ -291,7 +291,7 @@ public void putObject_testHarness_id() throws Exception { // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, address.getCid()); + assertEquals(objContentId, address.cid()); } } } @@ -310,7 +310,7 @@ public void putObject_objSize() throws Exception { // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + assertEquals(objectSize, objInfo.size()); } @@ -329,7 +329,7 @@ public void putObject_testHarness_hexDigests() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); - Map hexDigests = address.getHexDigests(); + Map hexDigests = address.hexDigests(); // Validate checksum values String md5 = testData.pidData.get(pid).get("md5"); @@ -360,7 +360,7 @@ public void putObject_validateChecksumValue() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, checksumCorrect, "MD2", -1); - String objCid = address.getCid(); + String objCid = address.cid(); // Get relative path String objCidShardString = FileHashStoreUtility.getHierarchicalPathString(3, 2, objCid); // Get absolute path @@ -487,7 +487,7 @@ public void putObject_objSizeCorrect() throws Exception { ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, objectSize); // Check id (sha-256 hex digest of the ab_id (pid)) - assertEquals(objectSize, objInfo.getSize()); + assertEquals(objectSize, objInfo.size()); } } } @@ -507,7 +507,7 @@ public void putObject_objSizeIncorrect() { // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + assertEquals(objectSize, objInfo.size()); } }); } @@ -1037,7 +1037,7 @@ public void deleteObjectByCid() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { // Store object only ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); // Try deleting the object fileHashStore.deleteObjectByCid(cid); @@ -1066,7 +1066,7 @@ public void deleteObjectByCid_cidRefsFileContainsPids() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); // Try deleting the object fileHashStore.deleteObjectByCid(cid); @@ -1915,7 +1915,7 @@ public void isStringInRefsFile_pidFound() throws Exception { try (InputStream dataStreamDup = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); @@ -1935,7 +1935,7 @@ public void isStringInRefsFile_pidNotFound() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertFalse(fileHashStore.isStringInRefsFile("pid.not.found", absCidRefsPath)); @@ -1954,7 +1954,7 @@ public void getHashStoreDataObjectPath() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); // Manually form the permanent address of the actual cid Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -2093,7 +2093,7 @@ public void getHashStoreRefsPath_cid() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String cid = objInfo.getCid(); + String cid = objInfo.cid(); // Manually form the permanent address of the actual cid Path storePath = Paths.get(fhsProperties.getProperty("storePath")); From ce07581e493ceb390634ec4e513a4259c44ff201 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 14:44:14 -0700 Subject: [PATCH 478/553] Cleanup 'ObjectMetadata' class --- .../org/dataone/hashstore/ObjectMetadata.java | 74 +------------------ 1 file changed, 3 insertions(+), 71 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 39a1af96..8d6c360d 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -3,78 +3,10 @@ import java.util.Map; /** - * ObjectMetadata is a class that models a unique identifier for an object in the HashStore. It + * ObjectMetadata is a record that that contains metadata about an object in the HashStore. It * encapsulates information about a file's authority-based/persistent identifier (pid), content - * identifier (cid), size, and associated hash digest values. By using ObjectMetadata objects, - * client code can easily obtain metadata of a store object in HashStore without needing to know the - * underlying file system details. + * identifier (cid), size, and associated hash digest values. */ public record ObjectMetadata(String pid, String cid, long size, Map hexDigests) { -} -// TODO Delete unused code -//public class ObjectMetadata { -// private String pid; -// private final String cid; -// private final long size; -// private final Map hexDigests; -// -// /** -// * Creates a new instance of ObjectMetadata with the given properties. -// * -// * @param pid Authority based or persistent identifer, null by default -// * @param cid Unique identifier for the file -// * @param size Size of stored file -// * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the -// * file -// */ -// public ObjectMetadata(String pid, String cid, long size, Map hexDigests) { -// this.pid = pid; -// this.cid = cid; -// this.size = size; -// this.hexDigests = hexDigests; -// } -// -// /** -// * Get the persistent identifier -// * -// * @return pid -// */ -// public String getPid() { -// return pid; -// } -// -// /** -// * Set the persistent identifier -// */ -// public void setPid(String pid) { -// this.pid = pid; -// } -// -// /** -// * Return the cid (content identifier) -// * -// * @return cid -// */ -// public String getCid() { -// return cid; -// } -// -// /** -// * Return the size -// * -// * @return size -// */ -// public long getSize() { -// return size; -// } -// -// /** -// * Return a map of hex digests (checksums) -// * -// * @return hexDigests -// */ -// public Map getHexDigests() { -// return hexDigests; -// } -//} +} \ No newline at end of file From 5bce0ce9ebeccaa48c51ab1c1a770fdc167e9453 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 15:14:42 -0700 Subject: [PATCH 479/553] Add new record 'objectInfo', refactor 'findObject' to return an 'objectInfo' object and update junit tests --- .../filehashstore/FileHashStore.java | 31 ++++++++++--------- .../FileHashStoreProtectedTest.java | 24 +++++++------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3f5609ea..50da2eba 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -127,6 +127,10 @@ public String getName() { } } + record objectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, + String sysmetaPath) { + } + /** * Constructor to initialize FileHashStore, properties are required. FileHashStore is not * responsible for ensuring that the given store path is accurate. Upon initialization, if @@ -742,8 +746,8 @@ public void deleteObject(String pid) // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { - Map objInfoMap = findObject(pid); - String cid = objInfoMap.get("cid"); + objectInfo objInfoMap = findObject(pid); + String cid = objInfoMap.cid(); // If no exceptions are thrown, we proceed to synchronization based on the `cid` synchronizeObjectLockedCids(cid); @@ -1031,8 +1035,8 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE // Find the content identifier if (algorithm.equals(OBJECT_STORE_ALGORITHM)) { - Map objInfoMap = findObject(pid); - return objInfoMap.get("cid"); + objectInfo objInfo = findObject(pid); + return objInfo.cid(); } else { // Get permanent address of the pid object @@ -1076,7 +1080,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the * expected pid is not found in the cid refs file. */ - protected Map findObject(String pid) + protected objectInfo findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("Finding object for pid: " + pid); @@ -1106,20 +1110,17 @@ protected Map findObject(String pid) cid); Path realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); if (Files.exists(realPath)) { - Map objInfoMap = new HashMap<>(); - objInfoMap.put("cid", cid); - objInfoMap.put("cid_object_path", realPath.toString()); - objInfoMap.put("cid_refs_path", absCidRefsPath.toString()); - objInfoMap.put("pid_refs_path", absPidRefsPath.toString()); // If the default system metadata exists, include it Path metadataPidExpectedPath = getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); if (Files.exists(metadataPidExpectedPath)) { - objInfoMap.put("sysmeta_path", metadataPidExpectedPath.toString()); + return new objectInfo( + cid, realPath.toString(), absCidRefsPath.toString(), + absPidRefsPath.toString(), metadataPidExpectedPath.toString()); } else { - objInfoMap.put("sysmeta_path", "Does not exist"); + return new objectInfo(cid, realPath.toString(), absCidRefsPath.toString(), + absPidRefsPath.toString(), "Does not exist"); } - return objInfoMap; } else { String errMsg = "Object with cid: " + cid @@ -1754,8 +1755,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { - Map objInfoMap = findObject(pid); - cid = objInfoMap.get("cid"); + objectInfo objInfo= findObject(pid); + cid = objInfo.cid(); // If no exceptions are thrown, we proceed to synchronization based on the `cid` synchronizeObjectLockedCids(cid); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 06249e7e..ed7139ba 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -104,12 +104,12 @@ public void findObject_cid() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( + ObjectMetadata objMeta = fileHashStore.storeObject( dataStream, pid, null, null, null, -1 ); - Map objInfoMap = fileHashStore.findObject(pid); - assertEquals(objInfoMap.get("cid"), objInfo.cid()); + FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + assertEquals(findObjInfo.cid(), objMeta.cid()); } } } @@ -130,8 +130,8 @@ public void findObject_cidPath() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - Map objInfoMap = fileHashStore.findObject(pid); - String objectPath = objInfoMap.get("cid_object_path"); + FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + String objectPath = findObjInfo.cidObjectPath(); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( storeDepth, storeWidth, objInfo.cid() @@ -157,9 +157,9 @@ public void findObject_refsPaths() throws Exception { dataStream, pid, null, null, null, -1 ); - Map objInfoMap = fileHashStore.findObject(pid); - String cidRefsPath = objInfoMap.get("cid_refs_path"); - String pidRefsPath = objInfoMap.get("pid_refs_path"); + FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + String cidRefsPath = findObjInfo.cidRefsPath(); + String pidRefsPath = findObjInfo.pidRefsPath(); Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.cid(), FileHashStore.HashStoreIdTypes.cid); Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, @@ -188,8 +188,8 @@ public void findObject_sysmetaPath_exists() throws Exception { // Store Metadata fileHashStore.storeMetadata(metadataStream, pid); - Map objInfoMap = fileHashStore.findObject(pid); - String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + String objInfoSysmetaPath = findObjInfo.sysmetaPath(); String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); Path sysmetaPath = @@ -214,8 +214,8 @@ public void findObject_sysmetaPath_doesNotExist() throws Exception { dataStream, pid, null, null, null, -1 ); - Map objInfoMap = fileHashStore.findObject(pid); - String objInfoSysmetaPath = objInfoMap.get("sysmeta_path"); + FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + String objInfoSysmetaPath = findObjInfo.sysmetaPath(); assertEquals(objInfoSysmetaPath, "Does not exist"); } From 75ba6c00072b929d8c784d81af491ea25b289c0e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 15:27:49 -0700 Subject: [PATCH 480/553] Refactor 'checkForNotEmptyAndValidString' to call '.isBlank()' instead of .trim() and then .isEmpty() --- .../hashstore/filehashstore/FileHashStoreUtility.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index e25b47a8..33b47ff7 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -214,8 +214,9 @@ public static void deleteListItems(Collection deleteList) { public static void checkForNotEmptyAndValidString(String string, String argument, String method) throws IllegalArgumentException { ensureNotNull(string, "string", "checkForNotEmptyAndValidString"); - if (string.trim().isEmpty()) { - String errMsg = "Calling Method: " + method + "(): " + argument + " cannot be empty."; + if (string.isBlank()) { + String errMsg = "Calling Method: " + method + "(): " + argument + + " cannot be empty or contain empty white spaces, tabs or newlines."; throw new IllegalArgumentException(errMsg); } if (!isValidString(string)) { From f5ca23db0881db3a74a68feb8f52c8101250d6ee Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 13 Aug 2024 15:33:02 -0700 Subject: [PATCH 481/553] Refactor 'HashStoreRunnable' run's switch case per formatter suggestion --- .../java/org/dataone/hashstore/HashStoreRunnable.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 5db0a6fc..01f249e9 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -68,7 +68,7 @@ public void run() { log.debug("HashStoreRunnable - Called to: " + publicAPIMethod); try { switch (publicAPIMethod) { - case storeObject: + case storeObject -> { try { hashstore.storeObject(objStream, pid, null, null, null, -1); } catch (Exception e) { @@ -79,8 +79,8 @@ public void run() { throw new HashStoreServiceException(errMsg); } objStream.close(); - break; - case deleteObject: + } + case deleteObject -> { try { hashstore.deleteObject(pid); } catch (Exception e) { @@ -90,7 +90,7 @@ public void run() { log.error(errMsg); throw new HashStoreServiceException(errMsg); } - break; + } } } catch (HashStoreServiceException | IOException hse) { log.error( From 84846f4ddae99f35dcbba68b663408f52508b87c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 08:11:51 -0700 Subject: [PATCH 482/553] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index af5f32e2..82ccfd02 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ and the store width. Lastly, objects are 'tagged' with a given identifier (ex. p identifier (pid)). This process produces reference files, which allow objects to be found and retrieved with a given identifier. - Note 1: An identifier can only be used once -- Note 2: Objects are stored once and only once using its content identifier (a checksum generated +- Note 2: Each object is stored once and only once using its content identifier (a checksum generated from using a hashing algorithm). Clients that attempt to store duplicate objects will receive the expected ObjectMetadata - with HashStore handling the de-duplication process under the hood. @@ -153,7 +153,7 @@ objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, chec objectMetadata objInfo = storeObject(InputStream); // Validate object, if the parameters do not match, the data object associated with the objInfo // supplied will be deleted -deleteInvalidObject(objInfo, checksum, checksumAlgorithn, objSize); +deleteIfInvalidObject(objInfo, checksum, checksumAlgorithn, objSize); // Tag object, makes the object discoverable (find, retrieve, delete) tagObject(pid, cid); ``` From f369441ea6be0657a7140d9ccc3219c922e58459 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 08:35:10 -0700 Subject: [PATCH 483/553] Update javadocs in 'HashStore' interface --- src/main/java/org/dataone/hashstore/HashStore.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 22d5c083..2b515f3a 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -21,10 +21,11 @@ public interface HashStore { /** * The {@code storeObject} method is responsible for the atomic storage of objects to * disk using a given InputStream. Upon successful storage, the method returns a - * (ObjectMetadata) object containing relevant file information, such as the file's id - * (which can be used to locate the object on disk), the file's size, and a hex digest - * dict of algorithms and checksums. Storing an object with {@code store_object} also - * tags an object (creating references) which allow the object to be discoverable. + * (@Code ObjectMetadata) object containing relevant file information, such as the file's + * id (which can be used by a system administrator -- but not by an API client -- to locate + * the object on disk), the file's size, and a hex digest dict of algorithms and + * checksums. Storing an object with {@code store_object} also tags an object (creating + * references) which allow the object to be discoverable. * * {@code storeObject} also ensures that an object is stored only once by synchronizing * multiple calls and rejecting calls to store duplicate objects. Note, calling {@code From dc6a6fb73e78d61562ea1b18bd7e1cf9d220270c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:04:03 -0700 Subject: [PATCH 484/553] Refactor and simplify usage of enum objects 'HashStoreIdTypes' and 'HashStoreRefUpdateTypes' --- .../filehashstore/FileHashStore.java | 39 +++++-------------- .../FileHashStoreProtectedTest.java | 5 +-- 2 files changed, 11 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 50da2eba..414a9578 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -79,20 +79,6 @@ public class FileHashStore implements HashStore { public static final String[] SUPPORTED_HASH_ALGORITHMS = {"MD2", "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; - public enum HashStoreIdTypes { - - cid("cid"), pid("pid"); - - final String identifierType; - - HashStoreIdTypes(String idType) { - identifierType = idType; - } - - public String getName() { - return identifierType; - } - } enum DefaultHashAlgorithms { MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); @@ -108,23 +94,16 @@ public String getName() { } } + public enum HashStoreIdTypes { + cid, pid + } + enum HashStoreProperties { storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace } enum HashStoreRefUpdateTypes { - - add("add"), remove("remove"); - - final String refUpdateType; - - HashStoreRefUpdateTypes(String updateType) { - refUpdateType = updateType; - } - - public String getName() { - return refUpdateType; - } + add, remove } record objectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, @@ -1675,7 +1654,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo if (retrievedCid.equalsIgnoreCase(cid)) { // The pid correctly references the cid, but the cid refs file is missing // Create the file and verify tagging process - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); File absPathCidRefsFile = absCidRefsPath.toFile(); move(cidRefsTmpFile, absPathCidRefsFile, "refs"); verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); @@ -1705,7 +1684,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); } // Get the pid refs file and verify tagging process - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); @@ -1716,8 +1695,8 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo } // Get pid and cid refs files - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.getName()); - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.getName()); + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); // Move refs files to permanent location File absPathPidRefsFile = absPidRefsPath.toFile(); File absPathCidRefsFile = absCidRefsPath.toFile(); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index ed7139ba..7bf45e1d 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1200,9 +1200,8 @@ public void storeHashStoreRefsFiles_pidRefsOrphanedFile() // Create orphaned pid refs file Path absPidRefsPath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); - File pidRefsTmpFile = fileHashStore.writeRefsFile( - cidForOrphanPidRef, FileHashStore.HashStoreIdTypes.pid.getName() - ); + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidForOrphanPidRef, + FileHashStore.HashStoreIdTypes.pid.name()); File absPathPidRefsFile = absPidRefsPath.toFile(); fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); From 7ba3b7398add67293d15ea37599235ec0451af3a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:08:05 -0700 Subject: [PATCH 485/553] Add missing javadocs for enum objects to add clarity --- .../filehashstore/FileHashStore.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 414a9578..f1d2bf48 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -80,6 +80,9 @@ public class FileHashStore implements HashStore { "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; + /** + * The default hash algorithms included in the ObjectMetadata when storing objects. + */ enum DefaultHashAlgorithms { MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); @@ -94,18 +97,37 @@ public String getName() { } } + /** + * The two different type of HashStore identifiers + */ public enum HashStoreIdTypes { cid, pid } + /** + * The configuration properties for a HashStore + */ enum HashStoreProperties { storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace } + /** + * When working with refs files, we either add or remove values + */ enum HashStoreRefUpdateTypes { add, remove } + /** + * Record object to encapsulate information when searching for an existing HashStore object + * based on a given persistent identifier {@code pid} + * + * @param cid Content identifier of the data object of a given pid + * @param cidObjectPath Path to the data object + * @param cidRefsPath Path to the data object's reference file + * @param pidRefsPath Path to the pid's that references the data object + * @param sysmetaPath Path to the pid's system metadata if available + */ record objectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, String sysmetaPath) { } From 1eabe79e222c316ab25e32781918a6004f5d98b4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:21:15 -0700 Subject: [PATCH 486/553] Rename 'objectInfo' record to 'ObjectInfo' --- .../hashstore/filehashstore/FileHashStore.java | 14 +++++++------- .../filehashstore/FileHashStoreProtectedTest.java | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f1d2bf48..33874724 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -128,7 +128,7 @@ enum HashStoreRefUpdateTypes { * @param pidRefsPath Path to the pid's that references the data object * @param sysmetaPath Path to the pid's system metadata if available */ - record objectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, + record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, String sysmetaPath) { } @@ -747,7 +747,7 @@ public void deleteObject(String pid) // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { - objectInfo objInfoMap = findObject(pid); + ObjectInfo objInfoMap = findObject(pid); String cid = objInfoMap.cid(); // If no exceptions are thrown, we proceed to synchronization based on the `cid` @@ -1036,7 +1036,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE // Find the content identifier if (algorithm.equals(OBJECT_STORE_ALGORITHM)) { - objectInfo objInfo = findObject(pid); + ObjectInfo objInfo = findObject(pid); return objInfo.cid(); } else { @@ -1081,7 +1081,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the * expected pid is not found in the cid refs file. */ - protected objectInfo findObject(String pid) + protected ObjectInfo findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("Finding object for pid: " + pid); @@ -1115,11 +1115,11 @@ protected objectInfo findObject(String pid) Path metadataPidExpectedPath = getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); if (Files.exists(metadataPidExpectedPath)) { - return new objectInfo( + return new ObjectInfo( cid, realPath.toString(), absCidRefsPath.toString(), absPidRefsPath.toString(), metadataPidExpectedPath.toString()); } else { - return new objectInfo(cid, realPath.toString(), absCidRefsPath.toString(), + return new ObjectInfo(cid, realPath.toString(), absCidRefsPath.toString(), absPidRefsPath.toString(), "Does not exist"); } @@ -1756,7 +1756,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { - objectInfo objInfo= findObject(pid); + ObjectInfo objInfo = findObject(pid); cid = objInfo.cid(); // If no exceptions are thrown, we proceed to synchronization based on the `cid` synchronizeObjectLockedCids(cid); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 7bf45e1d..3c3364f4 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -108,7 +108,7 @@ public void findObject_cid() throws Exception { dataStream, pid, null, null, null, -1 ); - FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); assertEquals(findObjInfo.cid(), objMeta.cid()); } } @@ -130,7 +130,7 @@ public void findObject_cidPath() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); String objectPath = findObjInfo.cidObjectPath(); String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( @@ -157,7 +157,7 @@ public void findObject_refsPaths() throws Exception { dataStream, pid, null, null, null, -1 ); - FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); String cidRefsPath = findObjInfo.cidRefsPath(); String pidRefsPath = findObjInfo.pidRefsPath(); @@ -188,7 +188,7 @@ public void findObject_sysmetaPath_exists() throws Exception { // Store Metadata fileHashStore.storeMetadata(metadataStream, pid); - FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); String objInfoSysmetaPath = findObjInfo.sysmetaPath(); String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); @@ -214,7 +214,7 @@ public void findObject_sysmetaPath_doesNotExist() throws Exception { dataStream, pid, null, null, null, -1 ); - FileHashStore.objectInfo findObjInfo = fileHashStore.findObject(pid); + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); String objInfoSysmetaPath = findObjInfo.sysmetaPath(); assertEquals(objInfoSysmetaPath, "Does not exist"); From 0bd9a3f425c9404d32749488b41de9c86cd1529e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:23:15 -0700 Subject: [PATCH 487/553] Rename references of 'deleteInvalidObject' to 'deleteIfInvalidObject' --- .../hashstore/filehashstore/FileHashStore.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 33874724..2d3515cf 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -540,7 +540,7 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce // algorithm, etc.) is unavailable. // // Note: This method does not tag the object to make it discoverable, so the client can - // call 'deleteInvalidObject' (optional) to check that the object is valid, and then + // call 'deleteIfInvalidObject' (optional) to check that the object is valid, and then // 'tagObject' (required) to create the reference files needed to associate the // respective pids/cids. try (object) { @@ -847,15 +847,15 @@ public void deleteIfInvalidObject( IOException { logFileHashStore.debug("Verifying data object for cid: " + objectInfo.cid()); // Validate input parameters - FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "deleteInvalidObject"); + FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "deleteIfInvalidObject"); FileHashStoreUtility.ensureNotNull( - objectInfo.hexDigests(), "objectInfo.getHexDigests()", "deleteInvalidObject"); + objectInfo.hexDigests(), "objectInfo.getHexDigests()", "deleteIfInvalidObject"); if (objectInfo.hexDigests().isEmpty()) { throw new MissingHexDigestsException("Missing hexDigests in supplied ObjectMetadata"); } - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "deleteInvalidObject"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "deleteInvalidObject"); - FileHashStoreUtility.checkPositive(objSize, "deleteInvalidObject"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum", "deleteIfInvalidObject"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "deleteIfInvalidObject"); + FileHashStoreUtility.checkPositive(objSize, "deleteIfInvalidObject"); String objCid = objectInfo.cid(); long objInfoRetrievedSize = objectInfo.size(); From 189eaf327e3d7ad2ada2f85a9fba0d8169ba12b2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:32:00 -0700 Subject: [PATCH 488/553] Refactor 'tagObject' by moving synchronization code to be closer to code requiring it in 'storeHashStoreRefsFiles' --- .../filehashstore/FileHashStore.java | 144 +++++++++--------- 1 file changed, 73 insertions(+), 71 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2d3515cf..ff2c8f8a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -561,34 +561,26 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid", "tagObject"); try { - synchronizeObjectLockedCids(cid); - synchronizeReferenceLockedPids(pid); storeHashStoreRefsFiles(pid, cid); } catch (HashStoreRefsAlreadyExistException hsrfae) { + // *** cid and pid already released *** // This exception is thrown when the pid and cid are already tagged appropriately String errMsg = "HashStore refs files already exist for pid " + pid + " and cid: " + cid; throw new HashStoreRefsAlreadyExistException(errMsg); } catch (PidRefsFileExistsException prfe) { + // *** cid and pid already released *** String errMsg = "pid: " + pid + " already references another cid." + " A pid can only reference one cid."; throw new PidRefsFileExistsException(errMsg); } catch (Exception e) { + // *** cid and pid already released *** // Revert the process for all other exceptions - // We must first release the cid and pid since 'unTagObject' is synchronized - // If not, we will run into a deadlock. - releaseObjectLockedCids(cid); - releaseReferenceLockedPids(pid); unTagObject(pid, cid); throw e; - - } finally { - // Release locks - releaseObjectLockedCids(cid); - releaseReferenceLockedPids(pid); } } @@ -1654,80 +1646,90 @@ protected void deleteObjectByCid(String cid) * @param cid Content identifier * @throws NoSuchAlgorithmException If there is an issue related to calculating hashes * @throws IOException If there is an issue reading/writing a refs file + * @throws InterruptedException If there is an issue when synchronizing pid or cid values */ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgorithmException, - IOException { - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + IOException, InterruptedException { + try { + // Immediately synchronize cid and pid + synchronizeObjectLockedCids(cid); + synchronizeReferenceLockedPids(pid); - if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Confirm that reference files are where they are expected to be - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - // We throw an exception so the client is aware that everything is in place - String errMsg = - "Object with cid: " + cid + " already exists and is tagged with pid: " + pid; - logFileHashStore.error(errMsg); - throw new HashStoreRefsAlreadyExistException(errMsg); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); - } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { - // If pid refs exists, it can only contain and reference one cid - // First, compare the cid retrieved from the pid refs file from the supplied cid - String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); - if (retrievedCid.equalsIgnoreCase(cid)) { - // The pid correctly references the cid, but the cid refs file is missing - // Create the file and verify tagging process - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Confirm that reference files are where they are expected to be + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + // We throw an exception so the client is aware that everything is in place + String errMsg = + "Object with cid: " + cid + " already exists and is tagged with pid: " + pid; + logFileHashStore.error(errMsg); + throw new HashStoreRefsAlreadyExistException(errMsg); + + } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { + // If pid refs exists, it can only contain and reference one cid + // First, compare the cid retrieved from the pid refs file from the supplied cid + String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); + if (retrievedCid.equalsIgnoreCase(cid)) { + // The pid correctly references the cid, but the cid refs file is missing + // Create the file and verify tagging process + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "Pid refs file exists for pid: " + pid + ", but cid refs file for: " + cid + + " is missing. Missing cid refs file created and tagging completed."); + return; + } else { + // Check if the retrieved cid refs file exists and pid is referenced + Path retrievedAbsCidRefsPath = + getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); + if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath + )) { + // This pid is accounted for and tagged as expected. + String errMsg = "Pid refs file already exists for pid: " + pid + + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); + } + // Orphaned pid refs file found, the retrieved cid refs file exists + // but doesn't contain the pid. Proceed to overwrite the pid refs file. + } + } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Only update cid refs file if pid is not in the file + if (!isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); + } + // Get the pid refs file and verify tagging process + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); + File absPathPidRefsFile = absPidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( - "Pid refs file exists for pid: " + pid + ", but cid refs file for: " + cid - + " is missing. Missing cid refs file created and tagging completed."); + "Object with cid: " + cid + " has been updated and tagged successfully with pid: " + + pid); return; - } else { - // Check if the retrieved cid refs file exists and pid is referenced - Path retrievedAbsCidRefsPath = - getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); - if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath - )) { - // This pid is accounted for and tagged as expected. - String errMsg = "Pid refs file already exists for pid: " + pid - + ", and the associated cid refs file contains the " - + "pid. A pid can only reference one cid."; - logFileHashStore.error(errMsg); - throw new PidRefsFileExistsException(errMsg); - } - // Orphaned pid refs file found, the retrieved cid refs file exists - // but doesn't contain the pid. Proceed to overwrite the pid refs file. } - } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Only update cid refs file if pid is not in the file - if (!isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); - } - // Get the pid refs file and verify tagging process + + // Get pid and cid refs files File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); + // Move refs files to permanent location File absPathPidRefsFile = absPidRefsPath.toFile(); + File absPathCidRefsFile = absCidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + // Verify tagging process, this throws an exception if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info( - "Object with cid: " + cid + " has been updated and tagged successfully with pid: " - + pid); - return; + "Object with cid: " + cid + " has been tagged successfully with pid: " + pid); + } finally { + releaseObjectLockedCids(cid); + releaseReferenceLockedPids(pid); } - - // Get pid and cid refs files - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); - // Move refs files to permanent location - File absPathPidRefsFile = absPidRefsPath.toFile(); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // Verify tagging process, this throws an exception if there's an issue - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "Object with cid: " + cid + " has been tagged successfully with pid: " + pid); } /** From 8ceb641b8d4866d27660a2d494d431b10072d5c2 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:47:44 -0700 Subject: [PATCH 489/553] Refactor 'checkForNotEmptyAndValidString' to get method name via thread and update signature to remove 'method' argument --- .../filehashstore/FileHashStore.java | 72 +++++++++---------- .../filehashstore/FileHashStoreUtility.java | 22 +++--- .../FileHashStoreProtectedTest.java | 6 +- 3 files changed, 47 insertions(+), 53 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index ff2c8f8a..2c861ccb 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -260,8 +260,7 @@ protected void verifyHashStoreProperties( storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" ); FileHashStoreUtility.checkForNotEmptyAndValidString( - storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" - ); + storeMetadataNamespace, "storeMetadataNamespace"); // Check to see if configuration exists before initializing Path hashstoreYamlPredictedPath = Paths.get(storePath + "/hashstore.yaml"); @@ -434,16 +433,16 @@ public ObjectMetadata storeObject( // Validate input parameters FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "storeObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { FileHashStoreUtility.checkForNotEmptyAndValidString( - additionalAlgorithm, "additionalAlgorithm", "storeObject"); + additionalAlgorithm, "additionalAlgorithm"); validateAlgorithm(additionalAlgorithm); } if (checksumAlgorithm != null) { FileHashStoreUtility.checkForNotEmptyAndValidString( - checksumAlgorithm, "checksumAlgorithm", "storeObject"); + checksumAlgorithm, "checksumAlgorithm"); validateAlgorithm(checksumAlgorithm); } if (objSize != -1) { @@ -557,8 +556,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "tagObject"); FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "tagObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid", "tagObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid"); try { storeHashStoreRefsFiles(pid, cid); @@ -592,14 +591,14 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "storeMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "storeMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); checkedFormatId = formatId; } @@ -662,7 +661,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, logFileHashStore.debug("Retrieving InputStream to data object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "retrieveObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Check to see if object exists Path objRealPath = getHashStoreDataObjectPath(pid); @@ -697,9 +696,9 @@ public InputStream retrieveMetadata(String pid, String formatId) "Retrieving metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "retrieveMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); return getHashStoreMetadataInputStream(pid, formatId); } @@ -714,7 +713,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, "Retrieving metadata for pid: " + pid + " with default metadata namespace: "); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); return getHashStoreMetadataInputStream(pid, DEFAULT_METADATA_NAMESPACE); } @@ -726,7 +725,7 @@ public void deleteObject(String pid) logFileHashStore.debug("Deleting object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "id", "deleteObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "id"); Collection deleteList = new ArrayList<>(); try { @@ -914,9 +913,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx "Deleting metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "deleteMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); // Get the path to the metadata document and add it to a list Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); @@ -941,7 +940,7 @@ public void deleteMetadata(String pid) InterruptedException { logFileHashStore.debug("Deleting all metadata documents for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Get the path to the pid metadata document directory String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); @@ -1023,7 +1022,7 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug("Calculating hex digest for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "getHexDigest"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); validateAlgorithm(algorithm); // Find the content identifier @@ -1078,7 +1077,7 @@ protected ObjectInfo findObject(String pid) PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("Finding object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "findObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Get path of the pid references file Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); @@ -1181,7 +1180,7 @@ protected ObjectMetadata putObject( // Validate additional algorithm if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { FileHashStoreUtility.checkForNotEmptyAndValidString( - additionalAlgorithm, "additionalAlgorithm", "putObject"); + additionalAlgorithm, "additionalAlgorithm"); validateAlgorithm(additionalAlgorithm); } if (objSize != -1) { @@ -1338,7 +1337,7 @@ protected void validateTmpObject( protected boolean validateAlgorithm(String algorithm) throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "validateAlgorithm"); - FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm", "validateAlgorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); if (!algorithmSupported) { @@ -1360,7 +1359,7 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio */ protected boolean shouldCalculateAlgorithm(String algorithm) { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "shouldCalculateAlgorithm"); - FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm", "shouldCalculateAlgorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); boolean shouldCalculateAlgorithm = true; for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { if (algorithm.equals(defAlgo.getName())) { @@ -1384,21 +1383,18 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor // First ensure algorithm is compatible and values are valid if they aren't null if (checksumAlgorithm != null) { FileHashStoreUtility.checkForNotEmptyAndValidString( - checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters"); + checksumAlgorithm, "checksumAlgorithm"); validateAlgorithm(checksumAlgorithm); } if (checksum != null) { - FileHashStoreUtility.checkForNotEmptyAndValidString( - checksum, "checksum", "verifyChecksumParameters"); + FileHashStoreUtility.checkForNotEmptyAndValidString(checksum, "checksum"); } // If checksum is supplied, checksumAlgorithm cannot be empty if (checksum != null && !checksum.trim().isEmpty()) { FileHashStoreUtility.ensureNotNull( checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters" ); - FileHashStoreUtility.checkForNotEmptyAndValidString( - checksumAlgorithm, "algorithm", "verifyChecksumParameters" - ); + FileHashStoreUtility.checkForNotEmptyAndValidString(checksumAlgorithm, "algorithm"); } // Ensure algorithm is supported, not null and not empty boolean requestValidation = false; @@ -1409,9 +1405,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor FileHashStoreUtility.ensureNotNull( checksum, "checksum", "verifyChecksumParameters" ); - FileHashStoreUtility.checkForNotEmptyAndValidString( - checksum, "checksum", "verifyChecksumParameters" - ); + FileHashStoreUtility.checkForNotEmptyAndValidString(checksum, "checksum"); } } return requestValidation; @@ -1441,16 +1435,14 @@ protected Map writeToTmpFileAndGenerateChecksums( boolean generateAddAlgo = false; if (additionalAlgorithm != null) { FileHashStoreUtility.checkForNotEmptyAndValidString( - additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" - ); + additionalAlgorithm, "additionalAlgorithm"); validateAlgorithm(additionalAlgorithm); generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); } boolean generateCsAlgo = false; if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { FileHashStoreUtility.checkForNotEmptyAndValidString( - checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" - ); + checksumAlgorithm, "checksumAlgorithm"); validateAlgorithm(checksumAlgorithm); generateCsAlgo = shouldCalculateAlgorithm(checksumAlgorithm); } @@ -1553,7 +1545,7 @@ protected void move(File source, File target, String entity) throws IOException, "Moving " + entity + ", from source: " + source + ", to target: " + target); // Validate input parameters FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); - FileHashStoreUtility.checkForNotEmptyAndValidString(entity, "entity", "move"); + FileHashStoreUtility.checkForNotEmptyAndValidString(entity, "entity"); if (entity.equals("object") && target.exists()) { String errMsg = "File already exists for target: " + target; logFileHashStore.warn(errMsg); @@ -1746,9 +1738,9 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, NoSuchAlgorithmException, IOException { // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid", "unTagObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "unTagObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); FileHashStoreUtility.ensureNotNull(cid, "cid", "unTagObject"); - FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid", "unTagObject"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid"); Collection deleteList = new ArrayList<>(); @@ -2019,14 +2011,14 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // Validate input parameters FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "putMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId", "putMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); checkedFormatId = formatId; } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 33b47ff7..46c62622 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -87,9 +87,9 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm public static String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, IllegalArgumentException { FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); - FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); - FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); byte[] bytes = pid.getBytes(StandardCharsets.UTF_8); @@ -211,18 +211,22 @@ public static void deleteListItems(Collection deleteList) { * @param method Calling method * @throws IllegalArgumentException If the string is empty or contains illegal characters */ - public static void checkForNotEmptyAndValidString(String string, String argument, String method) + public static void checkForNotEmptyAndValidString(String string, String argument) throws IllegalArgumentException { ensureNotNull(string, "string", "checkForNotEmptyAndValidString"); if (string.isBlank()) { - String errMsg = "Calling Method: " + method + "(): " + argument - + " cannot be empty or contain empty white spaces, tabs or newlines."; - throw new IllegalArgumentException(errMsg); + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = + "Calling Method: " + stackTraceElements[2].getMethodName() + "()'s argument: " + + argument + " cannot be empty, contain empty white spaces, tabs or newlines."; + throw new IllegalArgumentException(msg); } if (!isValidString(string)) { - String errMsg = "Calling Method: " + method + "(): " + argument - + " contains empty white spaces, tabs or newlines."; - throw new IllegalArgumentException(errMsg); + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = + "Calling Method: " + stackTraceElements[2].getMethodName() + "()'s argument: " + + argument + " contains empty white spaces, tabs or newlines."; + throw new IllegalArgumentException(msg); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 3c3364f4..fc7b0a4f 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -2133,8 +2133,7 @@ public void getHashStoreDataObjectPath_fileNotFound() { public void fileHashStoreUtility_checkForEmptyAndValidString() { assertThrows( IllegalArgumentException.class, - () -> FileHashStoreUtility.checkForNotEmptyAndValidString("dou.test.1\n", "pid", - "storeObject")); + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("dou.test.1\n", "pid")); } /** @@ -2145,8 +2144,7 @@ public void fileHashStoreUtility_checkForEmptyAndValidString() { public void fileHashStoreUtility_checkForEmptyAndValidString_newLine() { assertThrows( IllegalArgumentException.class, - () -> FileHashStoreUtility.checkForNotEmptyAndValidString("\n", "pid", - "storeObject")); + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("\n", "pid")); } /** From b94eb2c664ca057eb2da7e47948e146473214f39 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:54:26 -0700 Subject: [PATCH 490/553] Refactor 'ensureNotNull' to get method name via thread and update signature to remove 'method' argument --- .../dataone/hashstore/HashStoreClient.java | 43 ++++++----- .../filehashstore/FileHashStore.java | 74 ++++++++----------- .../filehashstore/FileHashStoreUtility.java | 23 +++--- .../dataone/hashstore/HashStoreRunnable.java | 8 +- 4 files changed, 67 insertions(+), 81 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index b5c0aadd..dd5be696 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -124,19 +124,18 @@ public static void main(String[] args) throws Exception { String originDirectory = cmd.getOptionValue("sdir"); String numObjects = cmd.getOptionValue("nobj"); String sizeOfFilesToSkip = cmd.getOptionValue("gbskip"); - FileHashStoreUtility.ensureNotNull(objType, "-stype", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(objType, "-stype"); + FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir"); FileHashStoreUtility.ensureNotNull( - action, "-sts, -rav, -dfs", "HashStoreClient" - ); + action, "-sts, -rav, -dfs"); testWithKnbvm(action, objType, originDirectory, numObjects, sizeOfFilesToSkip); } else if (cmd.hasOption("getchecksum")) { String pid = cmd.getOptionValue("pid"); String algo = cmd.getOptionValue("algo"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(algo, "-algo", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(algo, "-algo"); String hexDigest = hashStore.getHexDigest(pid, algo); System.out.println(hexDigest); @@ -145,8 +144,8 @@ public static void main(String[] args) throws Exception { System.out.println("Storing object"); String pid = cmd.getOptionValue("pid"); Path path = Paths.get(cmd.getOptionValue("path")); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(path, "-path", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(path, "-path"); String additional_algo = null; if (cmd.hasOption("algo")) { @@ -179,9 +178,9 @@ public static void main(String[] args) throws Exception { String pid = cmd.getOptionValue("pid"); Path path = Paths.get(cmd.getOptionValue("path")); String formatId = cmd.getOptionValue("format_id"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(path, "-path", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(path, "-path"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId"); InputStream pidObjStream = Files.newInputStream(path); String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); @@ -191,7 +190,7 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("retrieveobject")) { String pid = cmd.getOptionValue("pid"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); InputStream objStream = hashStore.retrieveObject(pid); byte[] buffer = new byte[1000]; @@ -205,8 +204,8 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("retrievemetadata")) { String pid = cmd.getOptionValue("pid"); String formatId = cmd.getOptionValue("format_id"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId"); InputStream metadataStream = hashStore.retrieveMetadata(pid, formatId); byte[] buffer = new byte[1000]; @@ -221,7 +220,7 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("deleteobject")) { String pid = cmd.getOptionValue("pid"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); hashStore.deleteObject(pid); System.out.println("Object for pid (" + pid + ") has been deleted."); @@ -229,8 +228,8 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("deletemetadata")) { String pid = cmd.getOptionValue("pid"); String formatId = cmd.getOptionValue("format_id"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId"); hashStore.deleteMetadata(pid, formatId); System.out.println( @@ -358,11 +357,11 @@ private static void createNewHashStore( String storePath, String storeDepth, String storeWidth, String storeAlgorithm, String storeNameSpace ) throws HashStoreFactoryException, IOException { - FileHashStoreUtility.ensureNotNull(storePath, "storePath", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeDepth, "storeDepth", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeWidth, "storeWidth", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeAlgorithm, "storeAlgorithm", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeNameSpace, "storeNameSpace", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(storePath, "storePath"); + FileHashStoreUtility.ensureNotNull(storeDepth, "storeDepth"); + FileHashStoreUtility.ensureNotNull(storeWidth, "storeWidth"); + FileHashStoreUtility.ensureNotNull(storeAlgorithm, "storeAlgorithm"); + FileHashStoreUtility.ensureNotNull(storeNameSpace, "storeNameSpace"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", storePath); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2c861ccb..97c5e11d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -148,9 +148,7 @@ record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, String p public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentException, IOException, NoSuchAlgorithmException { logFileHashStore.info("Initializing FileHashStore"); - FileHashStoreUtility.ensureNotNull( - hashstoreProperties, "hashstoreProperties", "FileHashStore - constructor" - ); + FileHashStoreUtility.ensureNotNull(hashstoreProperties, "hashstoreProperties"); // Get properties Path storePath = Paths.get( @@ -256,9 +254,7 @@ protected void verifyHashStoreProperties( throw new IllegalArgumentException(errMsg); } validateAlgorithm(storeAlgorithm); - FileHashStoreUtility.ensureNotNull( - storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" - ); + FileHashStoreUtility.ensureNotNull(storeMetadataNamespace, "storeMetadataNamespace"); FileHashStoreUtility.checkForNotEmptyAndValidString( storeMetadataNamespace, "storeMetadataNamespace"); @@ -431,8 +427,8 @@ public ObjectMetadata storeObject( PidRefsFileExistsException { logFileHashStore.debug("Storing data object for pid: " + pid); // Validate input parameters - FileHashStoreUtility.ensureNotNull(object, "object", "storeObject"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeObject"); + FileHashStoreUtility.ensureNotNull(object, "object"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Validate algorithms if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { @@ -554,8 +550,8 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi NoSuchAlgorithmException, FileNotFoundException, InterruptedException { logFileHashStore.debug("Tagging cid (" + cid + ") with pid: " + pid); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "tagObject"); - FileHashStoreUtility.ensureNotNull(cid, "cid", "tagObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.ensureNotNull(cid, "cid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid"); @@ -589,8 +585,8 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) NoSuchAlgorithmException { logFileHashStore.debug("Storing metadata for pid: " + pid + ", with formatId: " + formatId); // Validate input parameters - FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); + FileHashStoreUtility.ensureNotNull(metadata, "metadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // If no formatId is supplied, use the default namespace to store metadata @@ -660,7 +656,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug("Retrieving InputStream to data object for pid: " + pid); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Check to see if object exists @@ -695,9 +691,9 @@ public InputStream retrieveMetadata(String pid, String formatId) logFileHashStore.debug( "Retrieving metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); - FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); + FileHashStoreUtility.ensureNotNull(formatId, "formatId"); FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); return getHashStoreMetadataInputStream(pid, formatId); @@ -712,7 +708,7 @@ public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, logFileHashStore.debug( "Retrieving metadata for pid: " + pid + " with default metadata namespace: "); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); return getHashStoreMetadataInputStream(pid, DEFAULT_METADATA_NAMESPACE); @@ -724,7 +720,7 @@ public void deleteObject(String pid) InterruptedException { logFileHashStore.debug("Deleting object for pid: " + pid); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "id", "deleteObject"); + FileHashStoreUtility.ensureNotNull(pid, "id"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "id"); Collection deleteList = new ArrayList<>(); @@ -838,14 +834,13 @@ public void deleteIfInvalidObject( IOException { logFileHashStore.debug("Verifying data object for cid: " + objectInfo.cid()); // Validate input parameters - FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo", "deleteIfInvalidObject"); - FileHashStoreUtility.ensureNotNull( - objectInfo.hexDigests(), "objectInfo.getHexDigests()", "deleteIfInvalidObject"); + FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo"); + FileHashStoreUtility.ensureNotNull(objectInfo.hexDigests(), "objectInfo.getHexDigests()"); if (objectInfo.hexDigests().isEmpty()) { throw new MissingHexDigestsException("Missing hexDigests in supplied ObjectMetadata"); } - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "deleteIfInvalidObject"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "deleteIfInvalidObject"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm"); FileHashStoreUtility.checkPositive(objSize, "deleteIfInvalidObject"); String objCid = objectInfo.cid(); @@ -912,9 +907,9 @@ public void deleteMetadata(String pid, String formatId) throws IllegalArgumentEx logFileHashStore.debug( "Deleting metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); - FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); + FileHashStoreUtility.ensureNotNull(formatId, "formatId"); FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); // Get the path to the metadata document and add it to a list @@ -939,7 +934,7 @@ public void deleteMetadata(String pid) throws IllegalArgumentException, IOException, NoSuchAlgorithmException, InterruptedException { logFileHashStore.debug("Deleting all metadata documents for pid: " + pid); - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Get the path to the pid metadata document directory @@ -969,8 +964,7 @@ public void deleteMetadata(String pid) */ protected Collection syncRenameMetadataDocForDeletion( Collection metadataDocPaths) throws IOException, InterruptedException { - FileHashStoreUtility.ensureNotNull( - metadataDocPaths, "metadataDocPaths", "syncRenameMetadataDocForDeletion"); + FileHashStoreUtility.ensureNotNull(metadataDocPaths, "metadataDocPaths"); if (metadataDocPaths.isEmpty()) { String errMsg = "metadataDocPaths supplied cannot be empty."; logFileHashStore.error(errMsg); @@ -1021,7 +1015,7 @@ protected Collection syncRenameMetadataDocForDeletion( public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { logFileHashStore.debug("Calculating hex digest for pid: " + pid); - FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); validateAlgorithm(algorithm); @@ -1076,7 +1070,7 @@ protected ObjectInfo findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, PidNotFoundInCidRefsFileException, OrphanRefsFilesException { logFileHashStore.debug("Finding object for pid: " + pid); - FileHashStoreUtility.ensureNotNull(pid, "pid", "findObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Get path of the pid references file @@ -1336,7 +1330,7 @@ protected void validateTmpObject( */ protected boolean validateAlgorithm(String algorithm) throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { - FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "validateAlgorithm"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm"); FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); @@ -1358,7 +1352,7 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio * @return Boolean */ protected boolean shouldCalculateAlgorithm(String algorithm) { - FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "shouldCalculateAlgorithm"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm"); FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); boolean shouldCalculateAlgorithm = true; for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { @@ -1391,9 +1385,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor } // If checksum is supplied, checksumAlgorithm cannot be empty if (checksum != null && !checksum.trim().isEmpty()) { - FileHashStoreUtility.ensureNotNull( - checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters" - ); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm"); FileHashStoreUtility.checkForNotEmptyAndValidString(checksumAlgorithm, "algorithm"); } // Ensure algorithm is supported, not null and not empty @@ -1402,9 +1394,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor requestValidation = validateAlgorithm(checksumAlgorithm); // Ensure checksum is not null or empty if checksumAlgorithm is supplied if (requestValidation) { - FileHashStoreUtility.ensureNotNull( - checksum, "checksum", "verifyChecksumParameters" - ); + FileHashStoreUtility.ensureNotNull(checksum, "checksum"); FileHashStoreUtility.checkForNotEmptyAndValidString(checksum, "checksum"); } } @@ -1544,7 +1534,7 @@ protected void move(File source, File target, String entity) throws IOException, logFileHashStore.debug( "Moving " + entity + ", from source: " + source + ", to target: " + target); // Validate input parameters - FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); + FileHashStoreUtility.ensureNotNull(entity, "entity"); FileHashStoreUtility.checkForNotEmptyAndValidString(entity, "entity"); if (entity.equals("object") && target.exists()) { String errMsg = "File already exists for target: " + target; @@ -1737,9 +1727,9 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo protected void unTagObject(String pid, String cid) throws InterruptedException, NoSuchAlgorithmException, IOException { // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "unTagObject"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); - FileHashStoreUtility.ensureNotNull(cid, "cid", "unTagObject"); + FileHashStoreUtility.ensureNotNull(cid, "cid"); FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid"); Collection deleteList = new ArrayList<>(); @@ -2009,8 +1999,8 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) logFileHashStore.debug( "Writing metadata for pid: " + pid + " , with metadata namespace: " + formatId); // Validate input parameters - FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); + FileHashStoreUtility.ensureNotNull(metadata, "metadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // If no formatId is supplied, use the default namespace to store metadata diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 46c62622..a2ba23f6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -35,14 +35,16 @@ public class FileHashStoreUtility { * * @param object Object to check * @param argument Value that is being checked - * @param method Calling method or class * @throws IllegalArgumentException If the object is null */ - public static void ensureNotNull(Object object, String argument, String method) + public static void ensureNotNull(Object object, String argument) throws IllegalArgumentException { if (object == null) { - String errMsg = "Calling Method: " + method + "(): " + argument + " cannot be null."; - throw new IllegalArgumentException(errMsg); + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = + "Calling Method: " + stackTraceElements[2].getMethodName() + "()'s argument: " + + argument + " cannot be null."; + throw new IllegalArgumentException(msg); } } @@ -86,9 +88,9 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm */ public static String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, IllegalArgumentException { - FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); - FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm"); FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); @@ -144,7 +146,7 @@ public static List getFilesFromDir(Path directory) throws IOException { * @throws IOException Issue with renaming the given file path */ public static Path renamePathForDeletion(Path pathToRename) throws IOException { - ensureNotNull(pathToRename, "pathToRename", "renamePathForDeletion"); + ensureNotNull(pathToRename, "pathToRename"); if (!Files.exists(pathToRename)) { String errMsg = "Given path to file: " + pathToRename + " does not exist."; throw new FileNotFoundException(errMsg); @@ -165,7 +167,7 @@ public static Path renamePathForDeletion(Path pathToRename) throws IOException { * @throws IOException Issue with renaming the given file path */ public static void renamePathForRestoration(Path pathToRename) throws IOException { - ensureNotNull(pathToRename, "pathToRename", "renamePathForRestoration"); + ensureNotNull(pathToRename, "pathToRename"); if (!Files.exists(pathToRename)) { String errMsg = "Given path to file: " + pathToRename + " does not exist."; throw new FileNotFoundException(errMsg); @@ -184,7 +186,7 @@ public static void renamePathForRestoration(Path pathToRename) throws IOExceptio * @param deleteList Directory to check */ public static void deleteListItems(Collection deleteList) { - ensureNotNull(deleteList, "deleteList", "deleteListItems"); + ensureNotNull(deleteList, "deleteList"); if (!deleteList.isEmpty()) { for (Path deleteItem : deleteList) { if (Files.exists(deleteItem)) { @@ -208,12 +210,11 @@ public static void deleteListItems(Collection deleteList) { * * @param string String to check * @param argument Value that is being checked - * @param method Calling method * @throws IllegalArgumentException If the string is empty or contains illegal characters */ public static void checkForNotEmptyAndValidString(String string, String argument) throws IllegalArgumentException { - ensureNotNull(string, "string", "checkForNotEmptyAndValidString"); + ensureNotNull(string, "string"); if (string.isBlank()) { StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); String msg = diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 01f249e9..925a7c4e 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -32,9 +32,7 @@ public class HashStoreRunnable implements Runnable { */ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { - FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", - "HashStoreRunnableConstructor ~ HashStore object is" - + " null."); + FileHashStoreUtility.ensureNotNull(hashstore, "hashstore"); FileHashStoreUtility.checkPositive( publicAPIMethod, "HashStoreRunnableConstructor ~ Must" + " supply an integer."); this.hashstore = hashstore; @@ -51,9 +49,7 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream o * @param pid Persistent or authority-based identifier */ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { - FileHashStoreUtility.ensureNotNull(hashstore, "hashstore", - "HashStoreRunnableConstructor ~ HashStore object is" - + " null."); + FileHashStoreUtility.ensureNotNull(hashstore, "hashstore"); FileHashStoreUtility.checkPositive( publicAPIMethod, "HashStoreRunnableConstructor ~ Must" + " supply an integer."); this.hashstore = hashstore; From 4561f79bd48779ca099a37dcfa50f1db4b76eb8a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 09:58:28 -0700 Subject: [PATCH 491/553] Refactor 'checkPositive' to get method name via thread and update signature to remove 'method' argument --- .../dataone/hashstore/filehashstore/FileHashStore.java | 6 +++--- .../hashstore/filehashstore/FileHashStoreUtility.java | 10 +++++----- .../java/org/dataone/hashstore/HashStoreRunnable.java | 6 ++---- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 97c5e11d..d0e22a0d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -442,7 +442,7 @@ public ObjectMetadata storeObject( validateAlgorithm(checksumAlgorithm); } if (objSize != -1) { - FileHashStoreUtility.checkPositive(objSize, "storeObject"); + FileHashStoreUtility.checkPositive(objSize); } try (object) { @@ -841,7 +841,7 @@ public void deleteIfInvalidObject( } FileHashStoreUtility.ensureNotNull(checksum, "checksum"); FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm"); - FileHashStoreUtility.checkPositive(objSize, "deleteIfInvalidObject"); + FileHashStoreUtility.checkPositive(objSize); String objCid = objectInfo.cid(); long objInfoRetrievedSize = objectInfo.size(); @@ -1178,7 +1178,7 @@ protected ObjectMetadata putObject( validateAlgorithm(additionalAlgorithm); } if (objSize != -1) { - FileHashStoreUtility.checkPositive(objSize, "putObject"); + FileHashStoreUtility.checkPositive(objSize); } // Generate tmp file and write to it diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index a2ba23f6..2580347b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -256,12 +256,12 @@ public static boolean isValidString(String string) { * @param method Calling method * @throws IllegalArgumentException If longInt is less than or equal */ - public static void checkPositive(long longInt, String method) - throws IllegalArgumentException { + public static void checkPositive(long longInt) throws IllegalArgumentException { if (longInt <= 0) { - String errMsg = - "Calling Method: " + method + "(): objSize cannot be less than or equal to 0."; - throw new IllegalArgumentException(errMsg); + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = "Calling Method: " + stackTraceElements[2].getMethodName() + + "(): given objSize/long/runnableMethod/etc. object cannot be <= 0 "; + throw new IllegalArgumentException(msg); } } diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 925a7c4e..1a2b6c5c 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -33,8 +33,7 @@ public class HashStoreRunnable implements Runnable { public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore"); - FileHashStoreUtility.checkPositive( - publicAPIMethod, "HashStoreRunnableConstructor ~ Must" + " supply an integer."); + FileHashStoreUtility.checkPositive(publicAPIMethod); this.hashstore = hashstore; this.publicAPIMethod = publicAPIMethod; this.objStream = objStream; @@ -50,8 +49,7 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream o */ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore"); - FileHashStoreUtility.checkPositive( - publicAPIMethod, "HashStoreRunnableConstructor ~ Must" + " supply an integer."); + FileHashStoreUtility.checkPositive(publicAPIMethod); this.hashstore = hashstore; this.publicAPIMethod = publicAPIMethod; this.pid = pid; From 962ed5b41b4ed9675200070df94cc2b8b6267cba Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 13:27:09 -0700 Subject: [PATCH 492/553] Fix typo in 'HashStore' interface --- src/main/java/org/dataone/hashstore/HashStore.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 2b515f3a..069afcd4 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -20,7 +20,7 @@ public interface HashStore { /** * The {@code storeObject} method is responsible for the atomic storage of objects to - * disk using a given InputStream. Upon successful storage, the method returns a + * disk using a given InputStream. Upon successful storage, the method returns an * (@Code ObjectMetadata) object containing relevant file information, such as the file's * id (which can be used by a system administrator -- but not by an API client -- to locate * the object on disk), the file's size, and a hex digest dict of algorithms and From a9fce582544e654810f50b8fdd944399c92156d4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 13:28:16 -0700 Subject: [PATCH 493/553] Revise comments in 'tagObject' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d0e22a0d..e4947faf 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -556,23 +556,24 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid"); try { + // This method synchronizes the pid and cid storeHashStoreRefsFiles(pid, cid); } catch (HashStoreRefsAlreadyExistException hsrfae) { - // *** cid and pid already released *** + // cid and pid has been released // This exception is thrown when the pid and cid are already tagged appropriately String errMsg = "HashStore refs files already exist for pid " + pid + " and cid: " + cid; throw new HashStoreRefsAlreadyExistException(errMsg); } catch (PidRefsFileExistsException prfe) { - // *** cid and pid already released *** + // cid and pid has been released String errMsg = "pid: " + pid + " already references another cid." + " A pid can only reference one cid."; throw new PidRefsFileExistsException(errMsg); } catch (Exception e) { - // *** cid and pid already released *** + // cid and pid has been released // Revert the process for all other exceptions unTagObject(pid, cid); throw e; From 6a0c32be888d5d980fdabe36a0147b003252586a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 14:03:32 -0700 Subject: [PATCH 494/553] Move synchronized call to within try statement in 'unTagObject' to improve flow --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e4947faf..db817a20 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1743,10 +1743,9 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, try { ObjectInfo objInfo = findObject(pid); cid = objInfo.cid(); - // If no exceptions are thrown, we proceed to synchronization based on the `cid` - synchronizeObjectLockedCids(cid); - try { + // If no exceptions are thrown, we proceed to synchronization based on the `cid` + synchronizeObjectLockedCids(cid); // Get paths to reference files to work on Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); From d61d9c3fe7d49ffda0e75d6e0ad26d7ad657c872 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 15 Aug 2024 21:28:21 -0700 Subject: [PATCH 495/553] Apply IntelliJ automatic formatting to entire codebase for linting consistenty --- .../java/org/dataone/hashstore/HashStore.java | 466 +++++++++--------- .../dataone/hashstore/HashStoreClient.java | 209 ++++---- .../dataone/hashstore/HashStoreFactory.java | 19 +- .../filehashstore/FileHashStore.java | 356 +++++++------ .../filehashstore/FileHashStoreUtility.java | 17 +- .../hashstore/HashStoreClientTest.java | 77 ++- .../dataone/hashstore/HashStoreRunnable.java | 16 +- .../org/dataone/hashstore/HashStoreTest.java | 38 +- .../dataone/hashstore/ObjectMetadataTest.java | 9 +- .../filehashstore/FileHashStoreInitTest.java | 57 +-- .../FileHashStoreInterfaceTest.java | 364 +++++++------- .../FileHashStoreProtectedTest.java | 295 +++++------ 12 files changed, 922 insertions(+), 1001 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 069afcd4..b374d94f 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -18,247 +18,251 @@ * expected usage of the system. */ public interface HashStore { - /** - * The {@code storeObject} method is responsible for the atomic storage of objects to - * disk using a given InputStream. Upon successful storage, the method returns an - * (@Code ObjectMetadata) object containing relevant file information, such as the file's - * id (which can be used by a system administrator -- but not by an API client -- to locate - * the object on disk), the file's size, and a hex digest dict of algorithms and - * checksums. Storing an object with {@code store_object} also tags an object (creating - * references) which allow the object to be discoverable. - * - * {@code storeObject} also ensures that an object is stored only once by synchronizing - * multiple calls and rejecting calls to store duplicate objects. Note, calling {@code - * storeObject} without a pid is a possibility, but should only store the object without - * tagging the object. It is then the caller's responsibility to finalize the process by - * calling {@code tagObject} after verifying the correct object is stored. - * - * The file's id is determined by calculating the object's content identifier based on the - * store's default algorithm, which is also used as the permanent address of the file. The - * file's identifier is then sharded using the store's configured depth and width, delimited - * by '/' and concatenated to produce the final permanent address and is stored in the - * {@code ./[storePath]/objects/} directory. - * - * By default, the hex digest map includes the following hash algorithms: MD5, SHA-1, - * SHA-256, SHA-384, SHA-512 - which are the most commonly used algorithms in dataset - * submissions to DataONE and the Arctic Data Center. If an additional algorithm is - * provided, the {@code storeObject} method checks if it is supported and adds it to the hex - * digests dict along with its corresponding hex digest. An algorithm is considered - * "supported" if it is recognized as a valid hash algorithm in {@code java.security - * .MessageDigest} class. - * - * Similarly, if a file size and/or checksum & checksumAlgorithm value are provided, - * {@code storeObject} validates the object to ensure it matches the given arguments - * before moving the file to its permanent address. - * - * @param object Input stream to file - * @param pid Authority-based identifier - * @param additionalAlgorithm Additional hex digest to include in hexDigests - * @param checksum Value of checksum to validate against - * @param checksumAlgorithm Algorithm of checksum submitted - * @param objSize Expected size of object to validate after storing - * @return ObjectMetadata object encapsulating file information - * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is - * invalid - * @throws IOException I/O Error when writing file, generating checksums - * and/or moving file - * @throws PidRefsFileExistsException If a pid refs file already exists, meaning the pid is - * already referencing a file. - * @throws RuntimeException Thrown when there is an issue with permissions, - * illegal arguments (ex. empty pid) or null pointers - * @throws InterruptedException When tagging pid and cid process is interrupted - */ - ObjectMetadata storeObject( - InputStream object, String pid, String additionalAlgorithm, String checksum, - String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; + /** + * The {@code storeObject} method is responsible for the atomic storage of objects to disk using + * a given InputStream. Upon successful storage, the method returns an (@Code ObjectMetadata) + * object containing relevant file information, such as the file's id (which can be used by a + * system administrator -- but not by an API client -- to locate the object on disk), the file's + * size, and a hex digest dict of algorithms and checksums. Storing an object with + * {@code store_object} also tags an object (creating references) which allow the object to be + * discoverable. + * + * {@code storeObject} also ensures that an object is stored only once by synchronizing multiple + * calls and rejecting calls to store duplicate objects. Note, calling {@code storeObject} + * without a pid is a possibility, but should only store the object without tagging the object. + * It is then the caller's responsibility to finalize the process by calling {@code tagObject} + * after verifying the correct object is stored. + * + * The file's id is determined by calculating the object's content identifier based on the + * store's default algorithm, which is also used as the permanent address of the file. The + * file's identifier is then sharded using the store's configured depth and width, delimited by + * '/' and concatenated to produce the final permanent address and is stored in the + * {@code ./[storePath]/objects/} directory. + * + * By default, the hex digest map includes the following hash algorithms: MD5, SHA-1, SHA-256, + * SHA-384, SHA-512 - which are the most commonly used algorithms in dataset submissions to + * DataONE and the Arctic Data Center. If an additional algorithm is provided, the + * {@code storeObject} method checks if it is supported and adds it to the hex digests dict + * along with its corresponding hex digest. An algorithm is considered "supported" if it is + * recognized as a valid hash algorithm in {@code java.security .MessageDigest} class. + * + * Similarly, if a file size and/or checksum & checksumAlgorithm value are provided, + * {@code storeObject} validates the object to ensure it matches the given arguments before + * moving the file to its permanent address. + * + * @param object Input stream to file + * @param pid Authority-based identifier + * @param additionalAlgorithm Additional hex digest to include in hexDigests + * @param checksum Value of checksum to validate against + * @param checksumAlgorithm Algorithm of checksum submitted + * @param objSize Expected size of object to validate after storing + * @return ObjectMetadata object encapsulating file information + * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is invalid + * @throws IOException I/O Error when writing file, generating checksums and/or + * moving file + * @throws PidRefsFileExistsException If a pid refs file already exists, meaning the pid is + * already referencing a file. + * @throws RuntimeException Thrown when there is an issue with permissions, illegal + * arguments (ex. empty pid) or null pointers + * @throws InterruptedException When tagging pid and cid process is interrupted + */ + ObjectMetadata storeObject( + InputStream object, String pid, String additionalAlgorithm, String checksum, + String checksumAlgorithm, long objSize) + throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, + InterruptedException; - /** - * @see #storeObject(InputStream, String, String, String, String, long) - * - * Store an object only without reference files. - */ - ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, - IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; + /** + * @see #storeObject(InputStream, String, String, String, String, long) + * + * Store an object only without reference files. + */ + ObjectMetadata storeObject(InputStream object) + throws NoSuchAlgorithmException, IOException, RuntimeException, + InterruptedException; - /** - * Creates references that allow objects stored in HashStore to be discoverable. Retrieving, - * deleting or calculating a hex digest of an object is based on a pid argument; and to - * proceed, we must be able to find the object associated with the pid. - * - * @param pid Authority-based identifier - * @param cid Content-identifier (hash identifier) - * @throws IOException Failure to create tmp file - * @throws PidRefsFileExistsException When pid refs file already exists - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address - * does not exist - * @throws FileNotFoundException If refs file is missing during verification - * @throws InterruptedException When tagObject is waiting to execute but is - * interrupted - */ - void tagObject(String pid, String cid) throws IOException, - PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, - InterruptedException; + /** + * Creates references that allow objects stored in HashStore to be discoverable. Retrieving, + * deleting or calculating a hex digest of an object is based on a pid argument; and to proceed, + * we must be able to find the object associated with the pid. + * + * @param pid Authority-based identifier + * @param cid Content-identifier (hash identifier) + * @throws IOException Failure to create tmp file + * @throws PidRefsFileExistsException When pid refs file already exists + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address does not + * exist + * @throws FileNotFoundException If refs file is missing during verification + * @throws InterruptedException When tagObject is waiting to execute but is interrupted + */ + void tagObject(String pid, String cid) + throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, + FileNotFoundException, InterruptedException; - /** - * Confirms that an ObjectMetadata's content is equal to the given values. This method - * throws an exception if there are any issues, and attempts to remove the data object - * if it is determined to be invalid. - * - * @param objectInfo ObjectMetadata object with values - * @param checksum Value of checksum to validate against - * @param checksumAlgorithm Algorithm of checksum submitted - * @param objSize Expected size of object to validate after storing - * @throws NonMatchingObjSizeException Given size =/= objMeta size value - * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value - * @throws UnsupportedHashAlgorithmException Given algo is not found or supported - * @throws NoSuchAlgorithmException When 'deleteInvalidObject' is true and an algo - * used to get a cid refs file is not supported - * @throws InterruptedException When 'deleteInvalidObject' is true and an issue - * with coordinating deleting objects occurs - * @throws IOException Issue with recalculating supported algo for - * checksum not found - */ - void deleteIfInvalidObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) - throws NonMatchingObjSizeException, NonMatchingChecksumException, - UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, - IOException; + /** + * Confirms that an ObjectMetadata's content is equal to the given values. This method throws an + * exception if there are any issues, and attempts to remove the data object if it is determined + * to be invalid. + * + * @param objectInfo ObjectMetadata object with values + * @param checksum Value of checksum to validate against + * @param checksumAlgorithm Algorithm of checksum submitted + * @param objSize Expected size of object to validate after storing + * @throws NonMatchingObjSizeException Given size =/= objMeta size value + * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value + * @throws UnsupportedHashAlgorithmException Given algo is not found or supported + * @throws NoSuchAlgorithmException When 'deleteInvalidObject' is true and an algo used + * to get a cid refs file is not supported + * @throws InterruptedException When 'deleteInvalidObject' is true and an issue + * with coordinating deleting objects occurs + * @throws IOException Issue with recalculating supported algo for + * checksum not found + */ + void deleteIfInvalidObject( + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) + throws NonMatchingObjSizeException, NonMatchingChecksumException, + UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, + IOException; - /** - * Adds/updates metadata (ex. {@code sysmeta}) to the HashStore by using a given - * InputStream, a persistent identifier ({@code pid}) and metadata format ({@code - * formatId}). All metadata documents for a given pid will be stored in the directory - * (under ../metadata) that is determined by calculating the hash of the given pid, with - * the document name being the hash of the metadata format ({@code formatId}). - * - * Note, multiple calls to store the same metadata content will all be accepted, but is not - * guaranteed to execute sequentially. - * - * @param metadata Input stream to metadata document - * @param pid Authority-based identifier - * @param formatId Metadata namespace/format - * @return Path to metadata content identifier (string representing metadata address) - * @throws IOException When there is an error writing the metadata document - * @throws IllegalArgumentException Invalid values like null for metadata, or empty pids and - * formatIds - * @throws FileNotFoundException When temp metadata file is not found - * @throws InterruptedException metadataLockedIds synchronization issue - * @throws NoSuchAlgorithmException Algorithm used to calculate permanent address is not - * supported - */ - String storeMetadata(InputStream metadata, String pid, String formatId) - throws IOException, IllegalArgumentException, FileNotFoundException, - InterruptedException, NoSuchAlgorithmException; + /** + * Adds/updates metadata (ex. {@code sysmeta}) to the HashStore by using a given InputStream, a + * persistent identifier ({@code pid}) and metadata format ({@code formatId}). All metadata + * documents for a given pid will be stored in the directory (under ../metadata) that is + * determined by calculating the hash of the given pid, with the document name being the hash of + * the metadata format ({@code formatId}). + * + * Note, multiple calls to store the same metadata content will all be accepted, but is not + * guaranteed to execute sequentially. + * + * @param metadata Input stream to metadata document + * @param pid Authority-based identifier + * @param formatId Metadata namespace/format + * @return Path to metadata content identifier (string representing metadata address) + * @throws IOException When there is an error writing the metadata document + * @throws IllegalArgumentException Invalid values like null for metadata, or empty pids and + * formatIds + * @throws FileNotFoundException When temp metadata file is not found + * @throws InterruptedException metadataLockedIds synchronization issue + * @throws NoSuchAlgorithmException Algorithm used to calculate permanent address is not + * supported + */ + String storeMetadata(InputStream metadata, String pid, String formatId) + throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, + NoSuchAlgorithmException; - /** - * @see #storeMetadata(InputStream, String, String) - * - * If the '(InputStream metadata, String pid)' signature is used, the metadata format - * stored will default to {@code sysmeta}. - */ - String storeMetadata(InputStream metadata, String pid) throws IOException, - IllegalArgumentException, FileNotFoundException, InterruptedException, - NoSuchAlgorithmException; + /** + * @see #storeMetadata(InputStream, String, String) + * + * If the '(InputStream metadata, String pid)' signature is used, the metadata format stored + * will default to {@code sysmeta}. + */ + String storeMetadata(InputStream metadata, String pid) + throws IOException, IllegalArgumentException, InterruptedException, + NoSuchAlgorithmException; - /** - * Returns an InputStream to an object from HashStore using a given persistent identifier. - * - * @param pid Authority-based identifier - * @return Object InputStream - * @throws IllegalArgumentException When pid is null or empty - * @throws FileNotFoundException When requested pid has no associated object - * @throws IOException I/O error when creating InputStream to object - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - */ - InputStream retrieveObject(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * Returns an InputStream to an object from HashStore using a given persistent identifier. + * + * @param pid Authority-based identifier + * @return Object InputStream + * @throws IllegalArgumentException When pid is null or empty + * @throws FileNotFoundException When requested pid has no associated object + * @throws IOException I/O error when creating InputStream to object + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + */ + InputStream retrieveObject(String pid) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException; - /** - * Returns an InputStream to the metadata content of a given pid and metadata namespace from - * HashStore. - * - * @param pid Authority-based identifier - * @param formatId Metadata namespace/format - * @return Metadata InputStream - * @throws IllegalArgumentException When pid/formatId is null or empty - * @throws FileNotFoundException When requested pid+formatId has no associated object - * @throws IOException I/O error when creating InputStream to metadata - * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not - * supported - */ - InputStream retrieveMetadata(String pid, String formatId) - throws IllegalArgumentException, FileNotFoundException, IOException, - NoSuchAlgorithmException; + /** + * Returns an InputStream to the metadata content of a given pid and metadata namespace from + * HashStore. + * + * @param pid Authority-based identifier + * @param formatId Metadata namespace/format + * @return Metadata InputStream + * @throws IllegalArgumentException When pid/formatId is null or empty + * @throws FileNotFoundException When requested pid+formatId has no associated object + * @throws IOException I/O error when creating InputStream to metadata + * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not + * supported + */ + InputStream retrieveMetadata(String pid, String formatId) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException; - /** - * @see #retrieveMetadata(String, String) - * - * If {@code retrieveMetadata} is called with signature (String pid), the metadata - * document retrieved will be the given pid's 'sysmeta' - */ - InputStream retrieveMetadata(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * @see #retrieveMetadata(String, String) + * + * If {@code retrieveMetadata} is called with signature (String pid), the metadata document + * retrieved will be the given pid's 'sysmeta' + */ + InputStream retrieveMetadata(String pid) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException; - /** - * Deletes an object and all relevant associated files (ex. system metadata, reference - * files, etc.) based on a given pid. If other pids still reference the pid's associated - * object, the object will not be deleted. - * - * @param pid Authority-based identifier - * @throws IllegalArgumentException When pid is null or empty - * @throws IOException I/O error when deleting empty directories, - * modifying/deleting reference files - * @throws NoSuchAlgorithmException When algorithm used to calculate an object or metadata's - * address is not supported - * @throws InterruptedException When deletion synchronization is interrupted - */ - void deleteObject(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException; + /** + * Deletes an object and all relevant associated files (ex. system metadata, reference files, + * etc.) based on a given pid. If other pids still reference the pid's associated object, the + * object will not be deleted. + * + * @param pid Authority-based identifier + * @throws IllegalArgumentException When pid is null or empty + * @throws IOException I/O error when deleting empty directories, + * modifying/deleting reference files + * @throws NoSuchAlgorithmException When algorithm used to calculate an object or metadata's + * address is not supported + * @throws InterruptedException When deletion synchronization is interrupted + */ + void deleteObject(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException; - /** - * Deletes a metadata document (ex. {@code sysmeta}) permanently from HashStore using a - * given persistent identifier and its respective metadata namespace. - * - * @param pid Authority-based identifier - * @param formatId Metadata namespace/format - * @throws IllegalArgumentException When pid or formatId is null or empty - * @throws IOException I/O error when deleting metadata or empty directories - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - * @throws InterruptedException Issue with synchronization on metadata doc - */ - void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException, InterruptedException; + /** + * Deletes a metadata document (ex. {@code sysmeta}) permanently from HashStore using a given + * persistent identifier and its respective metadata namespace. + * + * @param pid Authority-based identifier + * @param formatId Metadata namespace/format + * @throws IllegalArgumentException When pid or formatId is null or empty + * @throws IOException I/O error when deleting metadata or empty directories + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + * @throws InterruptedException Issue with synchronization on metadata doc + */ + void deleteMetadata(String pid, String formatId) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException; - /** - * Deletes all metadata related for the given 'pid' from HashStore - * - * @param pid Authority-based identifier - * @throws IllegalArgumentException If pid is invalid - * @throws IOException I/O error when deleting metadata or empty directories - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - * @throws InterruptedException Issue with synchronization on metadata doc - */ - void deleteMetadata(String pid) throws IllegalArgumentException, IOException, - NoSuchAlgorithmException, InterruptedException; + /** + * Deletes all metadata related for the given 'pid' from HashStore + * + * @param pid Authority-based identifier + * @throws IllegalArgumentException If pid is invalid + * @throws IOException I/O error when deleting metadata or empty directories + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + * @throws InterruptedException Issue with synchronization on metadata doc + */ + void deleteMetadata(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException; - /** - * Calculates the hex digest of an object that exists in HashStore using a given persistent - * identifier and hash algorithm. - * - * @param pid Authority-based identifier - * @param algorithm Algorithm of desired hex digest - * @return String hex digest of requested pid - * @throws IllegalArgumentException When pid or formatId is null or empty - * @throws FileNotFoundException When requested pid object does not exist - * @throws IOException I/O error when calculating hex digests - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - */ - String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * Calculates the hex digest of an object that exists in HashStore using a given persistent + * identifier and hash algorithm. + * + * @param pid Authority-based identifier + * @param algorithm Algorithm of desired hex digest + * @return String hex digest of requested pid + * @throws IllegalArgumentException When pid or formatId is null or empty + * @throws FileNotFoundException When requested pid object does not exist + * @throws IOException I/O error when calculating hex digests + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + */ + String getHexDigest(String pid, String algorithm) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException; } diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index dd5be696..084ef873 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -34,8 +34,8 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; /** - * HashStoreClient is a development tool used to create a new HashStore or interact directly with - * an existing HashStore through the command line. See 'README.md' for usage examples. + * HashStoreClient is a development tool used to create a new HashStore or interact directly with an + * existing HashStore through the command line. See 'README.md' for usage examples. */ public class HashStoreClient { private static HashStore hashStore; @@ -43,7 +43,7 @@ public class HashStoreClient { /** * Entry point to the HashStore Client interface. - * + * * @param args Command line arguments * @throws Exception General exception class to catch all exceptions. See the HashStore * interface for details. @@ -83,15 +83,14 @@ public static void main(String[] args) throws Exception { String storeNameSpace = cmd.getOptionValue("nsp"); createNewHashStore( - storePath, storeDepth, storeWidth, storeAlgorithm, storeNameSpace - ); + storePath, storeDepth, storeWidth, storeAlgorithm, storeNameSpace); } else { storePath = Paths.get(cmd.getOptionValue("store")); Path hashstoreYaml = storePath.resolve("hashstore.yaml"); if (!Files.exists(hashstoreYaml)) { - String errMsg = "HashStoreClient - Missing hashstore.yaml at storePath (" - + storePath - + "), please create a store with '-chs'. Use '-h' to see options."; + String errMsg = + "HashStoreClient - Missing hashstore.yaml at storePath (" + storePath + + "), please create a store with '-chs'. Use '-h' to see options."; throw new FileNotFoundException(errMsg); } initializeHashStore(storePath); @@ -99,13 +98,14 @@ public static void main(String[] args) throws Exception { // Parse remaining options if (cmd.hasOption("knbvm")) { System.out.println( - "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore.yaml." - ); + "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore" + + ".yaml."); Path pgdbYaml = storePath.resolve("pgdb.yaml"); if (!Files.exists(pgdbYaml)) { - String errMsg = "HashStoreClient - Missing pgdb.yaml at storePath (" - + storePath + "), please manually create it with the following keys: " - + "db_user, db_password, db_host, db_port, db_name"; + String errMsg = + "HashStoreClient - Missing pgdb.yaml at storePath (" + storePath + + "), please manually create it with the following keys: " + + "db_user, db_password, db_host, db_port, db_name"; throw new FileNotFoundException(errMsg); } @@ -126,8 +126,7 @@ public static void main(String[] args) throws Exception { String sizeOfFilesToSkip = cmd.getOptionValue("gbskip"); FileHashStoreUtility.ensureNotNull(objType, "-stype"); FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir"); - FileHashStoreUtility.ensureNotNull( - action, "-sts, -rav, -dfs"); + FileHashStoreUtility.ensureNotNull(action, "-sts, -rav, -dfs"); testWithKnbvm(action, objType, originDirectory, numObjects, sizeOfFilesToSkip); @@ -167,9 +166,9 @@ public static void main(String[] args) throws Exception { } InputStream pidObjStream = Files.newInputStream(path); - ObjectMetadata objInfo = hashStore.storeObject( - pidObjStream, pid, additional_algo, checksum, checksum_algo, size - ); + ObjectMetadata objInfo = + hashStore.storeObject(pidObjStream, pid, additional_algo, checksum, + checksum_algo, size); pidObjStream.close(); System.out.println("Object Info for pid (" + pid + "):"); System.out.println(objInfo.hexDigests()); @@ -210,9 +209,8 @@ public static void main(String[] args) throws Exception { InputStream metadataStream = hashStore.retrieveMetadata(pid, formatId); byte[] buffer = new byte[1000]; int bytesRead = metadataStream.read(buffer, 0, buffer.length); - String metadataPreview = new String( - buffer, 0, bytesRead, StandardCharsets.UTF_8 - ); + String metadataPreview = + new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); metadataStream.close(); System.out.println(metadataPreview); String retrieveMetadataMsg = "...\n<-- Truncated for Display Purposes -->"; @@ -232,10 +230,8 @@ public static void main(String[] args) throws Exception { FileHashStoreUtility.ensureNotNull(formatId, "-formatId"); hashStore.deleteMetadata(pid, formatId); - System.out.println( - "Metadata for pid (" + pid + ") and namespace (" + formatId - + ") has been deleted." - ); + System.out.println("Metadata for pid (" + pid + ") and namespace (" + formatId + + ") has been deleted."); } else { System.out.println("HashStoreClient - No options found, use -h for help."); } @@ -264,88 +260,64 @@ private static Options addHashStoreClientOptions() { options.addOption("dp", "storedepth", true, "Depth of HashStore to create."); options.addOption("wp", "storewidth", true, "Width of HashStore to create."); options.addOption( - "ap", "storealgo", true, "Algorithm used for calculating file addresses in a HashStore." - ); + "ap", "storealgo", true, + "Algorithm used for calculating file addresses in a HashStore."); options.addOption( - "nsp", "storenamespace", true, "Default metadata namespace in a HashStore." - ); + "nsp", "storenamespace", true, "Default metadata namespace in a HashStore."); // Public API options + options.addOption("getchecksum", "client_getchecksum", false, + "Flag to get the hex digest of a data object in a HashStore."); options.addOption( - "getchecksum", "client_getchecksum", false, - "Flag to get the hex digest of a data object in a HashStore." - ); - options.addOption( - "storeobject", "client_storeobject", false, "Flag to store objs to a HashStore." - ); - options.addOption( - "storemetadata", "client_storemetadata", false, "Flag to store metadata to a HashStore" - ); - options.addOption( - "retrieveobject", "client_retrieveobject", false, - "Flag to retrieve objs from a HashStore." - ); - options.addOption( - "retrievemetadata", "client_retrievemetadata", false, - "Flag to retrieve metadata objs from a HashStore." - ); + "storeobject", "client_storeobject", false, "Flag to store objs to a HashStore."); options.addOption( - "deleteobject", "client_deleteobject", false, "Flag to delete objs from a HashStore." - ); + "storemetadata", "client_storemetadata", false, + "Flag to store metadata to a HashStore"); + options.addOption("retrieveobject", "client_retrieveobject", false, + "Flag to retrieve objs from a HashStore."); + options.addOption("retrievemetadata", "client_retrievemetadata", false, + "Flag to retrieve metadata objs from a HashStore."); options.addOption( - "deletemetadata", "client_deletemetadata", false, - "Flag to delete metadata objs from a HashStore." - ); + "deleteobject", "client_deleteobject", false, "Flag to delete objs from a HashStore."); + options.addOption("deletemetadata", "client_deletemetadata", false, + "Flag to delete metadata objs from a HashStore."); options.addOption("pid", "pidguid", true, "PID or GUID of object/metadata."); options.addOption("path", "filepath", true, "Path to object/metadata."); - options.addOption( - "algo", "objectalgo", true, - "Algorithm to use when calling '-getchecksum' or '-storeobject' flag." - ); + options.addOption("algo", "objectalgo", true, + "Algorithm to use when calling '-getchecksum' or '-storeobject' flag."); options.addOption("checksum", "obj_checksum", true, "Checksum of object to store."); options.addOption( - "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied." - ); + "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied."); options.addOption("size", "obj_size", true, "Size of object to store/validate."); - options.addOption( - "format_id", "metadata_format", true, - "Format_id/namespace of metadata to store, retrieve or delete." - ); - // knbvm (test.arcticdata.io) options. Note: In order to test with knbvm, you must manually create + options.addOption("format_id", "metadata_format", true, + "Format_id/namespace of metadata to store, retrieve or delete."); + // knbvm (test.arcticdata.io) options. Note: In order to test with knbvm, you must + // manually create // a `pgdb.yaml` file with the respective JDBC values to access a Metacat db. options.addOption( - "knbvm", "knbvmtestadc", false, "(knbvm) Flag to specify testing with knbvm." - ); - options.addOption( - "nobj", "numberofobj", true, - "(knbvm) Option to specify number of objects to retrieve from a Metacat db." - ); - options.addOption( - "gbskip", "gbsizetoskip", true, "(knbvm) Option to specify the size of objects to skip." - ); - options.addOption( - "sdir", "storedirectory", true, - "(knbvm) Option to specify the directory of objects to convert." - ); + "knbvm", "knbvmtestadc", false, "(knbvm) Flag to specify testing with knbvm."); + options.addOption("nobj", "numberofobj", true, + "(knbvm) Option to specify number of objects to retrieve from a Metacat" + + " db."); options.addOption( - "stype", "storetype", true, "(knbvm) Option to specify 'objects' or 'metadata'" - ); + "gbskip", "gbsizetoskip", true, + "(knbvm) Option to specify the size of objects to skip."); + options.addOption("sdir", "storedirectory", true, + "(knbvm) Option to specify the directory of objects to convert."); options.addOption( - "sts", "storetohs", false, "(knbvm) Test flag to store objs to a HashStore" - ); + "stype", "storetype", true, "(knbvm) Option to specify 'objects' or 'metadata'"); options.addOption( - "rav", "retandval", false, - "(knbvm) Test flag to retrieve and validate objs from a HashStore." - ); + "sts", "storetohs", false, "(knbvm) Test flag to store objs to a HashStore"); + options.addOption("rav", "retandval", false, + "(knbvm) Test flag to retrieve and validate objs from a HashStore."); options.addOption( - "dfs", "delfromhs", false, "(knbvm) Test flag to delete objs from a HashStore" - ); + "dfs", "delfromhs", false, "(knbvm) Test flag to delete objs from a HashStore"); options.addOption("hsr", "hsservicerequest", false, "Dev option to test threading."); return options; } /** * Create a new HashStore with the given properties. - * + * * @param storePath Path to HashStore. * @param storeDepth Depth of store. * @param storeWidth Width of store. @@ -355,8 +327,7 @@ private static Options addHashStoreClientOptions() { */ private static void createNewHashStore( String storePath, String storeDepth, String storeWidth, String storeAlgorithm, - String storeNameSpace - ) throws HashStoreFactoryException, IOException { + String storeNameSpace) throws IOException { FileHashStoreUtility.ensureNotNull(storePath, "storePath"); FileHashStoreUtility.ensureNotNull(storeDepth, "storeDepth"); FileHashStoreUtility.ensureNotNull(storeWidth, "storeWidth"); @@ -394,8 +365,7 @@ private static HashMap loadHashStoreYaml(Path storePath) { hsProperties.put("storeWidth", hashStoreYamlProperties.get("store_width")); hsProperties.put("storeAlgorithm", hashStoreYamlProperties.get("store_algorithm")); hsProperties.put( - "storeMetadataNamespace", hashStoreYamlProperties.get("store_metadata_namespace") - ); + "storeMetadataNamespace", hashStoreYamlProperties.get("store_metadata_namespace")); } catch (IOException ioe) { ioe.printStackTrace(); @@ -407,14 +377,14 @@ private static HashMap loadHashStoreYaml(Path storePath) { /** * Initialize HashStore to use in client app. HashStore must already exist or an exception will * be thrown. - * + * * @param storePath Path to store. * @throws HashStoreFactoryException If unable to initialize HashStore. * @throws IOException If 'hashstore.yaml' cannot be loaded. * @throws FileNotFoundException When 'hashstore.yaml' is missing. */ - private static void initializeHashStore(Path storePath) throws HashStoreFactoryException, - IOException { + private static void initializeHashStore(Path storePath) + throws HashStoreFactoryException, IOException { // Load properties and get HashStore HashMap hsProperties = loadHashStoreYaml(storePath); Properties storeProperties = new Properties(); @@ -422,11 +392,9 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE storeProperties.setProperty("storeDepth", hsProperties.get("storeDepth").toString()); storeProperties.setProperty("storeWidth", hsProperties.get("storeWidth").toString()); storeProperties.setProperty( - "storeAlgorithm", hsProperties.get("storeAlgorithm").toString() - ); + "storeAlgorithm", hsProperties.get("storeAlgorithm").toString()); storeProperties.setProperty( - "storeMetadataNamespace", hsProperties.get("storeMetadataNamespace").toString() - ); + "storeMetadataNamespace", hsProperties.get("storeMetadataNamespace").toString()); // Get HashStore String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; @@ -438,19 +406,18 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE /** * Entry point for working with test data found in knbvm (test.arcticdata.io) - * + * * @param actionFlag String representing a knbvm test-related method to call. * @param objType "data" (objects) or "documents" (metadata). * @param originDir Directory path of given objType - * @param numObjects Number of rows to retrieve from metacat db, - * if null, will retrieve all rows. + * @param numObjects Number of rows to retrieve from metacat db, if null, will retrieve + * all rows. * @param sizeOfFilesToSkip Size of files in GB to skip * @throws IOException Related to accessing config files or objects */ private static void testWithKnbvm( String actionFlag, String objType, String originDir, String numObjects, - String sizeOfFilesToSkip - ) throws IOException { + String sizeOfFilesToSkip) throws IOException { // Load metacat db yaml // Note: In order to test with knbvm, you must manually create a `pgdb.yaml` file with the // respective JDBC values to access a Metacat db. @@ -505,8 +472,8 @@ private static void testWithKnbvm( boolean skipFile = false; if (sizeOfFilesToSkip != null) { // Calculate the size of requested gb to skip in bytes - long gbFilesToSkip = Integer.parseInt(sizeOfFilesToSkip) * (1024L * 1024 - * 1024); + long gbFilesToSkip = + Integer.parseInt(sizeOfFilesToSkip) * (1024L * 1024 * 1024); if (setItemSize > gbFilesToSkip) { skipFile = true; } @@ -516,8 +483,7 @@ private static void testWithKnbvm( Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev); if (Files.exists(setItemFilePath)) { System.out.println( - "File exists (" + setItemFilePath + ")! Adding to resultObjList." - ); + "File exists (" + setItemFilePath + ")! Adding to resultObjList."); Map resultObj = new HashMap<>(); resultObj.put("pid", guid); resultObj.put("algorithm", formattedChecksumAlgo); @@ -561,7 +527,7 @@ private static void testWithKnbvm( /** * Store objects to a HashStore with a checksum and checksum algorithm - * + * * @param resultObjList List containing items with the following properties: 'pid', 'path', * 'algorithm', 'checksum' */ @@ -582,8 +548,7 @@ private static void storeObjsWithChecksumFromDb(Collection> String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); try { logExceptionToFile( - guid, errMsg, "java/store_obj_errors/PidRefsFileExistsException" - ); + guid, errMsg, "java/store_obj_errors/PidRefsFileExistsException"); } catch (Exception e) { e.printStackTrace(); } @@ -618,7 +583,7 @@ private static void storeObjsWithChecksumFromDb(Collection> /** * Retrieve objects from a HashStore and validate its contents by comparing checksums. - * + * * @param resultObjList List containing items with the following properties: 'pid', 'algorithm', * 'checksum' */ @@ -679,7 +644,7 @@ private static void retrieveAndValidateObjs(Collection> resu /** * Deletes a list of objects from a HashStore - * + * * @param resultObjList List containing items with the following property: 'pid' */ private static void deleteObjectsFromStore(Collection> resultObjList) { @@ -722,7 +687,7 @@ private static void deleteObjectsFromStore(Collection> resul /** * Store a list containing info about metadata to a HashStore - * + * * @param resultObjList List containing items that have the following properties: 'pid', 'path' * and 'namespace' */ @@ -768,7 +733,7 @@ private static void storeMetadataFromDb(Collection> resultOb /** * Retrieve metadata from a HashStore and validate its contents by comparing checksums. - * + * * @param resultObjList List containing items with the following properties: 'pid', 'namespace', * 'algorithm', 'checksum' */ @@ -787,9 +752,8 @@ private static void retrieveAndValidateMetadata(Collection> // Get hex digest System.out.println("Calculating hex digest with algorithm: " + algorithm); - String streamDigest = FileHashStoreUtility.calculateHexDigest( - metadataStream, algorithm - ); + String streamDigest = + FileHashStoreUtility.calculateHexDigest(metadataStream, algorithm); metadataStream.close(); // If checksums don't match, write a .txt file @@ -798,8 +762,7 @@ private static void retrieveAndValidateMetadata(Collection> + ". Checksums do not match, checksum from db: " + checksum + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; logExceptionToFile( - guid, errMsg, "java/retrieve_metadata_errors/checksum_mismatch" - ); + guid, errMsg, "java/retrieve_metadata_errors/checksum_mismatch"); } else { System.out.println("Checksums match!"); } @@ -834,7 +797,7 @@ private static void retrieveAndValidateMetadata(Collection> /** * Deletes a list of metadata from a HashStore - * + * * @param resultObjList List containing items with the following property: 'pid' */ private static void deleteMetadataFromStore(Collection> resultObjList) { @@ -881,7 +844,7 @@ private static void deleteMetadataFromStore(Collection> resu /** * Format an algorithm string value to be compatible with MessageDigest class - * + * * @param value Algorithm value to format * @return Formatted algorithm value */ @@ -902,7 +865,7 @@ private static String formatAlgo(String value) { /** * Log a plain text file with the guid/pid as the file name with a message. - * + * * @param guid Pid/guid for which an exception was encountered. * @param errMsg Message to write into text file. * @param directory Directory within HashStore to log error (txt) files. @@ -916,10 +879,8 @@ private static void logExceptionToFile(String guid, String errMsg, String direct Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 - ) - )) { + new OutputStreamWriter(Files.newOutputStream(objectErrorTxtFile), + StandardCharsets.UTF_8))) { writer.write(errMsg); } catch (Exception e) { diff --git a/src/main/java/org/dataone/hashstore/HashStoreFactory.java b/src/main/java/org/dataone/hashstore/HashStoreFactory.java index 40a3344d..d428cc80 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreFactory.java +++ b/src/main/java/org/dataone/hashstore/HashStoreFactory.java @@ -18,12 +18,11 @@ public class HashStoreFactory { /** * Factory method to generate a HashStore - * + * * @param classPackage String of the package name, ex. * "org.dataone.hashstore.filehashstore.FileHashStore" * @param storeProperties Properties object with the following keys: storePath, storeDepth, * storeWidth, storeAlgorithm, storeMetadataNamespace - * * @return HashStore instance ready to store objects and metadata * @throws HashStoreFactoryException When HashStore failÏs to initialize due to permissions or * class-related issues @@ -52,22 +51,23 @@ public static HashStore getHashStore(String classPackage, Properties storeProper hashstore = (HashStore) constructor.newInstance(storeProperties); } catch (ClassNotFoundException cnfe) { - String errMsg = "HashStoreFactory - Unable to find 'FileHashStore' classPackage: " - + classPackage + " - " + cnfe.getCause(); + String errMsg = + "HashStoreFactory - Unable to find 'FileHashStore' classPackage: " + classPackage + + " - " + cnfe.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (NoSuchMethodException nsme) { - String errMsg = "HashStoreFactory - Constructor not found for 'FileHashStore': " - + classPackage + " - " + nsme.getCause(); + String errMsg = + "HashStoreFactory - Constructor not found for 'FileHashStore': " + classPackage + + " - " + nsme.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (IllegalAccessException iae) { String errMsg = "HashStoreFactory - Executing method does not have access to the definition of" - + " the specified class , field, method or constructor. " + iae - .getCause(); + + " the specified class , field, method or constructor. " + iae.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); @@ -78,7 +78,8 @@ public static HashStore getHashStore(String classPackage, Properties storeProper throw new HashStoreFactoryException(errMsg); } catch (InvocationTargetException ite) { - String errMsg = "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite.getCause(); + String errMsg = + "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index db817a20..1b4d4715 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -76,8 +76,8 @@ public class FileHashStore implements HashStore { public static final String HASHSTORE_YAML = "hashstore.yaml"; - public static final String[] SUPPORTED_HASH_ALGORITHMS = {"MD2", "MD5", "SHA-1", "SHA-256", - "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; + public static final String[] SUPPORTED_HASH_ALGORITHMS = + {"MD2", "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; /** @@ -122,11 +122,11 @@ enum HashStoreRefUpdateTypes { * Record object to encapsulate information when searching for an existing HashStore object * based on a given persistent identifier {@code pid} * - * @param cid Content identifier of the data object of a given pid + * @param cid Content identifier of the data object of a given pid * @param cidObjectPath Path to the data object - * @param cidRefsPath Path to the data object's reference file - * @param pidRefsPath Path to the pid's that references the data object - * @param sysmetaPath Path to the pid's system metadata if available + * @param cidRefsPath Path to the data object's reference file + * @param pidRefsPath Path to the pid's that references the data object + * @param sysmetaPath Path to the pid's system metadata if available */ record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, String sysmetaPath) { @@ -134,9 +134,9 @@ record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, String p /** * Constructor to initialize FileHashStore, properties are required. FileHashStore is not - * responsible for ensuring that the given store path is accurate. Upon initialization, if - * an existing config file (hashstore.yaml) is present, it will confirm that it is accurate - * against the supplied properties. If not, FileHashSTore will check for 'hashstore' specific + * responsible for ensuring that the given store path is accurate. Upon initialization, if an + * existing config file (hashstore.yaml) is present, it will confirm that it is accurate against + * the supplied properties. If not, FileHashSTore will check for 'hashstore' specific * directories at the supplied store path before initializing. * * @param hashstoreProperties Properties object with the following keys: storePath, storeDepth, @@ -145,31 +145,25 @@ record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, String p * @throws IOException Issue with creating directories * @throws NoSuchAlgorithmException Unsupported store algorithm */ - public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException { + public FileHashStore(Properties hashstoreProperties) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException { logFileHashStore.info("Initializing FileHashStore"); FileHashStoreUtility.ensureNotNull(hashstoreProperties, "hashstoreProperties"); // Get properties - Path storePath = Paths.get( - hashstoreProperties.getProperty(HashStoreProperties.storePath.name()) - ); + Path storePath = + Paths.get(hashstoreProperties.getProperty(HashStoreProperties.storePath.name())); int storeDepth = Integer.parseInt( - hashstoreProperties.getProperty(HashStoreProperties.storeDepth.name()) - ); + hashstoreProperties.getProperty(HashStoreProperties.storeDepth.name())); int storeWidth = Integer.parseInt( - hashstoreProperties.getProperty(HashStoreProperties.storeWidth.name()) - ); - String storeAlgorithm = hashstoreProperties.getProperty( - HashStoreProperties.storeAlgorithm.name() - ); - String storeMetadataNamespace = hashstoreProperties.getProperty( - HashStoreProperties.storeMetadataNamespace.name() - ); + hashstoreProperties.getProperty(HashStoreProperties.storeWidth.name())); + String storeAlgorithm = + hashstoreProperties.getProperty(HashStoreProperties.storeAlgorithm.name()); + String storeMetadataNamespace = + hashstoreProperties.getProperty(HashStoreProperties.storeMetadataNamespace.name()); verifyHashStoreProperties( - storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace - ); + storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace); // HashStore configuration has been reviewed, proceed with initialization STORE_ROOT = storePath; @@ -210,15 +204,14 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep // Write configuration file 'hashstore.yaml' to store HashStore properties Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); if (!Files.exists(hashstoreYaml)) { - String hashstoreYamlContent = buildHashStoreYamlString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, DEFAULT_METADATA_NAMESPACE - ); + String hashstoreYamlContent = + buildHashStoreYamlString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, + DEFAULT_METADATA_NAMESPACE); writeHashStoreYaml(hashstoreYamlContent); - logFileHashStore.info( - "hashstore.yaml written to storePath: " + hashstoreYaml); + logFileHashStore.info("hashstore.yaml written to storePath: " + hashstoreYaml); } else { - logFileHashStore.info("hashstore.yaml exists and has been verified." - + " Initializing FileHashStore."); + logFileHashStore.info( + "hashstore.yaml exists and has been verified." + " Initializing FileHashStore."); } } @@ -245,8 +238,9 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep */ protected void verifyHashStoreProperties( Path storePath, int storeDepth, int storeWidth, String storeAlgorithm, - String storeMetadataNamespace - ) throws NoSuchAlgorithmException, IOException, IllegalArgumentException, IllegalStateException { + String storeMetadataNamespace) + throws NoSuchAlgorithmException, IOException, IllegalArgumentException, + IllegalStateException { if (storeDepth <= 0 || storeWidth <= 0) { String errMsg = "Depth and width must be > than 0. Depth: " + storeDepth + ". Width: " + storeWidth; @@ -266,12 +260,10 @@ protected void verifyHashStoreProperties( HashMap hsProperties = loadHashStoreYaml(storePath); int existingStoreDepth = (int) hsProperties.get(HashStoreProperties.storeDepth.name()); int existingStoreWidth = (int) hsProperties.get(HashStoreProperties.storeWidth.name()); - String existingStoreAlgorithm = (String) hsProperties.get( - HashStoreProperties.storeAlgorithm.name() - ); - String existingStoreMetadataNs = (String) hsProperties.get( - HashStoreProperties.storeMetadataNamespace.name() - ); + String existingStoreAlgorithm = + (String) hsProperties.get(HashStoreProperties.storeAlgorithm.name()); + String existingStoreMetadataNs = + (String) hsProperties.get(HashStoreProperties.storeMetadataNamespace.name()); FileHashStoreUtility.checkObjectEquality("store depth", storeDepth, existingStoreDepth); FileHashStoreUtility.checkObjectEquality("store width", storeWidth, existingStoreWidth); @@ -286,11 +278,9 @@ protected void verifyHashStoreProperties( logFileHashStore.debug("hashstore.yaml not found, checking store path for" + " `/objects`, `/metadata` and `/refs` directories."); if (Files.isDirectory(storePath)) { - Path[] conflictingDirectories = { - storePath.resolve("objects"), - storePath.resolve("metadata"), - storePath.resolve("refs") - }; + Path[] conflictingDirectories = + {storePath.resolve("objects"), storePath.resolve("metadata"), + storePath.resolve("refs")}; for (Path dir : conflictingDirectories) { if (Files.exists(dir) && Files.isDirectory(dir)) { String errMsg = "FileHashStore - Unable to initialize HashStore." @@ -322,21 +312,15 @@ protected HashMap loadHashStoreYaml(Path storePath) throws IOExc try { HashMap hashStoreYamlProperties = om.readValue(hashStoreYamlFile, HashMap.class); hsProperties.put( - HashStoreProperties.storeDepth.name(), hashStoreYamlProperties.get("store_depth") - ); + HashStoreProperties.storeDepth.name(), hashStoreYamlProperties.get("store_depth")); hsProperties.put( - HashStoreProperties.storeWidth.name(), hashStoreYamlProperties.get("store_width") - ); + HashStoreProperties.storeWidth.name(), hashStoreYamlProperties.get("store_width")); hsProperties.put( - HashStoreProperties.storeAlgorithm.name(), hashStoreYamlProperties.get( - "store_algorithm" - ) - ); + HashStoreProperties.storeAlgorithm.name(), + hashStoreYamlProperties.get("store_algorithm")); hsProperties.put( - HashStoreProperties.storeMetadataNamespace.name(), hashStoreYamlProperties.get( - "store_metadata_namespace" - ) - ); + HashStoreProperties.storeMetadataNamespace.name(), + hashStoreYamlProperties.get("store_metadata_namespace")); } catch (IOException ioe) { logFileHashStore.fatal( @@ -357,8 +341,7 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter(Files.newOutputStream(hashstoreYaml), StandardCharsets.UTF_8) - )) { + new OutputStreamWriter(Files.newOutputStream(hashstoreYaml), StandardCharsets.UTF_8))) { writer.write(yamlString); } catch (IOException ioe) { @@ -379,8 +362,7 @@ protected void writeHashStoreYaml(String yamlString) throws IOException { * @return String that representing the contents of 'hashstore.yaml' */ protected String buildHashStoreYamlString( - int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace - ) { + int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace) { return String.format(""" # Default configuration variables for HashStore @@ -413,8 +395,7 @@ protected String buildHashStoreYamlString( - "SHA-256" - "SHA-384" - "SHA-512" - """, storeDepth, storeWidth, storeMetadataNamespace, storeAlgorithm - ); + """, storeDepth, storeWidth, storeMetadataNamespace, storeAlgorithm); } // HashStore Public API Methods @@ -422,9 +403,8 @@ protected String buildHashStoreYamlString( @Override public ObjectMetadata storeObject( InputStream object, String pid, String additionalAlgorithm, String checksum, - String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException, - PidRefsFileExistsException { + String checksumAlgorithm, long objSize) + throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException { logFileHashStore.debug("Storing data object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(object, "object"); @@ -458,8 +438,7 @@ public ObjectMetadata storeObject( private ObjectMetadata syncPutObject( InputStream object, String pid, String additionalAlgorithm, String checksum, String checksumAlgorithm, long objSize) - throws NoSuchAlgorithmException, IOException, RuntimeException, - InterruptedException { + throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException { try { // Lock pid for thread safety, transaction control and atomic writing // An object is stored once and only once @@ -479,9 +458,8 @@ private ObjectMetadata syncPutObject( + additionalAlgorithm + ". checksum: " + checksum + ". checksumAlgorithm: " + checksumAlgorithm); // Store object - ObjectMetadata objInfo = putObject( - object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize - ); + ObjectMetadata objInfo = + putObject(object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize); // Tag object String cid = objInfo.cid(); tagObject(pid, cid); @@ -527,8 +505,9 @@ private ObjectMetadata syncPutObject( * Overload method for storeObject with just an InputStream */ @Override - public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, - IOException, PidRefsFileExistsException, RuntimeException, InterruptedException { + public ObjectMetadata storeObject(InputStream object) + throws NoSuchAlgorithmException, IOException, RuntimeException, + InterruptedException { // 'putObject' is called directly to bypass the pid synchronization implemented to // efficiently handle object store requests without a pid. This scenario occurs when // metadata about the object (ex. form data including the pid, checksum, checksum @@ -546,8 +525,9 @@ public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmExce @Override - public void tagObject(String pid, String cid) throws IOException, PidRefsFileExistsException, - NoSuchAlgorithmException, FileNotFoundException, InterruptedException { + public void tagObject(String pid, String cid) + throws IOException, NoSuchAlgorithmException, + InterruptedException { logFileHashStore.debug("Tagging cid (" + cid + ") with pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid"); @@ -582,7 +562,7 @@ public void tagObject(String pid, String cid) throws IOException, PidRefsFileExi @Override public String storeMetadata(InputStream metadata, String pid, String formatId) - throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, + throws IOException, IllegalArgumentException, InterruptedException, NoSuchAlgorithmException { logFileHashStore.debug("Storing metadata for pid: " + pid + ", with formatId: " + formatId); // Validate input parameters @@ -612,8 +592,8 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF throws InterruptedException, IOException, NoSuchAlgorithmException { // Get the metadata document id, which is the synchronization value String pidFormatId = pid + checkedFormatId; - String metadataDocId = FileHashStoreUtility.getPidHexDigest(pidFormatId, - OBJECT_STORE_ALGORITHM); + String metadataDocId = + FileHashStoreUtility.getPidHexDigest(pidFormatId, OBJECT_STORE_ALGORITHM); logFileHashStore.debug( "putMetadata() called to store metadata for pid: " + pid + ", with formatId: " + checkedFormatId + " for metadata document: " + metadataDocId); @@ -646,15 +626,16 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF * Overload method for storeMetadata with default metadata namespace */ @Override - public String storeMetadata(InputStream metadata, String pid) throws IOException, - IllegalArgumentException, FileNotFoundException, InterruptedException, + public String storeMetadata(InputStream metadata, String pid) + throws IOException, IllegalArgumentException, InterruptedException, NoSuchAlgorithmException { return storeMetadata(metadata, pid, DEFAULT_METADATA_NAMESPACE); } @Override - public InputStream retrieveObject(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { + public InputStream retrieveObject(String pid) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { logFileHashStore.debug("Retrieving InputStream to data object for pid: " + pid); // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid"); @@ -687,7 +668,7 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, @Override public InputStream retrieveMetadata(String pid, String formatId) - throws IllegalArgumentException, FileNotFoundException, IOException, + throws IllegalArgumentException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( "Retrieving metadata document for pid: " + pid + " with formatId: " + formatId); @@ -704,8 +685,9 @@ public InputStream retrieveMetadata(String pid, String formatId) * Overload method for retrieveMetadata with default metadata namespace */ @Override - public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { + public InputStream retrieveMetadata(String pid) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { logFileHashStore.debug( "Retrieving metadata for pid: " + pid + " with default metadata namespace: "); // Validate input parameters @@ -790,8 +772,7 @@ public void deleteObject(String pid) // Since we must access the cid reference file, the `cid` must be synchronized synchronizeObjectLockedCids(cidRead); - Path absCidRefsPath = - getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); @@ -854,13 +835,13 @@ public void deleteIfInvalidObject( try { validateAlgorithm(checksumAlgorithm); // If no exceptions thrown, calculate the checksum with the given algo - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objCid - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + objCid); Path pathToCidObject = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); try (InputStream inputStream = Files.newInputStream(pathToCidObject)) { - digestFromHexDigests = FileHashStoreUtility.calculateHexDigest(inputStream, - checksumAlgorithm); + digestFromHexDigests = + FileHashStoreUtility.calculateHexDigest(inputStream, checksumAlgorithm); } catch (IOException ioe) { String errMsg = "Unexpected error when calculating a checksum for cid: " + objCid @@ -903,8 +884,9 @@ public void deleteIfInvalidObject( } @Override - public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException, InterruptedException { + public void deleteMetadata(String pid, String formatId) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException { logFileHashStore.debug( "Deleting metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters @@ -1013,8 +995,9 @@ protected Collection syncRenameMetadataDocForDeletion( } @Override - public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { + public String getHexDigest(String pid, String algorithm) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { logFileHashStore.debug("Calculating hex digest for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); @@ -1036,13 +1019,11 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE } InputStream dataStream = Files.newInputStream(objRealPath); - String mdObjectHexDigest = FileHashStoreUtility.calculateHexDigest( - dataStream, algorithm - ); + String mdObjectHexDigest = + FileHashStoreUtility.calculateHexDigest(dataStream, algorithm); logFileHashStore.info( "Hex digest calculated for pid: " + pid + ", with hex digest value: " - + mdObjectHexDigest - ); + + mdObjectHexDigest); return mdObjectHexDigest; } } @@ -1050,22 +1031,22 @@ public String getHexDigest(String pid, String algorithm) throws IllegalArgumentE // FileHashStore Core & Supporting Methods /** - * Checks whether an object referenced by a pid exists and returns a map containing the - * absolute path to the object, pid refs file, cid refs file and sysmeta document. + * Checks whether an object referenced by a pid exists and returns a map containing the absolute + * path to the object, pid refs file, cid refs file and sysmeta document. * * @param pid Authority-based identifier * @return Map containing the following keys: cid, cid_object_path, cid_refs_path, - * pid_refs_path, sysmeta_path - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs - * file's absolute address is not valid + * pid_refs_path, sysmeta_path + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs file's + * absolute address is not valid * @throws IOException Unable to read from a pid refs file or pid refs * file does not exist - * @throws OrphanRefsFilesException pid and cid refs file found, but object does - * not exist - * @throws OrphanPidRefsFileException When pid refs file exists and the cid found - * inside does not exist. - * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the - * expected pid is not found in the cid refs file. + * @throws OrphanRefsFilesException pid and cid refs file found, but object does not + * exist + * @throws OrphanPidRefsFileException When pid refs file exists and the cid found inside + * does not exist. + * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the expected + * pid is not found in the cid refs file. */ protected ObjectInfo findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, @@ -1101,9 +1082,9 @@ protected ObjectInfo findObject(String pid) Path metadataPidExpectedPath = getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); if (Files.exists(metadataPidExpectedPath)) { - return new ObjectInfo( - cid, realPath.toString(), absCidRefsPath.toString(), - absPidRefsPath.toString(), metadataPidExpectedPath.toString()); + return new ObjectInfo(cid, realPath.toString(), absCidRefsPath.toString(), + absPidRefsPath.toString(), + metadataPidExpectedPath.toString()); } else { return new ObjectInfo(cid, realPath.toString(), absCidRefsPath.toString(), absPidRefsPath.toString(), "Does not exist"); @@ -1138,8 +1119,8 @@ protected ObjectInfo findObject(String pid) * hex digest value of an authority based identifier, usually provided as a persistent * identifier (pid). If an additional algorithm is provided and supported, its respective hex * digest value will be included in hexDigests map. If a checksum and checksumAlgorithm is - * provided, FileHashStore will validate the given checksum against the hex digest produced - * of the supplied checksumAlgorithm. + * provided, FileHashStore will validate the given checksum against the hex digest produced of + * the supplied checksumAlgorithm. * * @param object InputStream for file * @param pid Authority-based identifier @@ -1148,7 +1129,7 @@ protected ObjectInfo findObject(String pid) * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing * @return 'ObjectMetadata' object that contains the file id, size, and a checksum map based on - * the default algorithm list. + * the default algorithm list. * @throws IOException I/O Error when writing file, generating checksums, * moving file or deleting tmpFile upon duplicate found * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is @@ -1243,7 +1224,7 @@ protected ObjectMetadata putObject( * algorithm against a list of hex digests. If there is a mismatch, the tmpFile will be deleted * and exceptions will be thrown. * - * @param compareChecksum Decide whether to proceed with comparing checksums + * @param compareChecksum Decide whether to proceed with comparing checksums * @param checksum Expected checksum value of object * @param checksumAlgorithm Hash algorithm of checksum value * @param tmpFile Path to the file that is being evaluated @@ -1253,8 +1234,8 @@ protected ObjectMetadata putObject( */ protected void validateTmpObject( boolean compareChecksum, String checksum, String checksumAlgorithm, File tmpFile, - Map hexDigests, long expectedSize - ) throws NoSuchAlgorithmException, NonMatchingChecksumException, NonMatchingObjSizeException, + Map hexDigests, long expectedSize) + throws NoSuchAlgorithmException, NonMatchingChecksumException, NonMatchingObjSizeException, IOException { if (expectedSize > 0) { long storedObjFileSize = Files.size(Paths.get(tmpFile.toString())); @@ -1265,8 +1246,8 @@ protected void validateTmpObject( } catch (Exception ge) { String errMsg = - "objSize given is not equal to the stored object size. ObjSize: " + expectedSize - + ". storedObjFileSize: " + storedObjFileSize + "objSize given is not equal to the stored object size. ObjSize: " + + expectedSize + ". storedObjFileSize: " + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); logFileHashStore.error(errMsg); throw new NonMatchingObjSizeException(errMsg); @@ -1329,8 +1310,8 @@ protected void validateTmpObject( * @throws IllegalArgumentException Algorithm cannot be empty * @throws NoSuchAlgorithmException Algorithm not supported */ - protected boolean validateAlgorithm(String algorithm) throws NullPointerException, - IllegalArgumentException, NoSuchAlgorithmException { + protected boolean validateAlgorithm(String algorithm) + throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { FileHashStoreUtility.ensureNotNull(algorithm, "algorithm"); FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); @@ -1420,8 +1401,8 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor * @throws FileNotFoundException tmpFile cannot be found */ protected Map writeToTmpFileAndGenerateChecksums( - File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { + File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm) + throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { @@ -1500,13 +1481,13 @@ protected Map writeToTmpFileAndGenerateChecksums( hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); if (generateAddAlgo) { - String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()) - .toLowerCase(); + String extraAlgoDigest = + DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } if (generateCsAlgo) { - String extraChecksumDigest = DatatypeConverter.printHexBinary(checksumAlgo.digest()) - .toLowerCase(); + String extraChecksumDigest = + DatatypeConverter.printHexBinary(checksumAlgo.digest()).toLowerCase(); hexDigests.put(checksumAlgorithm, extraChecksumDigest); } logFileHashStore.debug( @@ -1530,8 +1511,9 @@ protected Map writeToTmpFileAndGenerateChecksums( * @throws AtomicMoveNotSupportedException When ATOMIC_MOVE is not supported (usually * encountered when moving across file systems) */ - protected void move(File source, File target, String entity) throws IOException, - SecurityException, AtomicMoveNotSupportedException, FileAlreadyExistsException { + protected void move(File source, File target, String entity) + throws IOException, SecurityException, AtomicMoveNotSupportedException, + FileAlreadyExistsException { logFileHashStore.debug( "Moving " + entity + ", from source: " + source + ", to target: " + target); // Validate input parameters @@ -1622,17 +1604,17 @@ protected void deleteObjectByCid(String cid) } /** - * Create the pid refs file and create/update cid refs files in HashStore to establish - * the relationship between a 'pid' and a 'cid' + * Create the pid refs file and create/update cid refs files in HashStore to establish the + * relationship between a 'pid' and a 'cid' * * @param pid Persistent or authority-based identifier * @param cid Content identifier * @throws NoSuchAlgorithmException If there is an issue related to calculating hashes - * @throws IOException If there is an issue reading/writing a refs file - * @throws InterruptedException If there is an issue when synchronizing pid or cid values + * @throws IOException If there is an issue reading/writing a refs file + * @throws InterruptedException If there is an issue when synchronizing pid or cid values */ - protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgorithmException, - IOException, InterruptedException { + protected void storeHashStoreRefsFiles(String pid, String cid) + throws NoSuchAlgorithmException, IOException, InterruptedException { try { // Immediately synchronize cid and pid synchronizeObjectLockedCids(cid); @@ -1669,8 +1651,8 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo // Check if the retrieved cid refs file exists and pid is referenced Path retrievedAbsCidRefsPath = getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); - if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile(pid, retrievedAbsCidRefsPath - )) { + if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( + pid, retrievedAbsCidRefsPath)) { // This pid is accounted for and tagged as expected. String errMsg = "Pid refs file already exists for pid: " + pid + ", and the associated cid refs file contains the " @@ -1691,9 +1673,9 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "Object with cid: " + cid + " has been updated and tagged successfully with pid: " - + pid); + logFileHashStore.info("Object with cid: " + cid + + " has been updated and tagged successfully with pid: " + + pid); return; } @@ -1725,8 +1707,8 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throws NoSuchAlgo * @throws NoSuchAlgorithmException When there is an algorithm used that is not supported * @throws IOException When there is an issue deleting refs files */ - protected void unTagObject(String pid, String cid) throws InterruptedException, - NoSuchAlgorithmException, IOException { + protected void unTagObject(String pid, String cid) + throws InterruptedException, NoSuchAlgorithmException, IOException { // Validate input parameters FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); @@ -1789,8 +1771,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, // Since we must access the cid reference file, the `cid` must be synchronized synchronizeObjectLockedCids(cidRead); - Path absCidRefsPath = - getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); @@ -1821,8 +1802,7 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, } catch (PidRefsFileNotFoundException prfnfe) { // `findObject` throws this exception if the pid refs file is not found // Check to see if pid is in the `cid refs file`and attempt to remove it - Path absCidRefsPath = - getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); String errMsg = "Pid refs file not found, removed pid found in cid refs file: " @@ -1849,8 +1829,8 @@ protected void unTagObject(String pid, String cid) throws InterruptedException, * @throws IOException Unable to read any of the refs files */ protected void verifyHashStoreRefsFiles( - String pid, String cid, Path absPidRefsPath, Path absCidRefsPath - ) throws FileNotFoundException, CidNotFoundInPidRefsFileException, + String pid, String cid, Path absPidRefsPath, Path absCidRefsPath) + throws FileNotFoundException, CidNotFoundInPidRefsFileException, PidNotFoundInCidRefsFileException, IOException { // First confirm that the refs files have been created/moved to where they need to be if (!Files.exists(absCidRefsPath)) { @@ -1892,16 +1872,14 @@ protected void verifyHashStoreRefsFiles( * * @param ref Authority-based or persistent identifier to write * @param refType Type of reference 'pid' or 'cid' to include in the log - * @throws IOException Failure to write refs file * @return File object with single reference + * @throws IOException Failure to write refs file */ protected File writeRefsFile(String ref, String refType) throws IOException { File cidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(cidRefsTmpFile.toPath()), StandardCharsets.UTF_8 - ) - )) { + new OutputStreamWriter(Files.newOutputStream(cidRefsTmpFile.toPath()), + StandardCharsets.UTF_8))) { writer.write(ref); writer.close(); @@ -1953,9 +1931,9 @@ protected void updateRefsFile(String ref, Path absRefsPath, HashStoreRefUpdateTy try { // Obtain a lock on the file before updating it - try (FileChannel channel = FileChannel.open( - absRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE - ); FileLock ignored = channel.lock()) { + try (FileChannel channel = FileChannel.open(absRefsPath, StandardOpenOption.READ, + StandardOpenOption.WRITE); + FileLock ignored = channel.lock()) { Collection lines = new ArrayList<>(Files.readAllLines(absRefsPath)); if (updateType.equals(HashStoreRefUpdateTypes.add)) { @@ -2017,9 +1995,8 @@ protected String putMetadata(InputStream metadata, String pid, String formatId) // by using the hash of the 'pid', with the file name being the hash of the 'pid+formatId' Path pathToStoredMetadata = getHashStoreMetadataPath(pid, checkedFormatId); - File tmpMetadataFile = FileHashStoreUtility.generateTmpFile( - "tmp", METADATA_TMP_FILE_DIRECTORY - ); + File tmpMetadataFile = + FileHashStoreUtility.generateTmpFile("tmp", METADATA_TMP_FILE_DIRECTORY); boolean tmpMetadataWritten = writeToTmpMetadataFile(tmpMetadataFile, metadata); if (tmpMetadataWritten) { logFileHashStore.debug( @@ -2071,16 +2048,16 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea * @param abpId Authority-based or persistent identifier * @return Path to the HasHStore data object * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported - * @throws IOException Issue when reading a pid refs file to retrieve a 'cid' + * @throws IOException Issue when reading a pid refs file to retrieve a 'cid' */ - protected Path getHashStoreDataObjectPath(String abpId) throws NoSuchAlgorithmException, - IOException { + protected Path getHashStoreDataObjectPath(String abpId) + throws NoSuchAlgorithmException, IOException { // Retrieve the 'cid' from the pid refs file String objectCid; String hashedId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); - String pidRefsFileRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, hashedId - ); + String pidRefsFileRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + hashedId); Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRefsFileRelativePath); if (!Files.exists(pathToPidRefsFile)) { String errMsg = @@ -2092,9 +2069,9 @@ protected Path getHashStoreDataObjectPath(String abpId) throws NoSuchAlgorithmEx objectCid = new String(Files.readAllBytes(pathToPidRefsFile)); } // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + objectCid); // Real path to the data object return OBJECT_STORE_DIRECTORY.resolve(objRelativePath); } @@ -2102,7 +2079,7 @@ protected Path getHashStoreDataObjectPath(String abpId) throws NoSuchAlgorithmEx /** * Get the absolute path to a HashStore metadata document * - * @param abpId Authority-based or persistent identifier + * @param abpId Authority-based or persistent identifier * @param formatId Metadata formatId or namespace * @return Path to the requested metadata document * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported @@ -2152,8 +2129,9 @@ protected InputStream getHashStoreMetadataInputStream(String pid, String formatI return metadataCidInputStream; } catch (IOException ioe) { - String errMsg = "Unexpected error when creating InputStream for pid: " + pid - + " with formatId: " + formatId + ". IOException: " + ioe.getMessage(); + String errMsg = + "Unexpected error when creating InputStream for pid: " + pid + " with formatId: " + + formatId + ". IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -2162,7 +2140,7 @@ protected InputStream getHashStoreMetadataInputStream(String pid, String formatI /** * Get the absolute path to a HashStore pid or cid ref file * - * @param abpcId Authority-based identifier, persistent identifier or content identifier + * @param abpcId Authority-based identifier, persistent identifier or content identifier * @param refType {@link HashStoreIdTypes} * @return Path to the requested refs file * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported @@ -2186,23 +2164,21 @@ protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) abpcId); realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); } - default -> - throw new IllegalArgumentException("Ref type must be a type of HashStoreIdTypes " - + "(pid or cid)"); + default -> throw new IllegalArgumentException( + "Ref type must be a type of HashStoreIdTypes " + "(pid or cid)"); } return realPath; } /** * Storing, deleting and untagging objects are synchronized together. Duplicate store object - * requests for a pid are rejected, but deleting an object will wait for a pid to be released - * if it's found to be in use before proceeding. + * requests for a pid are rejected, but deleting an object will wait for a pid to be released if + * it's found to be in use before proceeding. * * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - private static void synchronizeObjectLockedPids(String pid) - throws InterruptedException { + private static void synchronizeObjectLockedPids(String pid) throws InterruptedException { synchronized (objectLockedPids) { while (objectLockedPids.contains(pid)) { try { @@ -2295,8 +2271,7 @@ private static void synchronizeObjectLockedCids(String cid) throws InterruptedEx throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "Synchronizing objectLockedCids for cid: " + cid); + logFileHashStore.debug("Synchronizing objectLockedCids for cid: " + cid); objectLockedCids.add(cid); } } @@ -2315,9 +2290,9 @@ private static void releaseObjectLockedCids(String cid) { } /** - * Synchronize the pid tagging process since {@code tagObject} is a Public API method that - * can be called directly. This is used in the scenario when the client is missing metadata - * but must store the data object first. + * Synchronize the pid tagging process since {@code tagObject} is a Public API method that can + * be called directly. This is used in the scenario when the client is missing metadata but must + * store the data object first. * * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid @@ -2335,8 +2310,7 @@ private static void synchronizeReferenceLockedPids(String pid) throws Interrupte throw new InterruptedException(errMsg); } } - logFileHashStore.debug( - "Synchronizing referenceLockedPids for pid: " + pid); + logFileHashStore.debug("Synchronizing referenceLockedPids for pid: " + pid); referenceLockedPids.add(pid); } } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 2580347b..105ac569 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -23,8 +23,8 @@ import javax.xml.bind.DatatypeConverter; /** - * FileHashStoreUtility is a utility class that encapsulates generic or shared functionality - * in FileHashStore and/or related classes. + * FileHashStoreUtility is a utility class that encapsulates generic or shared functionality in + * FileHashStore and/or related classes. */ public class FileHashStoreUtility { @@ -140,7 +140,7 @@ public static List getFilesFromDir(Path directory) throws IOException { /** * Rename the given path to the 'file name' + '_delete' - * + * * @param pathToRename The path to the file to be renamed with '_delete' * @return Path to the file with '_delete' appended * @throws IOException Issue with renaming the given file path @@ -234,6 +234,7 @@ public static void checkForNotEmptyAndValidString(String string, String argument /** * Iterates over a given string and checks each character to make sure that there are no * whitespaces, tabs, new lines or other illegal characters. + * * @param string String to check * @return True if valid, False if illegal characters found. */ @@ -298,8 +299,8 @@ public static String getHierarchicalPathString(int depth, int width, String dige } /** - * Creates an empty/temporary file in a given location. If this file is not moved, it will - * be deleted upon JVM gracefully exiting or shutting down. + * Creates an empty/temporary file in a given location. If this file is not moved, it will be + * deleted upon JVM gracefully exiting or shutting down. * * @param prefix string to prepend before tmp file * @param directory location to create tmp file @@ -307,8 +308,8 @@ public static String getHierarchicalPathString(int depth, int width, String dige * @throws IOException Issues with generating tmpFile * @throws SecurityException Insufficient permissions to create tmpFile */ - public static File generateTmpFile(String prefix, Path directory) throws IOException, - SecurityException { + public static File generateTmpFile(String prefix, Path directory) + throws IOException, SecurityException { Random rand = new Random(); int randomNumber = rand.nextInt(1000000); String newPrefix = prefix + "-" + System.currentTimeMillis() + randomNumber; @@ -322,7 +323,7 @@ public static File generateTmpFile(String prefix, Path directory) throws IOExcep /** * Ensures that two objects are equal. If not, throws an IllegalArgumentException. * - * @param nameValue The name of the object being checked + * @param nameValue The name of the object being checked * @param suppliedValue The value supplied to compare * @param existingValue The existing value to compare with * @throws IllegalArgumentException If the supplied value is not equal to the existing value diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 15bb683c..4383e38b 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -43,8 +43,7 @@ public void getHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { hsProperties = storeProperties; @@ -90,8 +89,8 @@ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String di } /** - * Utility method to get absolute path of a given object and objType - * ("objects", "metadata", "cid", or "pid"). + * Utility method to get absolute path of a given object and objType ("objects", "metadata", + * "cid", or "pid"). */ public Path getObjectAbsPath(String id, String objType) throws Exception { String storeAlgo = hsProperties.getProperty("storeAlgorithm"); @@ -108,24 +107,23 @@ public Path getObjectAbsPath(String id, String objType) throws Exception { if (objType.equals("metadata")) { // Get pid metadata directory hash(pid) String pidHash = FileHashStoreUtility.getPidHexDigest(id, storeAlgo); - String pidMetadataDirectory = getHierarchicalPathString(shardDepth, shardWidth, pidHash); + String pidMetadataDirectory = + getHierarchicalPathString(shardDepth, shardWidth, pidHash); // Get sysmeta name hash(pid+default_formatId) - String metadataDocHash = - FileHashStoreUtility.getPidHexDigest(id + hsProperties.getProperty( - "storeMetadataNamespace"), storeAlgo); - absPath = storePath.resolve("metadata").resolve(pidMetadataDirectory).resolve(metadataDocHash); + String metadataDocHash = FileHashStoreUtility.getPidHexDigest( + id + hsProperties.getProperty("storeMetadataNamespace"), storeAlgo); + absPath = storePath.resolve("metadata").resolve(pidMetadataDirectory) + .resolve(metadataDocHash); } if (objType.equals("cid")) { - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - shardDepth, shardWidth, id - ); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(shardDepth, shardWidth, id); absPath = storePath.resolve("refs/cids").resolve(pidRelativePath); } if (objType.equals("pid")) { String hashId = FileHashStoreUtility.getPidHexDigest(id, storeAlgo); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - shardDepth, shardWidth, hashId - ); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(shardDepth, shardWidth, hashId); absPath = storePath.resolve("refs/pids").resolve(pidRelativePath); } return absPath; @@ -147,9 +145,10 @@ public void client_createHashStore() throws Exception { String optAlgoValue = "SHA-256"; String optFormatId = "-nsp"; String optFormatIdValue = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; - String[] args = {optCreateHashstore, optStore, optStorePath, optStoreDepth, - optStoreDepthValue, optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, - optFormatId, optFormatIdValue}; + String[] args = + {optCreateHashstore, optStore, optStorePath, optStoreDepth, optStoreDepthValue, + optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, optFormatId, + optFormatIdValue}; HashStoreClient.main(args); Path storePath = Paths.get(optStorePath); @@ -184,8 +183,8 @@ public void client_storeObjects() throws Exception { String optPath = "-path"; String optObjectPath = testDataFile.toString(); String optPid = "-pid"; - String[] args = {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, - pid}; + String[] args = + {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, pid}; HashStoreClient.main(args); // Confirm object was stored @@ -226,8 +225,9 @@ public void client_storeMetadata() throws Exception { String optPid = "-pid"; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optStoreMetadata, optStore, optStorePath, optPath, optObjectPath, - optPid, pid, optFormatId, optFormatIdValue}; + String[] args = + {optStoreMetadata, optStore, optStorePath, optPath, optObjectPath, optPid, pid, + optFormatId, optFormatIdValue}; HashStoreClient.main(args); // Confirm metadata was stored @@ -235,20 +235,17 @@ public void client_storeMetadata() throws Exception { String storeAlgorithm = hsProperties.getProperty("storeAlgorithm"); int storeDepth = Integer.parseInt(hsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(hsProperties.getProperty("storeWidth")); - Path metadataDirectory = Paths.get(hsProperties.getProperty("storePath")).resolve( - "metadata" - ); + Path metadataDirectory = + Paths.get(hsProperties.getProperty("storePath")).resolve("metadata"); String metadataCidPartOne = FileHashStoreUtility.getPidHexDigest(pid, storeAlgorithm); - String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, metadataCidPartOne - ); + String pidMetadataDirectory = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataCidPartOne); // The file name for the metadata document is the hash of the supplied 'formatId' - String metadataCidPartTwo = FileHashStoreUtility.getPidHexDigest( - pid + optFormatIdValue, storeAlgorithm - ); - Path expectedMetadataPath = metadataDirectory.resolve(pidMetadataDirectory).resolve( - metadataCidPartTwo - ); + String metadataCidPartTwo = + FileHashStoreUtility.getPidHexDigest(pid + optFormatIdValue, storeAlgorithm); + Path expectedMetadataPath = + metadataDirectory.resolve(pidMetadataDirectory).resolve(metadataCidPartTwo); assertTrue(Files.exists(expectedMetadataPath)); // Put things back @@ -324,8 +321,8 @@ public void client_retrieveMetadata() throws Exception { String optPid = "-pid"; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, pid, - optFormatId, optFormatIdValue}; + String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, pid, optFormatId, + optFormatIdValue}; HashStoreClient.main(args); // Put things back @@ -405,8 +402,8 @@ public void client_deleteMetadata() throws Exception { String optPid = "-pid"; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, pid, - optFormatId, optFormatIdValue}; + String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, pid, optFormatId, + optFormatIdValue}; HashStoreClient.main(args); // Confirm metadata was deleted @@ -449,8 +446,8 @@ public void client_getHexDigest() throws Exception { String optPid = "-pid"; String optAlgo = "-algo"; String optAlgoValue = "SHA-256"; - String[] args = {optGetChecksum, optStore, optStorePath, optPid, pid, optAlgo, - optAlgoValue}; + String[] args = + {optGetChecksum, optStore, optStorePath, optPid, pid, optAlgo, optAlgoValue}; HashStoreClient.main(args); diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java index 1a2b6c5c..738d62af 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -9,9 +9,9 @@ import java.io.InputStream; /** - * A HashStoreRunnable represents the data needed for a single request to HashStore - * packaged as a Runnable task that can be executed within a thread pool, typically - * provided by the Executor service. + * A HashStoreRunnable represents the data needed for a single request to HashStore packaged as a + * Runnable task that can be executed within a thread pool, typically provided by the Executor + * service. */ public class HashStoreRunnable implements Runnable { private static final Log log = LogFactory.getLog(HashStoreRunnable.class); @@ -30,8 +30,8 @@ public class HashStoreRunnable implements Runnable { * @param objStream Stream to data object * @param pid Persistent or authority-based identifier */ - public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream objStream, - String pid) { + public HashStoreRunnable( + HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { FileHashStoreUtility.ensureNotNull(hashstore, "hashstore"); FileHashStoreUtility.checkPositive(publicAPIMethod); this.hashstore = hashstore; @@ -44,7 +44,8 @@ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, InputStream o * Constructor for HashStoreRunnable where only a pid is necessary (ex. to delete an object). * * @param hashstore HashStore object to interact with - * @param publicAPIMethod Integer representing action/Public API method (ex. 2 for deleteObject) + * @param publicAPIMethod Integer representing action/Public API method (ex. 2 for + * deleteObject) * @param pid Persistent or authority-based identifier */ public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { @@ -87,8 +88,7 @@ public void run() { } } } catch (HashStoreServiceException | IOException hse) { - log.error( - "HashStoreRunnable ~ Unexpected Error: " + hse.getMessage()); + log.error("HashStoreRunnable ~ Unexpected Error: " + hse.getMessage()); } } } diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index caad3b85..990b922b 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -36,8 +36,7 @@ public void getHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); @@ -77,8 +76,8 @@ public void hashStore_classPackageNull() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); hashStore = HashStoreFactory.getHashStore(null, storeProperties); }); @@ -98,8 +97,8 @@ public void hashStore_classPackageNotFound() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); @@ -127,7 +126,8 @@ public void hashStore_storeObjects() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = + hashStore.storeObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); @@ -154,11 +154,11 @@ public void getHashStore_objFolderExists() throws Exception { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); - assertThrows(HashStoreFactoryException.class, () -> hashStore = - HashStoreFactory.getHashStore(classPackage, storeProperties)); + assertThrows( + HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); } /** @@ -179,11 +179,11 @@ public void getHashStore_metadataFolderExists() throws Exception { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); - assertThrows(HashStoreFactoryException.class, () -> hashStore = - HashStoreFactory.getHashStore(classPackage, storeProperties)); + assertThrows( + HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); } /** @@ -204,10 +204,10 @@ public void getHashStore_refsFolderExists() throws Exception { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); - assertThrows(HashStoreFactoryException.class, () -> hashStore = - HashStoreFactory.getHashStore(classPackage, storeProperties)); + assertThrows( + HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); } } diff --git a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java index 1f4614c6..e0a340d3 100644 --- a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java +++ b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java @@ -29,16 +29,13 @@ public void initializeInstanceVariables() { hexDigests.put("md5", "f4ea2d07db950873462a064937197b0f"); hexDigests.put("sha1", "3d25436c4490b08a2646e283dada5c60e5c0539d"); hexDigests.put( - "sha256", "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a" - ); + "sha256", "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"); hexDigests.put( "sha384", - "a204678330fcdc04980c9327d4e5daf01ab7541e8a351d49a7e9c5005439dce749ada39c4c35f573dd7d307cca11bea8" - ); + "a204678330fcdc04980c9327d4e5daf01ab7541e8a351d49a7e9c5005439dce749ada39c4c35f573dd7d307cca11bea8"); hexDigests.put( "sha512", - "bf9e7f4d4e66bd082817d87659d1d57c2220c376cd032ed97cadd481cf40d78dd479cbed14d34d98bae8cebc603b40c633d088751f07155a94468aa59e2ad109" - ); + "bf9e7f4d4e66bd082817d87659d1d57c2220c376cd032ed97cadd481cf40d78dd479cbed14d34d98bae8cebc603b40c633d088751f07155a94468aa59e2ad109"); } /** diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java index 445a2988..f46dcb3e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java @@ -50,8 +50,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { fileHashStore = new FileHashStore(storeProperties); @@ -92,8 +91,8 @@ public void constructor_nullStorePath() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -111,8 +110,8 @@ public void constructor_illegalDepthArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -130,8 +129,8 @@ public void constructor_illegalWidthArg() { storeProperties.setProperty("storeWidth", "0"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -149,8 +148,8 @@ public void constructor_illegalAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "MD5"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -168,8 +167,8 @@ public void constructor_emptyAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", ""); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -187,8 +186,8 @@ public void constructor_emptySpacesAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", " "); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -240,8 +239,8 @@ public void initDefaultStore_directoryNull() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -303,8 +302,8 @@ public void testGetHashStoreYaml() throws IOException { assertEquals(hsProperties.get("storeWidth"), 2); assertEquals(hsProperties.get("storeAlgorithm"), "SHA-256"); assertEquals( - hsProperties.get("storeMetadataNamespace"), "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + hsProperties.get("storeMetadataNamespace"), + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); } /** @@ -318,8 +317,7 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); } @@ -337,8 +335,8 @@ public void testExistingHashStoreConfiguration_diffAlgorithm() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "MD5"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -357,8 +355,8 @@ public void testExistingHashStoreConfiguration_diffDepth() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -377,8 +375,8 @@ public void testExistingHashStoreConfiguration_diffWidth() { storeProperties.setProperty("storeWidth", "1"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -397,8 +395,7 @@ public void testExistingHashStoreConfiguration_diffMetadataNamespace() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.test.org/service/types/v2.0" - ); + "storeMetadataNamespace", "http://ns.test.org/service/types/v2.0"); new FileHashStore(storeProperties); }); @@ -419,8 +416,8 @@ public void testExistingHashStoreConfiguration_missingYaml() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); HashStore secondHashStore = new FileHashStore(storeProperties); diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index c7778a09..33eee518 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -70,8 +70,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { fhsProperties = storeProperties; @@ -102,9 +101,8 @@ public void storeObject() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); @@ -124,16 +122,14 @@ public void storeObject_objSize() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Check the object size long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); assertEquals(objectSize, objInfo.size()); } - } } @@ -147,9 +143,8 @@ public void storeObject_hexDigests() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); Map hexDigests = objInfo.hexDigests(); @@ -265,8 +260,8 @@ public void storeObject_zeroObjSize() { } /** - * Check that store object executes as expected with only an InputStream (does not create - * any reference files) + * Check that store object executes as expected with only an InputStream (does not create any + * reference files) */ @Test public void storeObject_overloadInputStreamOnly() throws Exception { @@ -394,9 +389,8 @@ public void storeObject_objSizeCorrect() throws Exception { long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, objectSize - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, objectSize); // Check id (sha-256 hex digest of the ab_id (pid)) assertEquals(objectSize, objInfo.size()); @@ -415,9 +409,8 @@ public void storeObject_objSizeIncorrect() { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 1000 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, 1000); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -444,8 +437,8 @@ public void storeObject_invalidAlgorithm() { } /** - * Check that store object tags cid refs file as expected when called - * to store a duplicate object (two pids that reference the same cid) + * Check that store object tags cid refs file as expected when called to store a duplicate + * object (two pids that reference the same cid) */ @Test public void storeObject_duplicate() throws Exception { @@ -458,9 +451,8 @@ public void storeObject_duplicate() throws Exception { fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String pidTwo = pid + ".test"; - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStreamDup, pidTwo, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); String cid = objInfo.cid(); Path absCidRefsPath = @@ -472,9 +464,8 @@ public void storeObject_duplicate() throws Exception { } /** - * Test that storeObject successfully stores a 1GB file - * Note 1: a 4GB successfully stored in approximately 1m30s - * Note 2: Successfully stores 250GB file confirmed from knbvm + * Test that storeObject successfully stores a 1GB file Note 1: a 4GB successfully stored in + * approximately 1m30s Note 2: Successfully stores 250GB file confirmed from knbvm */ @Test public void storeObject_largeSparseFile() throws Exception { @@ -506,8 +497,8 @@ public void storeObject_largeSparseFile() throws Exception { } /** - * Tests that temporary objects that are being worked on while storeObject is in - * progress and gets interrupted are deleted. + * Tests that temporary objects that are being worked on while storeObject is in progress and + * gets interrupted are deleted. */ @Test public void storeObject_interruptProcess() throws Exception { @@ -574,9 +565,8 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { // Submit 5 futures to the thread pool, each calling storeObject Future future1 = executorService.submit(() -> { try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -589,15 +579,16 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); - assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future2 = executorService.submit(() -> { try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -610,15 +601,16 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); - assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future3 = executorService.submit(() -> { try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -631,15 +623,16 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); - assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future4 = executorService.submit(() -> { try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -652,15 +645,16 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); - assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future5 = executorService.submit(() -> { try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { String cid = objInfo.cid(); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -673,8 +667,10 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println("storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); - assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); @@ -709,8 +705,7 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); - Runnable - request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); + Runnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); executorService.execute(request); } @@ -734,8 +729,8 @@ public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { // Confirm that 50 pid refs file exists Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" - + "/pids")); + List pidRefFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); assertEquals(50, pidRefFiles.size()); } @@ -798,8 +793,8 @@ public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { } /** - * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' - * that is already referencing another 'cid' + * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' that is + * already referencing another 'cid' */ @Test public void tagObject_PidRefsFileExistsException() throws Exception { @@ -846,19 +841,20 @@ public void deleteIfInvalidObject_correctValues() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.cid() - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertTrue(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); } } } /** - * Check that deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests - * is empty. + * Check that deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests is + * empty. */ @Test public void deleteIfInvalidObject_objInfoEmptyHexDigests() { @@ -868,14 +864,13 @@ public void deleteIfInvalidObject_objInfoEmptyHexDigests() { ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); - assertThrows( - MissingHexDigestsException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); + assertThrows(MissingHexDigestsException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); } /** - * Check that deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests - * is null. + * Check that deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests is + * null. */ @Test public void deleteIfInvalidObject_objInfoNullHexDigests() { @@ -883,14 +878,13 @@ public void deleteIfInvalidObject_objInfoNullHexDigests() { long size = 1999999; ObjectMetadata objInfo = new ObjectMetadata(null, id, size, null); - assertThrows( - IllegalArgumentException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); } /** - * Check that deleteIfInvalidObject calculates and verifies a checksum with a supported algorithm that is - * not included in the default list + * Check that deleteIfInvalidObject calculates and verifies a checksum with a supported + * algorithm that is not included in the default list */ @Test public void deleteIfInvalidObject_supportedAlgoNotInDefaultList() throws Exception { @@ -910,19 +904,20 @@ public void deleteIfInvalidObject_supportedAlgoNotInDefaultList() throws Excepti int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.cid() - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertTrue(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); } } } /** - * Check that deleteIfInvalidObject calculates throws exception when given a checksumAlgorithm that is - * not supported + * Check that deleteIfInvalidObject calculates throws exception when given a checksumAlgorithm + * that is not supported */ @Test public void deleteIfInvalidObject_unsupportedAlgo() throws Exception { @@ -933,19 +928,20 @@ public void deleteIfInvalidObject_unsupportedAlgo() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - assertThrows( - UnsupportedHashAlgorithmException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, "ValueNotRelevant", "BLAKE2S", 1000)); + assertThrows(UnsupportedHashAlgorithmException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, "ValueNotRelevant", + "BLAKE2S", 1000)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.cid() - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); // Real path to the data object - assertTrue(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertTrue(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); } } } @@ -968,20 +964,21 @@ public void deleteIfInvalidObject_mismatchedSize() throws Exception { String expectedChecksum = testData.pidData.get(pid).get("sha256"); long expectedSize = 123456789; - assertThrows( - NonMatchingObjSizeException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize)); + assertThrows(NonMatchingObjSizeException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, + defaultStoreAlgorithm, + expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.cid() - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertFalse(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); } } } @@ -1004,20 +1001,21 @@ public void deleteIfInvalidObject_mismatchedChecksum() throws Exception { String expectedChecksum = "intentionallyWrongValue"; long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertThrows( - NonMatchingChecksumException.class, - () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, defaultStoreAlgorithm, - expectedSize)); + assertThrows(NonMatchingChecksumException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, + defaultStoreAlgorithm, + expectedSize)); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // If cid is found, return the expected real path to object - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.cid() - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); // Real path to the data object - assertFalse(Files.exists(Paths.get(fhsProperties.getProperty("storePath")).resolve( - "objects").resolve(objRelativePath))); + assertFalse(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); } } } @@ -1035,7 +1033,8 @@ public void storeMetadata() throws Exception { try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + String metadataPath = + fileHashStore.storeMetadata(metadataStream, pid, testFormatId); metadataStream.close(); // Calculate absolute path @@ -1089,12 +1088,11 @@ public void storeMetadata_pidHashIsDirectory() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); String metadataPidhash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String pidMetadataDirectory = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, metadataPidhash - ); - Path expectedPidMetadataDirectory = rootDirectory.resolve("metadata").resolve( - pidMetadataDirectory - ); + String pidMetadataDirectory = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidhash); + Path expectedPidMetadataDirectory = + rootDirectory.resolve("metadata").resolve(pidMetadataDirectory); assertTrue(Files.isDirectory(expectedPidMetadataDirectory)); } @@ -1115,7 +1113,8 @@ public void storeMetadata_multipleFormatIds() throws Exception { try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); InputStream metadataStreamDup = Files.newInputStream(testMetaDataFile)) { String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + String metadataPath = + fileHashStore.storeMetadata(metadataStream, pid, testFormatId); String metadataDefaultPath = fileHashStore.storeMetadata(metadataStreamDup, pid); // Calculate absolute path @@ -1161,8 +1160,7 @@ public void storeMetadata_fileSize() throws Exception { public void storeMetadata_metadataNull() { for (String pid : testData.pidList) { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.storeMetadata(null, pid, null) - ); + IllegalArgumentException.class, () -> fileHashStore.storeMetadata(null, pid, null)); } } @@ -1229,7 +1227,7 @@ public void storeMetadata_pidEmptySpaces() { * concurrently, each of which will have to wait for the given `pid` to be released from * metadataLockedIds before proceeding to store the given metadata content from its * `storeMetadata()` request. - * + * * All requests to store the same metadata will be executed, and the existing metadata file will * be overwritten by each thread. No exceptions should be encountered during these tests. */ @@ -1299,7 +1297,8 @@ public void storeMetadata_metadataLockedIds() throws Exception { Path metadataCidAbsPath = fileHashStore.getHashStoreMetadataPath(pid, formatId); assertTrue(Files.exists(metadataCidAbsPath)); - // Confirm there are only three files in HashStore - 'hashstore.yaml', the metadata file written + // Confirm there are only three files in HashStore - 'hashstore.yaml', the metadata file + // written // and the metadata refs file that contains namespaces used try (Stream walk = Files.walk(storePath)) { long fileCount = walk.filter(Files::isRegularFile).count(); @@ -1328,14 +1327,13 @@ public void retrieveObject() throws Exception { } /** - * Check that retrieveObject throws exception when there is no object - * associated with a given pid + * Check that retrieveObject throws exception when there is no object associated with a given + * pid */ @Test public void retrieveObject_pidDoesNotExist() { - assertThrows( - FileNotFoundException.class, - () -> fileHashStore.retrieveObject("pid.whose.object.does.not.exist")); + assertThrows(FileNotFoundException.class, + () -> fileHashStore.retrieveObject("pid.whose.object.does.not.exist")); } /** @@ -1403,7 +1401,8 @@ public void retrieveObject_verifyContent() throws Exception { } // Get hex digest - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha256Digest = + DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); assertEquals(sha256Digest, sha256DigestFromTestData); @@ -1537,9 +1536,8 @@ public void retrieveMetadata_formatEmptySpaces() { public void retrieveMetadata_pidNotFound() { assertThrows(FileNotFoundException.class, () -> { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata( - "dou.2023.hs.1", storeFormatId - ); + InputStream pidInputStream = + fileHashStore.retrieveMetadata("dou.2023.hs.1", storeFormatId); pidInputStream.close(); }); } @@ -1562,7 +1560,8 @@ public void retrieveMetadata_verifyContent() throws Exception { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); // Retrieve object - try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, storeFormatId)) { + try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, + storeFormatId)) { // Read content and compare it to the SHA-256 checksum from TestDataHarness MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); try { @@ -1579,11 +1578,10 @@ public void retrieveMetadata_verifyContent() throws Exception { } // Get hex digest - String sha256MetadataDigest = DatatypeConverter.printHexBinary(sha256.digest()) - .toLowerCase(); - String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( - "metadata_cid_sha256" - ); + String sha256MetadataDigest = + DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha256MetadataDigestFromTestData = + testData.pidData.get(pid).get("metadata_cid_sha256"); assertEquals(sha256MetadataDigest, sha256MetadataDigestFromTestData); } catch (Exception e) { @@ -1612,11 +1610,11 @@ public void deleteObject_dataObjAndMetadataDocs() throws Exception { try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile)) { String testFormatId = "https://test.arcticdata.io/ns"; - String metadataPathString = fileHashStore.storeMetadata( - metadataStream, pid, testFormatId - ); + String metadataPathString = + fileHashStore.storeMetadata(metadataStream, pid, testFormatId); - String metadataDefaultPathString = fileHashStore.storeMetadata(metadataStreamTwo, pid); + String metadataDefaultPathString = + fileHashStore.storeMetadata(metadataStreamTwo, pid); Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); Path metadataPath = Paths.get(metadataPathString); Path metadataDefaultPath = Paths.get(metadataDefaultPathString); @@ -1638,8 +1636,8 @@ public void deleteObject_dataObjAndMetadataDocs() throws Exception { /** - * Confirm that deleteObject overload method with signature (String pid) deletes objects - * and does not throw exceptions if metadata documents do not exist. + * Confirm that deleteObject overload method with signature (String pid) deletes objects and + * does not throw exceptions if metadata documents do not exist. */ @Test public void deleteObject_stringPidNoMetadataDocs() throws Exception { @@ -1704,9 +1702,8 @@ public void deleteObject_referencesDeleted() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.cid(); // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); @@ -1733,9 +1730,8 @@ public void deleteObject_cidRefsFileNotEmptyObjectExistsStill() throws Exception Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String pidExtra = "dou.test" + pid; String cid = objInfo.cid(); fileHashStore.tagObject(pidExtra, cid); @@ -1765,9 +1761,8 @@ public void deleteObject_pidOrphan() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.cid(); String pidExtra = "dou.test" + pid; Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -1803,8 +1798,7 @@ public void deleteObject_pidOrphan() throws Exception { @Test public void deleteObject_pidNotFound() { assertThrows( - FileNotFoundException.class, () -> fileHashStore.deleteObject("dou.2023.hashstore.1") - ); + FileNotFoundException.class, () -> fileHashStore.deleteObject("dou.2023.hashstore.1")); } /** @@ -1812,9 +1806,7 @@ public void deleteObject_pidNotFound() { */ @Test public void deleteObject_pidNull() { - assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject(null) - ); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(null)); } /** @@ -1822,9 +1814,7 @@ public void deleteObject_pidNull() { */ @Test public void deleteObject_pidEmpty() { - assertThrows( - IllegalArgumentException.class, () -> fileHashStore.deleteObject("") - ); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject("")); } /** @@ -1832,8 +1822,7 @@ public void deleteObject_pidEmpty() { */ @Test public void deleteObject_pidEmptySpaces() { - assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(" ") - ); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.deleteObject(" ")); } /** @@ -1874,18 +1863,19 @@ public void deleteObjectByCid() throws Exception { int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); String actualCid = objInfo.cid(); - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, actualCid - ); - Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); + String cidShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + actualCid); + Path objectStoreDirectory = + rootDirectory.resolve("objects").resolve(cidShardString); assertFalse(Files.exists(objectStoreDirectory)); } } } /** - * Confirm deleteObjectByCid does not delete an object because a cid refs file - * exists (there are still pids referencing the object) + * Confirm deleteObjectByCid does not delete an object because a cid refs file exists (there are + * still pids referencing the object) */ @Test public void deleteObject_cidType_AndCidRefsExists() throws Exception { @@ -1894,9 +1884,8 @@ public void deleteObject_cidType_AndCidRefsExists() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.cid(); fileHashStore.deleteObjectByCid(cid); @@ -1933,14 +1922,12 @@ public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { // Store 1000 for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); - Runnable - request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); + Runnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); executorService.execute(request); } // Delete 1000 for (String pidAdjusted : pidModifiedList) { - Runnable - request = new HashStoreRunnable(fileHashStore, 2, pidAdjusted); + Runnable request = new HashStoreRunnable(fileHashStore, 2, pidAdjusted); executorService.execute(request); } @@ -1956,11 +1943,11 @@ public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { } assertEquals(0, objectPaths.size()); // Check that no refs files exist - List pidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" - + "/pids")); + List pidRefFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); assertEquals(0, pidRefFiles.size()); - List cidRefFiles = FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" - + "/cids")); + List cidRefFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); assertEquals(0, cidRefFiles.size()); } @@ -2041,8 +2028,7 @@ public void deleteMetadata_overload() throws Exception { } /** - * Confirm that no exceptions are thrown when called to delete metadata - * that does not exist. + * Confirm that no exceptions are thrown when called to delete metadata that does not exist. */ @Test public void deleteMetadata_pidNotFound() throws Exception { @@ -2127,9 +2113,8 @@ public void getHexDigest() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); // Then get the checksum String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); @@ -2160,8 +2145,7 @@ public void getHexDigest_pidNotFound() { @Test public void getHexDigest_pidNull() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.getHexDigest(null, "SHA-256") - ); + IllegalArgumentException.class, () -> fileHashStore.getHexDigest(null, "SHA-256")); } /** @@ -2170,8 +2154,7 @@ public void getHexDigest_pidNull() { @Test public void getHexDigest_pidEmpty() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.getHexDigest("", "SHA-256") - ); + IllegalArgumentException.class, () -> fileHashStore.getHexDigest("", "SHA-256")); } /** @@ -2180,8 +2163,7 @@ public void getHexDigest_pidEmpty() { @Test public void getHexDigest_pidEmptySpaces() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.getHexDigest(" ", "SHA-256") - ); + IllegalArgumentException.class, () -> fileHashStore.getHexDigest(" ", "SHA-256")); } /** diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index fc7b0a4f..0145fce0 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -104,9 +104,8 @@ public void findObject_cid() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objMeta = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objMeta = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); assertEquals(findObjInfo.cid(), objMeta.cid()); @@ -124,18 +123,17 @@ public void findObject_cidPath() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); String objectPath = findObjInfo.cidObjectPath(); - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, objInfo.cid() - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); assertEquals(objectPath, realPath.toString()); @@ -153,17 +151,17 @@ public void findObject_refsPaths() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); String cidRefsPath = findObjInfo.cidRefsPath(); String pidRefsPath = findObjInfo.pidRefsPath(); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.cid(), FileHashStore.HashStoreIdTypes.cid); - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, - FileHashStore.HashStoreIdTypes.pid); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.cid(), + FileHashStore.HashStoreIdTypes.cid); + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertEquals(cidRefsPath, cidRefsFilePath.toString()); assertEquals(pidRefsPath, pidRefsFilePath.toString()); @@ -210,9 +208,7 @@ public void findObject_sysmetaPath_doesNotExist() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); String objInfoSysmetaPath = findObjInfo.sysmetaPath(); @@ -223,8 +219,8 @@ public void findObject_sysmetaPath_doesNotExist() throws Exception { } /** - * Confirm findObject throws exception when cid object does not exist but reference - * files exist. + * Confirm findObject throws exception when cid object does not exist but reference files + * exist. */ @Test public void findObject_refsFileExistButObjectDoesNot() throws Exception { @@ -236,8 +232,8 @@ public void findObject_refsFileExistButObjectDoesNot() throws Exception { } /** - * Confirm that findObject throws OrphanPidRefsFileException exception when - * pid refs file found but cid refs file is missing. + * Confirm that findObject throws OrphanPidRefsFileException exception when pid refs file found + * but cid refs file is missing. */ @Test public void findObject_cidRefsFileNotFound() throws Exception { @@ -245,7 +241,8 @@ public void findObject_cidRefsFileNotFound() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); Files.delete(cidRefsPath); assertThrows(OrphanPidRefsFileException.class, () -> fileHashStore.findObject(pid)); @@ -253,8 +250,8 @@ public void findObject_cidRefsFileNotFound() throws Exception { /** - * Confirm that findObject throws PidNotFoundInCidRefsFileException exception when - * pid refs file found but cid refs file is missing. + * Confirm that findObject throws PidNotFoundInCidRefsFileException exception when pid refs file + * found but cid refs file is missing. */ @Test public void findObject_cidRefsFileMissingPid() throws Exception { @@ -262,8 +259,10 @@ public void findObject_cidRefsFileMissingPid() throws Exception { String cid = "abcdef123456789"; fileHashStore.tagObject(pid, cid); - Path cidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); - fileHashStore.updateRefsFile(pid, cidRefsPath, FileHashStore.HashStoreRefUpdateTypes.remove); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + pid, cidRefsPath, FileHashStore.HashStoreRefUpdateTypes.remove); assertThrows(PidNotFoundInCidRefsFileException.class, () -> fileHashStore.findObject(pid)); } @@ -287,7 +286,8 @@ public void putObject_testHarness_id() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + ObjectMetadata address = + fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id, aka object_cid) String objContentId = testData.pidData.get(pid).get("sha256"); @@ -306,14 +306,14 @@ public void putObject_objSize() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = + fileHashStore.putObject(dataStream, pid, null, null, null, -1); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); assertEquals(objectSize, objInfo.size()); } - } } @@ -327,7 +327,8 @@ public void putObject_testHarness_hexDigests() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + ObjectMetadata address = + fileHashStore.putObject(dataStream, pid, null, null, null, -1); Map hexDigests = address.hexDigests(); @@ -358,7 +359,8 @@ public void putObject_validateChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, checksumCorrect, "MD2", -1); + ObjectMetadata address = + fileHashStore.putObject(dataStream, pid, null, checksumCorrect, "MD2", -1); String objCid = address.cid(); // Get relative path @@ -484,7 +486,8 @@ public void putObject_objSizeCorrect() throws Exception { long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, objectSize); + ObjectMetadata objInfo = + fileHashStore.putObject(dataStream, pid, null, null, null, objectSize); // Check id (sha-256 hex digest of the ab_id (pid)) assertEquals(objectSize, objInfo.size()); @@ -503,7 +506,8 @@ public void putObject_objSizeIncorrect() { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, 1000); + ObjectMetadata objInfo = + fileHashStore.putObject(dataStream, pid, null, null, null, 1000); // Check id (sha-256 hex digest of the ab_id (pid)) long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); @@ -573,8 +577,8 @@ public void putObject_emptyAlgorithm() { } /** - * Confirm validateTmpObject does nothing when requestValidation is false and does not throw - * any exceptions + * Confirm validateTmpObject does nothing when requestValidation is false and does not throw any + * exceptions */ @Test public void validateTmpObject() throws Exception { @@ -582,8 +586,8 @@ public void validateTmpObject() throws Exception { hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); File tmpFile = generateTemporaryFile(); - fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFile, - hexDigests, -1); + fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFile, hexDigests, + -1); } /** @@ -603,8 +607,7 @@ public void validateTmpObject_sizeMatches() throws Exception { e.printStackTrace(); } - fileHashStore.validateTmpObject(false, "sha256Digest", "SHA-256", tmpFile, - hexDigests, 1); + fileHashStore.validateTmpObject(false, "sha256Digest", "SHA-256", tmpFile, hexDigests, 1); } /** @@ -622,8 +625,8 @@ public void validateTmpObject_sizeMismatch() throws Exception { tmpFile, hexDigests, 10)); } - /**GG - * Confirm validateTmpObject does not throw exception when requested to validate checksums + /** + * GG Confirm validateTmpObject does not throw exception when requested to validate checksums * with good values */ @Test @@ -632,13 +635,12 @@ public void validateTmpObject_validationRequested_matchingChecksum() throws Exce hexDigests.put("MD5", "md5Digest"); hexDigests.put("SHA-256", "sha256Digest"); File tmpFile = generateTemporaryFile(); - fileHashStore.validateTmpObject(true, "sha256Digest", "SHA-256", tmpFile, - hexDigests, -1); + fileHashStore.validateTmpObject(true, "sha256Digest", "SHA-256", tmpFile, hexDigests, -1); } /** - * Confirm validateTmpObject does not throw exception when requested to validate checksums - * with good values, and that the tmpFile passed is deleted. + * Confirm validateTmpObject does not throw exception when requested to validate checksums with + * good values, and that the tmpFile passed is deleted. */ @Test public void validateTmpObject_validationRequested_nonMatchingChecksum() throws Exception { @@ -665,8 +667,8 @@ public void validateTmpObject_validationRequested_algoNotFound() throws Exceptio File tmpFile = generateTemporaryFile(); assertThrows(NoSuchAlgorithmException.class, - () -> fileHashStore.validateTmpObject(true, "md2Digest", "MD2", - tmpFile, hexDigests, -1)); + () -> fileHashStore.validateTmpObject(true, "md2Digest", "MD2", tmpFile, + hexDigests, -1)); assertFalse(Files.exists(tmpFile.toPath())); } @@ -764,7 +766,7 @@ public void shouldCalculateAlgorithm_algoNotIncluded() { */ @Test public void verifyChecksumParameters() throws Exception { - boolean shouldValidate = fileHashStore.verifyChecksumParameters("abc123","SHA-256"); + boolean shouldValidate = fileHashStore.verifyChecksumParameters("abc123", "SHA-256"); assertTrue(shouldValidate); } @@ -773,9 +775,8 @@ public void verifyChecksumParameters() throws Exception { */ @Test public void verifyChecksumParameters_emptyChecksum() { - assertThrows( - IllegalArgumentException.class, - () -> fileHashStore.verifyChecksumParameters(" ", "SHA-256")); + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.verifyChecksumParameters(" ", "SHA-256")); } /** @@ -783,9 +784,8 @@ public void verifyChecksumParameters_emptyChecksum() { */ @Test public void verifyChecksumParameters_emptyAlgorithm() { - assertThrows( - IllegalArgumentException.class, - () -> fileHashStore.verifyChecksumParameters("abc123", " ")); + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.verifyChecksumParameters("abc123", " ")); } /** @@ -793,9 +793,8 @@ public void verifyChecksumParameters_emptyAlgorithm() { */ @Test public void verifyChecksumParameters_unsupportedAlgorithm() { - assertThrows( - NoSuchAlgorithmException.class, - () -> fileHashStore.verifyChecksumParameters("abc123", "SHA-DOU")); + assertThrows(NoSuchAlgorithmException.class, + () -> fileHashStore.verifyChecksumParameters("abc123", "SHA-DOU")); } /** @@ -812,7 +811,8 @@ public void writeToTmpFileAndGenerateChecksums() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { Map hexDigests = - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, null); + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, + null); // Validate checksum values String md5 = testData.pidData.get(pid).get("md5"); @@ -845,7 +845,8 @@ public void writeToTmpFileAndGenerateChecksums_tmpFileSize() throws Exception { String addAlgo = "MD2"; try (InputStream dataStream = Files.newInputStream(testDataFile)) { - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, null); + fileHashStore.writeToTmpFileAndGenerateChecksums( + newTmpFile, dataStream, addAlgo, null); } long testDataFileSize = Files.size(testDataFile); @@ -871,8 +872,8 @@ public void writeToTmpFileAndGenerateChecksums_addAlgo() throws Exception { try (InputStream dataStream = Files.newInputStream(testDataFile)) { Map hexDigests = - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, - null); + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, + addAlgo, null); // Validate checksum values String md2 = testData.pidData.get(pid).get("md2"); assertEquals(md2, hexDigests.get("MD2")); @@ -924,8 +925,8 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce try (InputStream dataStream = Files.newInputStream(testDataFile)) { Map hexDigests = - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, - checksumAlgo); + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, + addAlgo, checksumAlgo); // Validate checksum values String md2 = testData.pidData.get(pid).get("md2"); String sha512224 = testData.pidData.get(pid).get("sha512-224"); @@ -1046,7 +1047,8 @@ public void deleteObjectByCid() throws Exception { Path storePath = Paths.get(fhsProperties.getProperty("storePath")); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + String objShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); Path objRealPath = storePath.resolve("objects").resolve(objShardString); assertFalse(Files.exists(objRealPath)); @@ -1065,7 +1067,8 @@ public void deleteObjectByCid_cidRefsFileContainsPids() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.cid(); // Try deleting the object @@ -1108,8 +1111,8 @@ public void storeHashStoreRefsFiles() throws Exception { } /** - * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content - * is correct + * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content is + * correct */ @Test public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { @@ -1117,8 +1120,8 @@ public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { String cid = "abcdef123456789"; fileHashStore.storeHashStoreRefsFiles(pid, cid); - Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, - FileHashStore.HashStoreIdTypes.pid); + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); assertTrue(Files.exists(pidRefsFilePath)); String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); @@ -1126,8 +1129,8 @@ public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { } /** - * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content - * is correct + * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content is + * correct */ @Test public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { @@ -1135,7 +1138,8 @@ public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { String cid = "abcdef123456789"; fileHashStore.storeHashStoreRefsFiles(pid, cid); - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(cidRefsFilePath)); String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); @@ -1143,8 +1147,8 @@ public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { } /** - * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException - * when refs files already exist + * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException when refs files + * already exist */ @Test public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws Exception { @@ -1152,9 +1156,8 @@ public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws String cid = "abcdef123456789"; fileHashStore.storeHashStoreRefsFiles(pid, cid); - assertThrows( - HashStoreRefsAlreadyExistException.class, - () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); + assertThrows(HashStoreRefsAlreadyExistException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); // Confirm that there is only 1 of each ref file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -1169,21 +1172,19 @@ public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws /** * Check storeHashStoreRefsFiles throws exception when the supplied cid is different from what - * is found in the pid refs file, and the associated cid refs file from the pid refs file - * is correctly tagged (everything is where it's expected to be) + * is found in the pid refs file, and the associated cid refs file from the pid refs file is + * correctly tagged (everything is where it's expected to be) */ @Test - public void storeHashStoreRefsFiles_PidRefsFileExistsException() - throws Exception { + public void storeHashStoreRefsFiles_PidRefsFileExistsException() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; String existingCid = "987654321fedcba"; fileHashStore.storeHashStoreRefsFiles(pid, existingCid); // This will throw an exception because the pid and cid refs file are in sync - assertThrows( - PidRefsFileExistsException.class, - () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); + assertThrows(PidRefsFileExistsException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); } /** @@ -1191,8 +1192,7 @@ public void storeHashStoreRefsFiles_PidRefsFileExistsException() * references does not exist (does not have a cid refs file) */ @Test - public void storeHashStoreRefsFiles_pidRefsOrphanedFile() - throws Exception { + public void storeHashStoreRefsFiles_pidRefsOrphanedFile() throws Exception { String pid = "dou.test.1"; String cid = "abcdef123456789"; String cidForOrphanPidRef = "987654321fedcba"; @@ -1231,15 +1231,15 @@ public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); // Confirm missing pid refs file has been created - Path pidAdditionalRefsFilePath = fileHashStore.getHashStoreRefsPath(pidAdditional, - FileHashStore.HashStoreIdTypes.pid); + Path pidAdditionalRefsFilePath = + fileHashStore.getHashStoreRefsPath(pidAdditional, FileHashStore.HashStoreIdTypes.pid); assertTrue(Files.exists(pidAdditionalRefsFilePath)); // Check cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); - boolean pidFoundInCidRefFiles = fileHashStore.isStringInRefsFile( - pidAdditional, cidRefsFilePath - ); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + boolean pidFoundInCidRefFiles = + fileHashStore.isStringInRefsFile(pidAdditional, cidRefsFilePath); assertTrue(pidFoundInCidRefFiles); // There should be 2 pid refs file, and 1 cid refs file @@ -1274,8 +1274,8 @@ public void unTagObject() throws Exception { } /** - * Check that unTagObject deletes pid refs file for a cid that is referenced by - * multiple pids, and that the cid refs file is not deleted. + * Check that unTagObject deletes pid refs file for a cid that is referenced by multiple pids, + * and that the cid refs file is not deleted. */ @Test public void unTagObject_cidWithMultiplePidReferences() throws Exception { @@ -1345,8 +1345,8 @@ public void unTagObject_orphanPidRefsFile() throws Exception { } /** - * Check that unTagObject does not throw exception when a pid refs file and cid refs file - * does not exist + * Check that unTagObject does not throw exception when a pid refs file and cid refs file does + * not exist */ @Test public void unTagObject_missingRefsFiles() throws Exception { @@ -1357,8 +1357,8 @@ public void unTagObject_missingRefsFiles() throws Exception { } /** - * Check that unTagObject does not throw exception when a pid refs file and cid refs file - * does not exist + * Check that unTagObject does not throw exception when a pid refs file and cid refs file does + * not exist */ @Test public void unTagObject_missingPidRefsFile() throws Exception { @@ -1435,12 +1435,12 @@ public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); - assertThrows( - CidNotFoundInPidRefsFileException.class, - () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, - cidRefsFilePath)); + assertThrows(CidNotFoundInPidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, + cidRefsFilePath)); } /** @@ -1461,10 +1461,9 @@ public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception Path pidRefsFilePath = fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); - assertThrows( - PidNotFoundInCidRefsFileException.class, - () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, - cidRefsTmpFilePath)); + assertThrows(PidNotFoundInCidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, + cidRefsTmpFilePath)); } /** @@ -1593,8 +1592,8 @@ public void updateRefsFile_removeMultiplePids() throws Exception { } /** - * Confirm that updateRefsFile does not throw any exception if called to remove a value - * that is not found in a cid refs file. + * Confirm that updateRefsFile does not throw any exception if called to remove a value that is + * not found in a cid refs file. */ @Test public void updateRefsFile_cidRefsPidNotFound() throws Exception { @@ -1622,8 +1621,8 @@ public void updateRefsFile_cidRefsPidNotFound() throws Exception { } /** - * Confirm that updateRefsFile does not throw any exception if called to remove a value - * from a cid refs file that is empty + * Confirm that updateRefsFile does not throw any exception if called to remove a value from a + * cid refs file that is empty */ @Test public void updateRefsFile_cidRefsEmpty() throws Exception { @@ -1650,7 +1649,8 @@ public void updateRefsFile_cidRefsEmpty() throws Exception { assertEquals(0, pidsFound); // Confirm that no exception is thrown and that the cid refs still exists - fileHashStore.updateRefsFile(pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); assertTrue(Files.exists(cidRefsFilePath)); } @@ -1758,7 +1758,8 @@ public void writeToTmpMetadataFile() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + boolean metadataWritten = + fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); assertTrue(metadataWritten); } } @@ -1777,7 +1778,8 @@ public void writeToTmpMetadataFile_tmpFileSize() throws Exception { Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + boolean metadataWritten = + fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); assertTrue(metadataWritten); long tmpMetadataFileSize = Files.size(newTmpFile.toPath()); @@ -1842,8 +1844,10 @@ public void syncRenameMetadataDocForDeletion_renamesAsExpected() throws Exceptio try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); - String pathToMetadataTwo = fileHashStore.putMetadata(metadataStream, pid, "ns.test.1"); - String pathToMetadataThree = fileHashStore.putMetadata(metadataStream, pid, "ns.test" + ".3"); + String pathToMetadataTwo = + fileHashStore.putMetadata(metadataStream, pid, "ns.test.1"); + String pathToMetadataThree = + fileHashStore.putMetadata(metadataStream, pid, "ns.test" + ".3"); // Confirm that metadata documents are present Path storePath = Paths.get(fhsProperties.getProperty("storePath")); @@ -1851,9 +1855,9 @@ public void syncRenameMetadataDocForDeletion_renamesAsExpected() throws Exceptio int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); - String pidRelativePath = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, pidHexDigest - ); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + pidHexDigest); Path expectedPidMetadataDirectory = storePath.resolve("metadata").resolve(pidRelativePath); List metadataDocPaths = @@ -1881,9 +1885,8 @@ public void syncRenameMetadataDocForDeletion_renamesAsExpected() throws Exceptio @Test public void syncRenameMetadataDocForDeletion_emptyList() { Collection metadataDocPaths = new ArrayList<>(); - assertThrows( - IllegalArgumentException.class, - () -> fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths)); + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths)); } /** @@ -1891,9 +1894,8 @@ public void syncRenameMetadataDocForDeletion_emptyList() { */ @Test public void syncRenameMetadataDocForDeletion_nullList() { - assertThrows( - IllegalArgumentException.class, - () -> fileHashStore.syncRenameMetadataDocForDeletion(null)); + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.syncRenameMetadataDocForDeletion(null)); } /** @@ -1912,7 +1914,8 @@ public void isStringInRefsFile_pidFound() throws Exception { String pidTwo = pid + ".test"; try (InputStream dataStreamDup = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); String cid = objInfo.cid(); Path absCidRefsPath = @@ -1932,7 +1935,8 @@ public void isStringInRefsFile_pidNotFound() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.cid(); Path absCidRefsPath = @@ -1952,14 +1956,16 @@ public void getHashStoreDataObjectPath() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.cid(); // Manually form the permanent address of the actual cid Path storePath = Paths.get(fhsProperties.getProperty("storePath")); int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String objShardString = FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + String objShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); Path calculatedObjRealPath = storePath.resolve("objects").resolve(objShardString); Path expectedObjCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); @@ -1990,7 +1996,8 @@ public void getHashStoreMetadataPath() throws Exception { int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); // Document ID - String hashId = FileHashStoreUtility.getPidHexDigest(pid + storeFormatId, storeAlgo); + String hashId = + FileHashStoreUtility.getPidHexDigest(pid + storeFormatId, storeAlgo); // Metadata directory of the given pid String metadataPidDirId = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); @@ -2026,7 +2033,8 @@ public void getHashStoreMetadataInputStream() throws Exception { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream metadataCidInputStream = fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId); + InputStream metadataCidInputStream = + fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId); assertNotNull(metadataCidInputStream); } } @@ -2041,9 +2049,8 @@ public void getHashStoreMetadataInputStream_fileNotFound() { for (String pid : testData.pidList) { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - assertThrows( - FileNotFoundException.class, - () -> fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId)); + assertThrows(FileNotFoundException.class, + () -> fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId)); } } @@ -2091,7 +2098,8 @@ public void getHashStoreRefsPath_cid() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); try (InputStream dataStream = Files.newInputStream(testDataFile)) { - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); String cid = objInfo.cid(); // Manually form the permanent address of the actual cid @@ -2114,8 +2122,8 @@ public void getHashStoreRefsPath_cid() throws Exception { } /** - * Confirm getHashStoreDataObjectPath throws exception when requesting path for an object - * that does not exist + * Confirm getHashStoreDataObjectPath throws exception when requesting path for an object that + * does not exist */ @Test public void getHashStoreDataObjectPath_fileNotFound() { @@ -2131,9 +2139,9 @@ public void getHashStoreDataObjectPath_fileNotFound() { */ @Test public void fileHashStoreUtility_checkForEmptyAndValidString() { - assertThrows( - IllegalArgumentException.class, - () -> FileHashStoreUtility.checkForNotEmptyAndValidString("dou.test.1\n", "pid")); + assertThrows(IllegalArgumentException.class, + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("dou.test.1\n", + "pid")); } /** @@ -2142,9 +2150,8 @@ public void fileHashStoreUtility_checkForEmptyAndValidString() { */ @Test public void fileHashStoreUtility_checkForEmptyAndValidString_newLine() { - assertThrows( - IllegalArgumentException.class, - () -> FileHashStoreUtility.checkForNotEmptyAndValidString("\n", "pid")); + assertThrows(IllegalArgumentException.class, + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("\n", "pid")); } /** From 62d72a1b4740ac40834ef7945e520d573d5b535b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 16 Aug 2024 09:27:03 -0700 Subject: [PATCH 496/553] Make enum 'DefaultHashAlgorithms' and 'HashStoreProperties' public to provide access to 'FileHashStoreLinks' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 ++-- .../dataone/hashstore/filehashstore/FileHashStoreUtility.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 5b6a511a..5bb87668 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -82,7 +82,7 @@ public class FileHashStore implements HashStore { /** * The default hash algorithms included in the ObjectMetadata when storing objects. */ - enum DefaultHashAlgorithms { + public enum DefaultHashAlgorithms { MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); final String algoName; @@ -106,7 +106,7 @@ public enum HashStoreIdTypes { /** * The configuration properties for a HashStore */ - enum HashStoreProperties { + public enum HashStoreProperties { storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 8224099d..b180b050 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -116,7 +116,7 @@ public static void createParentDirectories(Path desiredPath) throws IOException Files.createDirectories(destinationDirectoryPath); } catch (FileAlreadyExistsException faee) { - logFHSU.warn("Directory already exists at: " + destinationDirectoryPath + log.warn("Directory already exists at: " + destinationDirectoryPath + " - Skipping directory creation"); } } From fed0882a9b008db39aaaf2dc7bea89cee10de967 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 16 Aug 2024 09:27:25 -0700 Subject: [PATCH 497/553] Revise 'FileHashStoreLinks' and 'HashStoreConverter' classes based on new changes from --- .../FileHashStoreLinks.java | 76 +++++++++---------- .../HashStoreConverter.java | 41 +++++----- .../FileHashStoreLinksTest.java | 4 +- .../HashStoreConverterTest.java | 10 +-- 4 files changed, 64 insertions(+), 67 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index c2dd532b..d051cc74 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -22,10 +22,10 @@ import java.util.Properties; /** - * FileHashStoreLinks is an extension of FileHashStore that provides the client with the ability - * to store a hard link instead of storing a data object. This is desirable when a directory with - * data objects already exists to optimize disk usage, and is more performant since there is no - * write operation. + * FileHashStoreLinks is an extension of FileHashStore that provides the client with the ability to + * store a hard link instead of storing a data object. This is desirable when a directory with data + * objects already exists to optimize disk usage, and is more performant since there is no write + * operation. */ public class FileHashStoreLinks extends FileHashStore { @@ -45,22 +45,18 @@ public class FileHashStoreLinks extends FileHashStore { * configuration file * @throws NoSuchAlgorithmException If an algorithm in the properties is not supported */ - public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException { + public FileHashStoreLinks(Properties hashstoreProperties) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException { super(hashstoreProperties); // If configuration matches, set FileHashStoreLinks private variables - Path storePath = Paths.get( - hashstoreProperties.getProperty(HashStoreProperties.storePath.name()) - ); + Path storePath = + Paths.get(hashstoreProperties.getProperty(HashStoreProperties.storePath.name())); int storeDepth = Integer.parseInt( - hashstoreProperties.getProperty(HashStoreProperties.storeDepth.name()) - ); + hashstoreProperties.getProperty(HashStoreProperties.storeDepth.name())); int storeWidth = Integer.parseInt( - hashstoreProperties.getProperty(HashStoreProperties.storeWidth.name()) - ); - String storeAlgorithm = hashstoreProperties.getProperty( - HashStoreProperties.storeAlgorithm.name() - ); + hashstoreProperties.getProperty(HashStoreProperties.storeWidth.name())); + String storeAlgorithm = + hashstoreProperties.getProperty(HashStoreProperties.storeAlgorithm.name()); DIRECTORY_DEPTH = storeDepth; DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; @@ -71,26 +67,27 @@ public FileHashStoreLinks(Properties hashstoreProperties) throws IllegalArgument /** * Store a hard link to HashStore from an existing data object in the filesystem. * - * @param filePath Path to the source file which a hard link will be created for - * @param fileStream Stream to the source file to calculate checksums for - * @param pid Persistent or authority-based identifier for tagging - * @param checksum Value of checksum + * @param filePath Path to the source file which a hard link will be created for + * @param fileStream Stream to the source file to calculate checksums for + * @param pid Persistent or authority-based identifier for tagging + * @param checksum Value of checksum * @param checksumAlgorithm Ex. "SHA-256" * @return ObjectMetadata encapsulating information about the data file * @throws NoSuchAlgorithmException Issue with one of the hashing algorithms to calculate * @throws IOException An issue with reading from the given file stream - * @throws InterruptedException Sync issue when tagging pid and cid + * @throws InterruptedException Sync issue when tagging pid and cid */ - public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, String pid, - String checksum, String checksumAlgorithm) + public ObjectMetadata storeHardLink( + Path filePath, InputStream fileStream, String pid, String checksum, + String checksumAlgorithm) throws NoSuchAlgorithmException, IOException, InterruptedException { // Validate input parameters - FileHashStoreUtility.ensureNotNull(filePath, "filePath", "storeHardLink"); - FileHashStoreUtility.ensureNotNull(fileStream, "fileStream", "storeHardLink"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeHardLink"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "storeHardLink"); - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "storeHardLink"); - FileHashStoreUtility.checkForEmptyAndValidString(checksum, "checksum", "storeHardLink"); + FileHashStoreUtility.ensureNotNull(filePath, "filePath"); + FileHashStoreUtility.ensureNotNull(fileStream, "fileStream"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum"); + FileHashStoreUtility.checkForNotEmptyAndValidString(checksum, "checksum"); validateAlgorithm(checksumAlgorithm); if (!Files.exists(filePath)) { String errMsg = "Given file path: " + filePath + " does not exist."; @@ -110,9 +107,9 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin // Gather the elements to form the permanent address String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); - String objRelativePath = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + objectCid); Path objHardLinkPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); // Create parent directories to the hard link, otherwise // Files.createLink will throw a NoSuchFileException @@ -122,8 +119,7 @@ public ObjectMetadata storeHardLink(Path filePath, InputStream fileStream, Strin Files.createLink(objHardLinkPath, filePath); } catch (FileAlreadyExistsException faee) { - logFileHashStoreLinks.warn( - "Data object already exists at: " + objHardLinkPath); + logFileHashStoreLinks.warn("Data object already exists at: " + objHardLinkPath); } // This method is thread safe and synchronized @@ -154,10 +150,10 @@ protected Path getHashStoreLinksDataObjectPath(String pid) } /** - * Get a HashMap consisting of algorithms and their respective hex digests for a given - * data stream. If an additional algorithm is supplied and supported, it and its checksum - * value will be included in the hex digests map. Default algorithms: MD5, SHA-1, SHA-256, - * SHA-384, SHA-512 + * Get a HashMap consisting of algorithms and their respective hex digests for a given data + * stream. If an additional algorithm is supplied and supported, it and its checksum value will + * be included in the hex digests map. Default algorithms: MD5, SHA-1, SHA-256, SHA-384, + * SHA-512 * * @param dataStream input stream of data to store * @param additionalAlgorithm additional algorithm to include in hex digest map @@ -226,8 +222,8 @@ protected Map generateChecksums( hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); if (generateAddAlgo) { - String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()) - .toLowerCase(); + String extraAlgoDigest = + DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 0e6d615a..69af9e2b 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -15,8 +15,8 @@ /** * HashStoreConverter is a utility tool to assist with ingesting existing data objects and their * respective system metadata into a HashStore. Instead of duplicating data objects (that already - * exist), HashStoreConverter provides a simple API to create a hard link to a data object with - * its expected HashStore object path. + * exist), HashStoreConverter provides a simple API to create a hard link to a data object with its + * expected HashStore object path. */ public class HashStoreConverter { private static final Log logHashStoreConverter = LogFactory.getLog(HashStoreConverter.class); @@ -51,37 +51,38 @@ public HashStoreConverter(Properties hashstoreProperties) } /** - * Take an existing path to a data object, store it into a new or existing HashStore via a - * hard link, store the supplied system metadata and return the ObjectMetadata for the data - * object. A 'filePath' may be null, in which case a hard link will not be created, and only - * the sysmeta will be stored. + * Take an existing path to a data object, store it into a new or existing HashStore via a hard + * link, store the supplied system metadata and return the ObjectMetadata for the data object. A + * 'filePath' may be null, in which case a hard link will not be created, and only the sysmeta + * will be stored. * - * @param filePath Path to existing data object - * @param pid Persistent or authority-based identifier - * @param sysmetaStream Stream to sysmeta content to store. - * @param checksum Value of checksum + * @param filePath Path to existing data object + * @param pid Persistent or authority-based identifier + * @param sysmetaStream Stream to sysmeta content to store. + * @param checksum Value of checksum * @param checksumAlgorithm Ex. "SHA-256" * @return ObjectMetadata for the given pid * @throws IOException An issue with calculating checksums or storing sysmeta * @throws NoSuchAlgorithmException An algorithm defined is not supported * @throws InterruptedException Issue with synchronizing storing metadata */ - public ObjectMetadata convert(Path filePath, String pid, InputStream sysmetaStream, - String checksum, String checksumAlgorithm) + public ObjectMetadata convert( + Path filePath, String pid, InputStream sysmetaStream, String checksum, + String checksumAlgorithm) throws IOException, NoSuchAlgorithmException, InterruptedException { logHashStoreConverter.info("Begin converting data object and sysmeta for pid: " + pid); - FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream", "convert"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "convert"); - FileHashStoreUtility.checkForEmptyAndValidString(pid, "pid", "convert"); + FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Store the hard link first if it's available ObjectMetadata objInfo = null; if (filePath != null) { - FileHashStoreUtility.ensureNotNull(checksum, "checksum", "convert"); - FileHashStoreUtility.checkForEmptyAndValidString(checksum, "checksum", "convert"); - FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm", "convert"); - FileHashStoreUtility.checkForEmptyAndValidString( - checksumAlgorithm, "checksumAlgorithm", "convert"); + FileHashStoreUtility.ensureNotNull(checksum, "checksum"); + FileHashStoreUtility.checkForNotEmptyAndValidString(checksum, "checksum"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString( + checksumAlgorithm, "checksumAlgorithm"); try (InputStream fileStream = Files.newInputStream(filePath)) { objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid, checksum, diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 718fdcfb..7a05235d 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -147,8 +147,8 @@ public void storeHardLink() throws Exception { // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); - assertEquals(objectCid, objInfo.getCid()); - assertEquals(pid, objInfo.getPid()); + assertEquals(objectCid, objInfo.cid()); + assertEquals(pid, objInfo.pid()); Path objPath = fileHashStoreLinks.getHashStoreLinksDataObjectPath(pid); diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java index 276c0b90..150f7114 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverterTest.java @@ -119,14 +119,14 @@ public void convert() throws Exception { sysmetaStream.close(); // Check checksums - Map hexDigests = objInfo.getHexDigests(); + Map hexDigests = objInfo.hexDigests(); assertEquals(md5, hexDigests.get("MD5")); assertEquals(sha1, hexDigests.get("SHA-1")); assertEquals(sha256, hexDigests.get("SHA-256")); assertEquals(sha384, hexDigests.get("SHA-384")); assertEquals(sha512, hexDigests.get("SHA-512")); - assertEquals(sha256, objInfo.getCid()); - assertEquals(pid, objInfo.getPid()); + assertEquals(sha256, objInfo.cid()); + assertEquals(pid, objInfo.pid()); // Metadata is stored directly through 'FileHashStore' // Creation of hard links is confirmed via 'FileHashStoreLinks' @@ -152,7 +152,7 @@ public void convert_checksumAlgorithmIncluded() throws Exception { sysmetaStream.close(); // Check checksums - Map hexDigests = objInfo.getHexDigests(); + Map hexDigests = objInfo.hexDigests(); assertEquals(sha256, hexDigests.get("SHA-256")); } } @@ -177,7 +177,7 @@ public void convert_checksumAlgorithmSupportedButNotFound() throws Exception { sysmetaStream.close(); // Check checksums - Map hexDigests = objInfo.getHexDigests(); + Map hexDigests = objInfo.hexDigests(); assertEquals(md2, hexDigests.get("MD2")); } } From 261467ba2976f541b16658ec45a0573909b1659d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 08:25:47 -0700 Subject: [PATCH 498/553] Refactor 'storeHardLink' method to create InputStream from given filePath and update signature and affected junit tests --- .../FileHashStoreLinks.java | 8 ++------ .../HashStoreConverter.java | 10 ++++------ .../FileHashStoreLinksTest.java | 19 +++++++------------ 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index d051cc74..132fb444 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -78,12 +78,11 @@ public FileHashStoreLinks(Properties hashstoreProperties) * @throws InterruptedException Sync issue when tagging pid and cid */ public ObjectMetadata storeHardLink( - Path filePath, InputStream fileStream, String pid, String checksum, + Path filePath, String pid, String checksum, String checksumAlgorithm) throws NoSuchAlgorithmException, IOException, InterruptedException { // Validate input parameters FileHashStoreUtility.ensureNotNull(filePath, "filePath"); - FileHashStoreUtility.ensureNotNull(fileStream, "fileStream"); FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); FileHashStoreUtility.ensureNotNull(checksum, "checksum"); @@ -94,7 +93,7 @@ public ObjectMetadata storeHardLink( throw new FileNotFoundException(errMsg); } - try { + try (InputStream fileStream = Files.newInputStream(filePath)) { Map hexDigests = generateChecksums(fileStream, checksumAlgorithm); String checksumToMatch = hexDigests.get(checksumAlgorithm); if (!checksum.equalsIgnoreCase(checksumToMatch)) { @@ -130,9 +129,6 @@ public ObjectMetadata storeHardLink( return new ObjectMetadata(pid, objectCid, Files.size(objHardLinkPath), hexDigests); - } finally { - // Close stream - fileStream.close(); } } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 69af9e2b..02fa7f8b 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -84,9 +84,9 @@ public ObjectMetadata convert( FileHashStoreUtility.checkForNotEmptyAndValidString( checksumAlgorithm, "checksumAlgorithm"); - try (InputStream fileStream = Files.newInputStream(filePath)) { - objInfo = fileHashStoreLinks.storeHardLink(filePath, fileStream, pid, checksum, - checksumAlgorithm); + try { + objInfo = + fileHashStoreLinks.storeHardLink(filePath, pid, checksum, checksumAlgorithm); logHashStoreConverter.info("Stored data object for pid: " + pid); } catch (IOException ioe) { @@ -112,10 +112,8 @@ public ObjectMetadata convert( } // Now the sysmeta - try { + try (sysmetaStream) { fileHashStoreLinks.storeMetadata(sysmetaStream, pid); - } finally { - sysmetaStream.close(); } return objInfo; diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 7a05235d..27bc66e9 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -140,10 +140,8 @@ public void storeHardLink() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); assertTrue(Files.exists(testDataFile)); - InputStream dataStream = Files.newInputStream(testDataFile); ObjectMetadata objInfo = - fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid, sha256, "SHA-256"); - dataStream.close(); + fileHashStoreLinks.storeHardLink(testDataFile, pid, sha256, "SHA-256"); // Check id (content identifier based on the store algorithm) String objectCid = testData.pidData.get(pid).get("sha256"); @@ -153,8 +151,10 @@ public void storeHardLink() throws Exception { Path objPath = fileHashStoreLinks.getHashStoreLinksDataObjectPath(pid); // Verify that a hard link has been created - BasicFileAttributes fileAttributes = Files.readAttributes(objPath, BasicFileAttributes.class); - BasicFileAttributes originalFileAttributes = Files.readAttributes(testDataFile, BasicFileAttributes.class); + BasicFileAttributes fileAttributes = + Files.readAttributes(objPath, BasicFileAttributes.class); + BasicFileAttributes originalFileAttributes = + Files.readAttributes(testDataFile, BasicFileAttributes.class); assertEquals(fileAttributes.fileKey(), originalFileAttributes.fileKey()); } } @@ -170,13 +170,8 @@ public void storeHardLink_alreadyExists() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); assertTrue(Files.exists(testDataFile)); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStoreLinks.storeHardLink(testDataFile, dataStream, pid, sha256, "SHA-256"); - dataStream.close(); - InputStream dataStreamTwo = Files.newInputStream(testDataFile); - fileHashStoreLinks.storeHardLink(testDataFile, dataStreamTwo, pid + ".test.pid", sha256, - "SHA-256"); - dataStreamTwo.close(); + fileHashStoreLinks.storeHardLink(testDataFile, pid, sha256, "SHA-256"); + fileHashStoreLinks.storeHardLink(testDataFile, pid + ".test.pid", sha256, "SHA-256"); } } From 349a9fb8deaf398fba2fffa2a336641a7628ebb3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 09:15:48 -0700 Subject: [PATCH 499/553] Refactor 'FileHashStore' and 'FileHashStoreLinks' to automate the creation and digestion of 'MessageDigest' objects based on the DefaultHashAlgorithms enum object --- .../filehashstore/FileHashStore.java | 39 ++++++++----------- .../FileHashStoreLinks.java | 38 +++++++++--------- 2 files changed, 35 insertions(+), 42 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 5bb87668..e47fde86 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -96,6 +96,9 @@ public String getName() { } } + // List of default hash algorithms to calculate when storing objects + List digestsToCalculate = new ArrayList<>(); + /** * The two different type of HashStore identifiers */ @@ -188,7 +191,11 @@ public FileHashStore(Properties hashstoreProperties) Files.createDirectories(REFS_TMP_FILE_DIRECTORY); Files.createDirectories(REFS_PID_FILE_DIRECTORY); Files.createDirectories(REFS_CID_FILE_DIRECTORY); - logFileHashStore.debug("Created store and store tmp directories."); + // Initialize default hash algorithms + for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { + digestsToCalculate.add(MessageDigest.getInstance(algorithm.getName())); + } + logFileHashStore.debug("FileHashStore initialized"); } catch (IOException ioe) { logFileHashStore.fatal("Failed to initialize FileHashStore - unable to create" @@ -1419,11 +1426,6 @@ protected Map writeToTmpFileAndGenerateChecksums( } FileOutputStream os = new FileOutputStream(tmpFile); - MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); - MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); - MessageDigest sha256 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_256.getName()); - MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); - MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); MessageDigest additionalAlgo = null; MessageDigest checksumAlgo = null; if (generateAddAlgo) { @@ -1443,11 +1445,9 @@ protected Map writeToTmpFileAndGenerateChecksums( int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { os.write(buffer, 0, bytesRead); - md5.update(buffer, 0, bytesRead); - sha1.update(buffer, 0, bytesRead); - sha256.update(buffer, 0, bytesRead); - sha384.update(buffer, 0, bytesRead); - sha512.update(buffer, 0, bytesRead); + for (MessageDigest digest : digestsToCalculate) { + digest.update(buffer, 0, bytesRead); + } if (generateAddAlgo) { additionalAlgo.update(buffer, 0, bytesRead); } @@ -1469,16 +1469,11 @@ protected Map writeToTmpFileAndGenerateChecksums( // Create map of hash algorithms and corresponding hex digests Map hexDigests = new HashMap<>(); - String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); - String sha1Digest = DatatypeConverter.printHexBinary(sha1.digest()).toLowerCase(); - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha384Digest = DatatypeConverter.printHexBinary(sha384.digest()).toLowerCase(); - String sha512Digest = DatatypeConverter.printHexBinary(sha512.digest()).toLowerCase(); - hexDigests.put(DefaultHashAlgorithms.MD5.getName(), md5Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_1.getName(), sha1Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); + for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { + String hexDigest = DatatypeConverter + .printHexBinary(digestsToCalculate.get(algorithm.ordinal()).digest()).toLowerCase(); + hexDigests.put(algorithm.getName(), hexDigest); + } if (generateAddAlgo) { String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); @@ -1491,7 +1486,7 @@ protected Map writeToTmpFileAndGenerateChecksums( } logFileHashStore.debug( "Object has been written to tmpFile: " + tmpFile.getName() + ". To be moved to: " - + sha256Digest); + + hexDigests.get(DefaultHashAlgorithms.SHA_256.getName())); return hexDigests; } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 132fb444..2c4c1365 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -17,7 +17,9 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Properties; @@ -35,6 +37,9 @@ public class FileHashStoreLinks extends FileHashStore { private final String OBJECT_STORE_ALGORITHM; private final Path OBJECT_STORE_DIRECTORY; + // List of default hash algorithms to calculate when storing objects/hard links + List digestsToCalculate = new ArrayList<>(); + /** * Constructor for FireHashStoreLinks. HashStore properties are required. * @@ -61,6 +66,10 @@ public FileHashStoreLinks(Properties hashstoreProperties) DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); + // Initialize default hash algorithms + for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { + digestsToCalculate.add(MessageDigest.getInstance(algorithm.getName())); + } logFileHashStoreLinks.info("FileHashStoreLinks initialized"); } @@ -169,11 +178,6 @@ protected Map generateChecksums( generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); } - MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); - MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); - MessageDigest sha256 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_256.getName()); - MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); - MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); MessageDigest additionalAlgo = null; if (generateAddAlgo) { logFileHashStoreLinks.debug( @@ -186,11 +190,9 @@ protected Map generateChecksums( byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { - md5.update(buffer, 0, bytesRead); - sha1.update(buffer, 0, bytesRead); - sha256.update(buffer, 0, bytesRead); - sha384.update(buffer, 0, bytesRead); - sha512.update(buffer, 0, bytesRead); + for (MessageDigest digest : digestsToCalculate) { + digest.update(buffer, 0, bytesRead); + } if (generateAddAlgo) { additionalAlgo.update(buffer, 0, bytesRead); } @@ -207,21 +209,17 @@ protected Map generateChecksums( // Create map of hash algorithms and corresponding hex digests Map hexDigests = new HashMap<>(); - String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); - String sha1Digest = DatatypeConverter.printHexBinary(sha1.digest()).toLowerCase(); - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha384Digest = DatatypeConverter.printHexBinary(sha384.digest()).toLowerCase(); - String sha512Digest = DatatypeConverter.printHexBinary(sha512.digest()).toLowerCase(); - hexDigests.put(DefaultHashAlgorithms.MD5.getName(), md5Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_1.getName(), sha1Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); - hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); + for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { + String hexDigest = DatatypeConverter + .printHexBinary(digestsToCalculate.get(algorithm.ordinal()).digest()).toLowerCase(); + hexDigests.put(algorithm.getName(), hexDigest); + } if (generateAddAlgo) { String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } + logFileHashStoreLinks.debug("Checksums have been calculated."); return hexDigests; } From 29680d2e5d8f6e795fabc20fedcb58ef5dbc4f5a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 09:32:22 -0700 Subject: [PATCH 500/553] Optimize 'writeToTmpFileAndGenerateChecksums' and 'generateChecksums' methods --- .../dataone/hashstore/filehashstore/FileHashStore.java | 8 ++------ .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 4 +--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e47fde86..4a582d8d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1432,11 +1432,13 @@ protected Map writeToTmpFileAndGenerateChecksums( logFileHashStore.debug( "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); + digestsToCalculate.add(additionalAlgo); } if (generateCsAlgo) { logFileHashStore.debug( "Adding checksum algorithm to hex digest map, algorithm: " + checksumAlgorithm); checksumAlgo = MessageDigest.getInstance(checksumAlgorithm); + digestsToCalculate.add(checksumAlgo); } // Calculate hex digests @@ -1448,12 +1450,6 @@ protected Map writeToTmpFileAndGenerateChecksums( for (MessageDigest digest : digestsToCalculate) { digest.update(buffer, 0, bytesRead); } - if (generateAddAlgo) { - additionalAlgo.update(buffer, 0, bytesRead); - } - if (generateCsAlgo) { - checksumAlgo.update(buffer, 0, bytesRead); - } } } catch (IOException ioe) { diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 2c4c1365..1c095f4f 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -183,6 +183,7 @@ protected Map generateChecksums( logFileHashStoreLinks.debug( "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); + digestsToCalculate.add(additionalAlgo); } // Calculate hex digests @@ -193,9 +194,6 @@ protected Map generateChecksums( for (MessageDigest digest : digestsToCalculate) { digest.update(buffer, 0, bytesRead); } - if (generateAddAlgo) { - additionalAlgo.update(buffer, 0, bytesRead); - } } } catch (IOException ioe) { From dc6d84dd78116383bdfa035415232b55155156f4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 09:39:49 -0700 Subject: [PATCH 501/553] Make newly added list 'digestsToCalculate' private --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 3 ++- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4a582d8d..a8ca9a91 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -97,7 +97,8 @@ public String getName() { } // List of default hash algorithms to calculate when storing objects - List digestsToCalculate = new ArrayList<>(); + // This list is not final, if additional algorithms are requested we may add to it + private List digestsToCalculate = new ArrayList<>(); /** * The two different type of HashStore identifiers diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 1c095f4f..6173ec5b 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -38,7 +38,8 @@ public class FileHashStoreLinks extends FileHashStore { private final Path OBJECT_STORE_DIRECTORY; // List of default hash algorithms to calculate when storing objects/hard links - List digestsToCalculate = new ArrayList<>(); + // This list is not final, if additional algorithms are requested we may add to it + private List digestsToCalculate = new ArrayList<>(); /** * Constructor for FireHashStoreLinks. HashStore properties are required. @@ -77,7 +78,6 @@ public FileHashStoreLinks(Properties hashstoreProperties) * Store a hard link to HashStore from an existing data object in the filesystem. * * @param filePath Path to the source file which a hard link will be created for - * @param fileStream Stream to the source file to calculate checksums for * @param pid Persistent or authority-based identifier for tagging * @param checksum Value of checksum * @param checksumAlgorithm Ex. "SHA-256" @@ -191,7 +191,7 @@ protected Map generateChecksums( byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { - for (MessageDigest digest : digestsToCalculate) { + forgi (MessageDigest digest : digestsToCalculate) { digest.update(buffer, 0, bytesRead); } } From 7f2b31548c5750e430dd27be8b770e16663ff28d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 09:42:09 -0700 Subject: [PATCH 502/553] Further optimize code to generate checksums by using try-resources on InputStream object --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 3 +-- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a8ca9a91..1200fd00 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1443,7 +1443,7 @@ protected Map writeToTmpFileAndGenerateChecksums( } // Calculate hex digests - try { + try (dataStream) { byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { @@ -1459,7 +1459,6 @@ protected Map writeToTmpFileAndGenerateChecksums( throw ioe; } finally { - dataStream.close(); os.flush(); os.close(); } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 6173ec5b..f04dd703 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -187,11 +187,11 @@ protected Map generateChecksums( } // Calculate hex digests - try { + try (dataStream) { byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { - forgi (MessageDigest digest : digestsToCalculate) { + for (MessageDigest digest : digestsToCalculate) { digest.update(buffer, 0, bytesRead); } } @@ -201,8 +201,6 @@ protected Map generateChecksums( logFileHashStoreLinks.error(errMsg); throw ioe; - } finally { - dataStream.close(); } // Create map of hash algorithms and corresponding hex digests From 465540254ee90a3012076a5534de0e87db6b5061 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 10:38:46 -0700 Subject: [PATCH 503/553] Fix bug in '...Checksums' optimization by creating a local list and modifying it, rather than using a class variable and accessing it directly --- .../hashstore/filehashstore/FileHashStore.java | 8 ++++---- .../hashstoreconverter/FileHashStoreLinks.java | 13 +++++-------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1200fd00..9c53fb09 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -96,9 +96,8 @@ public String getName() { } } - // List of default hash algorithms to calculate when storing objects - // This list is not final, if additional algorithms are requested we may add to it - private List digestsToCalculate = new ArrayList<>(); + // List of default hash algorithms to calculate when storing objects/hard links + protected List defaultHexDigestsList = new ArrayList<>(); /** * The two different type of HashStore identifiers @@ -194,7 +193,7 @@ public FileHashStore(Properties hashstoreProperties) Files.createDirectories(REFS_CID_FILE_DIRECTORY); // Initialize default hash algorithms for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { - digestsToCalculate.add(MessageDigest.getInstance(algorithm.getName())); + defaultHexDigestsList.add(MessageDigest.getInstance(algorithm.getName())); } logFileHashStore.debug("FileHashStore initialized"); @@ -1410,6 +1409,7 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor protected Map writeToTmpFileAndGenerateChecksums( File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm) throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { + List digestsToCalculate = defaultHexDigestsList; // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index f04dd703..a3732f47 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -37,10 +37,6 @@ public class FileHashStoreLinks extends FileHashStore { private final String OBJECT_STORE_ALGORITHM; private final Path OBJECT_STORE_DIRECTORY; - // List of default hash algorithms to calculate when storing objects/hard links - // This list is not final, if additional algorithms are requested we may add to it - private List digestsToCalculate = new ArrayList<>(); - /** * Constructor for FireHashStoreLinks. HashStore properties are required. * @@ -67,10 +63,6 @@ public FileHashStoreLinks(Properties hashstoreProperties) DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); - // Initialize default hash algorithms - for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { - digestsToCalculate.add(MessageDigest.getInstance(algorithm.getName())); - } logFileHashStoreLinks.info("FileHashStoreLinks initialized"); } @@ -171,6 +163,11 @@ protected Path getHashStoreLinksDataObjectPath(String pid) protected Map generateChecksums( InputStream dataStream, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, SecurityException { + List digestsToCalculate = defaultHexDigestsList; + // Initialize default hash algorithms + for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { + digestsToCalculate.add(MessageDigest.getInstance(algorithm.getName())); + } // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { From 3fcf3e8b25a67c72b43c149c5454cbcc316c3887 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 10:43:44 -0700 Subject: [PATCH 504/553] Revise and add new junit tests related to '...Checksums' methods --- .../FileHashStoreProtectedTest.java | 8 +++ .../FileHashStoreLinksTest.java | 55 +++++++++++++------ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 0145fce0..ea0d8cc7 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -825,6 +825,8 @@ public void writeToTmpFileAndGenerateChecksums() throws Exception { assertEquals(sha256, hexDigests.get("SHA-256")); assertEquals(sha384, hexDigests.get("SHA-384")); assertEquals(sha512, hexDigests.get("SHA-512")); + + assertEquals(hexDigests.size(), 5); } } } @@ -877,6 +879,8 @@ public void writeToTmpFileAndGenerateChecksums_addAlgo() throws Exception { // Validate checksum values String md2 = testData.pidData.get(pid).get("md2"); assertEquals(md2, hexDigests.get("MD2")); + + assertEquals(hexDigests.size(), 6); } } } @@ -903,6 +907,8 @@ public void writeToTmpFileAndGenerateChecksums_checksumAlgo() throws Exception { // Validate checksum values String sha512224 = testData.pidData.get(pid).get("sha512-224"); assertEquals(sha512224, hexDigests.get("SHA-512/224")); + + assertEquals(hexDigests.size(), 6); } } } @@ -932,6 +938,8 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce String sha512224 = testData.pidData.get(pid).get("sha512-224"); assertEquals(md2, hexDigests.get("MD2")); assertEquals(sha512224, hexDigests.get("SHA-512/224")); + + assertEquals(hexDigests.size(), 7); } } } diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 27bc66e9..7453fcb0 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -185,24 +185,43 @@ public void testGenerateChecksums() throws Exception { // Get test file Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = - fileHashStoreLinks.generateChecksums(dataStream, "MD2"); - dataStream.close(); - - // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 = testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); - assertEquals(md2, hexDigests.get("MD2")); - assertEquals(md5, hexDigests.get("MD5")); - assertEquals(sha1, hexDigests.get("SHA-1")); - assertEquals(sha256, hexDigests.get("SHA-256")); - assertEquals(sha384, hexDigests.get("SHA-384")); - assertEquals(sha512, hexDigests.get("SHA-512")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStoreLinks.generateChecksums(dataStream, "MD2"); + + String md2 = testData.pidData.get(pid).get("md2"); + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md2, hexDigests.get("MD2")); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + + assertEquals(hexDigests.size(), 6); + } + } + } + + /** + * Confirm that generateChecksums returns the default amount of checksums + */ + @Test + public void testGenerateChecksums_defaultChecksumsFound() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + // Get test file + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStoreLinks.generateChecksums(dataStream, null); + assertEquals(hexDigests.size(), 5); + } } } } From 5c69d339354f39ee9b8224618c9b26b1ce2eb96b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 12:51:16 -0700 Subject: [PATCH 505/553] Change previously public access modifiers for FileHashStore class variables to protected --- .../hashstore/filehashstore/FileHashStore.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 9c53fb09..d9b8621d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -82,7 +82,7 @@ public class FileHashStore implements HashStore { /** * The default hash algorithms included in the ObjectMetadata when storing objects. */ - public enum DefaultHashAlgorithms { + protected enum DefaultHashAlgorithms { MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); final String algoName; @@ -102,21 +102,21 @@ public String getName() { /** * The two different type of HashStore identifiers */ - public enum HashStoreIdTypes { + protected enum HashStoreIdTypes { cid, pid } /** * The configuration properties for a HashStore */ - public enum HashStoreProperties { + protected enum HashStoreProperties { storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace } /** * When working with refs files, we either add or remove values */ - enum HashStoreRefUpdateTypes { + protected enum HashStoreRefUpdateTypes { add, remove } @@ -130,8 +130,8 @@ enum HashStoreRefUpdateTypes { * @param pidRefsPath Path to the pid's that references the data object * @param sysmetaPath Path to the pid's system metadata if available */ - record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, - String sysmetaPath) { + protected record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, + String pidRefsPath, String sysmetaPath) { } /** From 530e48dcb230cbadd1d1f245d4a6460c5b28eb7f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Mon, 19 Aug 2024 12:54:10 -0700 Subject: [PATCH 506/553] Rename 'defaultHexDigestsList' to 'defaultMessageDigestsList' and revise comments --- .../dataone/hashstore/filehashstore/FileHashStore.java | 9 +++++---- .../hashstore/hashstoreconverter/FileHashStoreLinks.java | 5 ++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index d9b8621d..dc829cbc 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -97,7 +97,7 @@ public String getName() { } // List of default hash algorithms to calculate when storing objects/hard links - protected List defaultHexDigestsList = new ArrayList<>(); + protected List defaultMessageDigestsList = new ArrayList<>(); /** * The two different type of HashStore identifiers @@ -191,9 +191,9 @@ public FileHashStore(Properties hashstoreProperties) Files.createDirectories(REFS_TMP_FILE_DIRECTORY); Files.createDirectories(REFS_PID_FILE_DIRECTORY); Files.createDirectories(REFS_CID_FILE_DIRECTORY); - // Initialize default hash algorithms + // Initialize default hash algorithms to calculate checksums for for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { - defaultHexDigestsList.add(MessageDigest.getInstance(algorithm.getName())); + defaultMessageDigestsList.add(MessageDigest.getInstance(algorithm.getName())); } logFileHashStore.debug("FileHashStore initialized"); @@ -1409,7 +1409,8 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor protected Map writeToTmpFileAndGenerateChecksums( File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm) throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { - List digestsToCalculate = defaultHexDigestsList; + // Get the default hash algorithms to calculate checksums for + List digestsToCalculate = defaultMessageDigestsList; // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index a3732f47..7c9bfee1 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -17,7 +17,6 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -163,8 +162,8 @@ protected Path getHashStoreLinksDataObjectPath(String pid) protected Map generateChecksums( InputStream dataStream, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, SecurityException { - List digestsToCalculate = defaultHexDigestsList; - // Initialize default hash algorithms + List digestsToCalculate = defaultMessageDigestsList; + // Get the default hash algorithms to calculate checksums for for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { digestsToCalculate.add(MessageDigest.getInstance(algorithm.getName())); } From 5f8941ab9bba7ecb3d06be864ac4aa5ad3a1092a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 20 Aug 2024 09:57:56 -0700 Subject: [PATCH 507/553] Remove unused import in 'HashStoreConverter' --- .../dataone/hashstore/hashstoreconverter/HashStoreConverter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 02fa7f8b..9df4f215 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.io.InputStream; -import java.nio.file.Files; import java.nio.file.Path; import java.security.NoSuchAlgorithmException; import java.util.Properties; From b5ed3b19626389123f6391f2fcfff0ea09db4cf0 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 20 Aug 2024 15:18:13 -0700 Subject: [PATCH 508/553] Update junit test debug messaging for better accuracy (.getMessage instead of .getCause) --- .../filehashstore/FileHashStoreInterfaceTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 33eee518..303af320 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -580,7 +580,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } } catch (Exception e) { System.out.println( - "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getMessage()); assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } @@ -602,7 +602,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } } catch (Exception e) { System.out.println( - "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getMessage()); assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } @@ -624,7 +624,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } } catch (Exception e) { System.out.println( - "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getMessage()); assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } @@ -646,7 +646,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } } catch (Exception e) { System.out.println( - "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getMessage()); assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } @@ -668,7 +668,7 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { } } catch (Exception e) { System.out.println( - "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getMessage()); assertTrue(e instanceof RuntimeException | e instanceof HashStoreRefsAlreadyExistException); } From 2119efb0e3aa6eb0fb74516d47efade03f92259c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 20 Aug 2024 15:18:35 -0700 Subject: [PATCH 509/553] Roll back optimizations to the default hash algorithm calculation process --- .../filehashstore/FileHashStore.java | 53 +++++++++++-------- .../FileHashStoreLinks.java | 36 ++++++++----- 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index dc829cbc..605460c9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -96,9 +96,6 @@ public String getName() { } } - // List of default hash algorithms to calculate when storing objects/hard links - protected List defaultMessageDigestsList = new ArrayList<>(); - /** * The two different type of HashStore identifiers */ @@ -191,10 +188,6 @@ public FileHashStore(Properties hashstoreProperties) Files.createDirectories(REFS_TMP_FILE_DIRECTORY); Files.createDirectories(REFS_PID_FILE_DIRECTORY); Files.createDirectories(REFS_CID_FILE_DIRECTORY); - // Initialize default hash algorithms to calculate checksums for - for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { - defaultMessageDigestsList.add(MessageDigest.getInstance(algorithm.getName())); - } logFileHashStore.debug("FileHashStore initialized"); } catch (IOException ioe) { @@ -1409,8 +1402,6 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor protected Map writeToTmpFileAndGenerateChecksums( File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm) throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { - // Get the default hash algorithms to calculate checksums for - List digestsToCalculate = defaultMessageDigestsList; // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { @@ -1428,19 +1419,22 @@ protected Map writeToTmpFileAndGenerateChecksums( } FileOutputStream os = new FileOutputStream(tmpFile); + MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); + MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); + MessageDigest sha256 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_256.getName()); + MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); + MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); MessageDigest additionalAlgo = null; MessageDigest checksumAlgo = null; if (generateAddAlgo) { logFileHashStore.debug( "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); - digestsToCalculate.add(additionalAlgo); } if (generateCsAlgo) { logFileHashStore.debug( "Adding checksum algorithm to hex digest map, algorithm: " + checksumAlgorithm); checksumAlgo = MessageDigest.getInstance(checksumAlgorithm); - digestsToCalculate.add(checksumAlgo); } // Calculate hex digests @@ -1449,8 +1443,16 @@ protected Map writeToTmpFileAndGenerateChecksums( int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { os.write(buffer, 0, bytesRead); - for (MessageDigest digest : digestsToCalculate) { - digest.update(buffer, 0, bytesRead); + md5.update(buffer, 0, bytesRead); + sha1.update(buffer, 0, bytesRead); + sha256.update(buffer, 0, bytesRead); + sha384.update(buffer, 0, bytesRead); + sha512.update(buffer, 0, bytesRead); + if (generateAddAlgo) { + additionalAlgo.update(buffer, 0, bytesRead); + } + if (generateCsAlgo) { + checksumAlgo.update(buffer, 0, bytesRead); } } @@ -1466,24 +1468,29 @@ protected Map writeToTmpFileAndGenerateChecksums( // Create map of hash algorithms and corresponding hex digests Map hexDigests = new HashMap<>(); - for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { - String hexDigest = DatatypeConverter - .printHexBinary(digestsToCalculate.get(algorithm.ordinal()).digest()).toLowerCase(); - hexDigests.put(algorithm.getName(), hexDigest); - } + String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); + String sha1Digest = DatatypeConverter.printHexBinary(sha1.digest()).toLowerCase(); + String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha384Digest = DatatypeConverter.printHexBinary(sha384.digest()).toLowerCase(); + String sha512Digest = DatatypeConverter.printHexBinary(sha512.digest()).toLowerCase(); + hexDigests.put(DefaultHashAlgorithms.MD5.getName(), md5Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_1.getName(), sha1Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); if (generateAddAlgo) { - String extraAlgoDigest = - DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); + String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()) + .toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } if (generateCsAlgo) { - String extraChecksumDigest = - DatatypeConverter.printHexBinary(checksumAlgo.digest()).toLowerCase(); + String extraChecksumDigest = DatatypeConverter.printHexBinary(checksumAlgo.digest()) + .toLowerCase(); hexDigests.put(checksumAlgorithm, extraChecksumDigest); } logFileHashStore.debug( "Object has been written to tmpFile: " + tmpFile.getName() + ". To be moved to: " - + hexDigests.get(DefaultHashAlgorithms.SHA_256.getName())); + + sha256Digest); return hexDigests; } diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java index 7c9bfee1..084f2f96 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinks.java @@ -162,11 +162,6 @@ protected Path getHashStoreLinksDataObjectPath(String pid) protected Map generateChecksums( InputStream dataStream, String additionalAlgorithm) throws NoSuchAlgorithmException, IOException, SecurityException { - List digestsToCalculate = defaultMessageDigestsList; - // Get the default hash algorithms to calculate checksums for - for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { - digestsToCalculate.add(MessageDigest.getInstance(algorithm.getName())); - } // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { @@ -174,21 +169,29 @@ protected Map generateChecksums( generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); } + MessageDigest md5 = MessageDigest.getInstance(DefaultHashAlgorithms.MD5.getName()); + MessageDigest sha1 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_1.getName()); + MessageDigest sha256 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_256.getName()); + MessageDigest sha384 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_384.getName()); + MessageDigest sha512 = MessageDigest.getInstance(DefaultHashAlgorithms.SHA_512.getName()); MessageDigest additionalAlgo = null; if (generateAddAlgo) { logFileHashStoreLinks.debug( "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); - digestsToCalculate.add(additionalAlgo); } - // Calculate hex digests try (dataStream) { byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { - for (MessageDigest digest : digestsToCalculate) { - digest.update(buffer, 0, bytesRead); + md5.update(buffer, 0, bytesRead); + sha1.update(buffer, 0, bytesRead); + sha256.update(buffer, 0, bytesRead); + sha384.update(buffer, 0, bytesRead); + sha512.update(buffer, 0, bytesRead); + if (generateAddAlgo) { + additionalAlgo.update(buffer, 0, bytesRead); } } @@ -201,11 +204,16 @@ protected Map generateChecksums( // Create map of hash algorithms and corresponding hex digests Map hexDigests = new HashMap<>(); - for (DefaultHashAlgorithms algorithm : DefaultHashAlgorithms.values()) { - String hexDigest = DatatypeConverter - .printHexBinary(digestsToCalculate.get(algorithm.ordinal()).digest()).toLowerCase(); - hexDigests.put(algorithm.getName(), hexDigest); - } + String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); + String sha1Digest = DatatypeConverter.printHexBinary(sha1.digest()).toLowerCase(); + String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha384Digest = DatatypeConverter.printHexBinary(sha384.digest()).toLowerCase(); + String sha512Digest = DatatypeConverter.printHexBinary(sha512.digest()).toLowerCase(); + hexDigests.put(DefaultHashAlgorithms.MD5.getName(), md5Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_1.getName(), sha1Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_256.getName(), sha256Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); + hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); if (generateAddAlgo) { String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); From 14ded555b4523f3014eed447f8aac2429a39aca1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 20 Aug 2024 15:27:34 -0700 Subject: [PATCH 510/553] Re-apply IntelliJ auto formatting --- .../dataone/hashstore/filehashstore/FileHashStore.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 605460c9..90e3b22a 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1479,13 +1479,13 @@ protected Map writeToTmpFileAndGenerateChecksums( hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); if (generateAddAlgo) { - String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()) - .toLowerCase(); + String extraAlgoDigest = + DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } if (generateCsAlgo) { - String extraChecksumDigest = DatatypeConverter.printHexBinary(checksumAlgo.digest()) - .toLowerCase(); + String extraChecksumDigest = + DatatypeConverter.printHexBinary(checksumAlgo.digest()).toLowerCase(); hexDigests.put(checksumAlgorithm, extraChecksumDigest); } logFileHashStore.debug( From 87fd9e6b30ae5b36e98cae7ecd730362ebf53ea7 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 20 Aug 2024 16:09:38 -0700 Subject: [PATCH 511/553] Declare 'NonMatchingChecksumException' in HashStoreConverter's 'convert' method and add new junit test --- .../HashStoreConverter.java | 11 +++++--- .../FileHashStoreLinksTest.java | 26 ++++++++++++++++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java index 9df4f215..34ef6407 100644 --- a/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java +++ b/src/main/java/org/dataone/hashstore/hashstoreconverter/HashStoreConverter.java @@ -3,6 +3,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import java.io.IOException; @@ -61,14 +62,16 @@ public HashStoreConverter(Properties hashstoreProperties) * @param checksum Value of checksum * @param checksumAlgorithm Ex. "SHA-256" * @return ObjectMetadata for the given pid - * @throws IOException An issue with calculating checksums or storing sysmeta - * @throws NoSuchAlgorithmException An algorithm defined is not supported - * @throws InterruptedException Issue with synchronizing storing metadata + * @throws IOException An issue with calculating checksums or storing sysmeta + * @throws NoSuchAlgorithmException An algorithm defined is not supported + * @throws InterruptedException Issue with synchronizing storing metadata + * @throws NonMatchingChecksumException When the checksums calculated/given do not match */ public ObjectMetadata convert( Path filePath, String pid, InputStream sysmetaStream, String checksum, String checksumAlgorithm) - throws IOException, NoSuchAlgorithmException, InterruptedException { + throws IOException, NoSuchAlgorithmException, InterruptedException, + NonMatchingChecksumException { logHashStoreConverter.info("Begin converting data object and sysmeta for pid: " + pid); FileHashStoreUtility.ensureNotNull(sysmetaStream, "sysmetaStream"); FileHashStoreUtility.ensureNotNull(pid, "pid"); diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 7453fcb0..0aae09dd 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -1,6 +1,7 @@ package org.dataone.hashstore.hashstoreconverter; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.filehashstore.FileHashStore; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -16,8 +17,10 @@ import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; + import org.dataone.hashstore.testdata.TestDataHarness; /** @@ -56,8 +59,7 @@ public void initializeFileHashStoreLinks() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { fileHashStoreLinks = new FileHashStoreLinks(storeProperties); @@ -123,8 +125,7 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); } @@ -175,6 +176,23 @@ public void storeHardLink_alreadyExists() throws Exception { } } + /** + * Check that storeHardLink creates hard link and returns the correct ObjectMetadata cid + */ + @Test + public void storeHardLink_nonMatchingChecksum() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + assertTrue(Files.exists(testDataFile)); + + assertThrows(NonMatchingChecksumException.class, + () -> fileHashStoreLinks.storeHardLink(testDataFile, pid, "badchecksum", + "SHA-256")); + + } + } + /** * Confirm that generateChecksums calculates checksums as expected */ From 6e4185194fbe88016f6e4597a4ca7675d5c46cea Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 20 Aug 2024 16:13:43 -0700 Subject: [PATCH 512/553] Fix inaccurate javadoc for new junit test --- .../hashstore/hashstoreconverter/FileHashStoreLinksTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 0aae09dd..6472c6b7 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -177,7 +177,7 @@ public void storeHardLink_alreadyExists() throws Exception { } /** - * Check that storeHardLink creates hard link and returns the correct ObjectMetadata cid + * Check that storeHardLink throws nonMatchingChecksumException when values do not match */ @Test public void storeHardLink_nonMatchingChecksum() throws Exception { From 8c865a8a81eb70946fc9f9510241337613afcf8b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 20 Aug 2024 16:14:34 -0700 Subject: [PATCH 513/553] Cleanup 'FileHashStoreLinksTest' class --- .../hashstore/hashstoreconverter/FileHashStoreLinksTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java index 6472c6b7..94f9b1f4 100644 --- a/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java +++ b/src/test/java/org/dataone/hashstore/hashstoreconverter/FileHashStoreLinksTest.java @@ -180,7 +180,7 @@ public void storeHardLink_alreadyExists() throws Exception { * Check that storeHardLink throws nonMatchingChecksumException when values do not match */ @Test - public void storeHardLink_nonMatchingChecksum() throws Exception { + public void storeHardLink_nonMatchingChecksum() { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); From 893df5ef89eeaf4f853449045c35972fdcd012ab Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 27 Aug 2024 09:59:00 -0700 Subject: [PATCH 514/553] Remove 'unTagObject' call from 'tagObject' method --- .../hashstore/filehashstore/FileHashStore.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 90e3b22a..8d95a476 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -550,13 +550,13 @@ public void tagObject(String pid, String cid) String errMsg = "pid: " + pid + " already references another cid." + " A pid can only reference one cid."; throw new PidRefsFileExistsException(errMsg); - - } catch (Exception e) { - // cid and pid has been released - // Revert the process for all other exceptions - unTagObject(pid, cid); - throw e; } +// } catch (Exception e) { +// // cid and pid has been released +// // Revert the process for all other exceptions +// unTagObject(pid, cid); +// throw e; +// } } @Override From 2bbdb15a6803147edf5e79fd69e32db87537d61c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 27 Aug 2024 10:17:12 -0700 Subject: [PATCH 515/553] Refactor 'storeHashStoreRefsFiles' to 'unTagObject' --- .../filehashstore/FileHashStore.java | 130 ++++++++++-------- 1 file changed, 70 insertions(+), 60 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8d95a476..3666b704 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1621,74 +1621,84 @@ protected void storeHashStoreRefsFiles(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); - if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Confirm that reference files are where they are expected to be - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - // We throw an exception so the client is aware that everything is in place - String errMsg = - "Object with cid: " + cid + " already exists and is tagged with pid: " + pid; - logFileHashStore.error(errMsg); - throw new HashStoreRefsAlreadyExistException(errMsg); - - } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { - // If pid refs exists, it can only contain and reference one cid - // First, compare the cid retrieved from the pid refs file from the supplied cid - String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); - if (retrievedCid.equalsIgnoreCase(cid)) { - // The pid correctly references the cid, but the cid refs file is missing - // Create the file and verify tagging process - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + try { + if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Confirm that reference files are where they are expected to be verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "Pid refs file exists for pid: " + pid + ", but cid refs file for: " + cid - + " is missing. Missing cid refs file created and tagging completed."); - return; - } else { - // Check if the retrieved cid refs file exists and pid is referenced - Path retrievedAbsCidRefsPath = - getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); - if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( - pid, retrievedAbsCidRefsPath)) { - // This pid is accounted for and tagged as expected. - String errMsg = "Pid refs file already exists for pid: " + pid - + ", and the associated cid refs file contains the " - + "pid. A pid can only reference one cid."; - logFileHashStore.error(errMsg); - throw new PidRefsFileExistsException(errMsg); + // We throw an exception so the client is aware that everything is in place + String errMsg = + "Object with cid: " + cid + " already exists and is tagged with pid: " + pid; + logFileHashStore.error(errMsg); + throw new HashStoreRefsAlreadyExistException(errMsg); + + } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { + // If pid refs exists, it can only contain and reference one cid + // First, compare the cid retrieved from the pid refs file from the supplied cid + String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); + if (retrievedCid.equalsIgnoreCase(cid)) { + // The pid correctly references the cid, but the cid refs file is missing + // Create the file and verify tagging process + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "Pid refs file exists for pid: " + pid + ", but cid refs file for: " + cid + + " is missing. Missing cid refs file created and tagging completed."); + return; + } else { + // Check if the retrieved cid refs file exists and pid is referenced + Path retrievedAbsCidRefsPath = + getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); + if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( + pid, retrievedAbsCidRefsPath)) { + // This pid is accounted for and tagged as expected. + String errMsg = "Pid refs file already exists for pid: " + pid + + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); + } + // Orphaned pid refs file found, the retrieved cid refs file exists + // but doesn't contain the pid. Proceed to overwrite the pid refs file. } - // Orphaned pid refs file found, the retrieved cid refs file exists - // but doesn't contain the pid. Proceed to overwrite the pid refs file. - } - } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Only update cid refs file if pid is not in the file - if (!isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); + } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Only update cid refs file if pid is not in the file + if (!isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); + } + // Get the pid refs file and verify tagging process + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); + File absPathPidRefsFile = absPidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info("Object with cid: " + cid + + " has been updated and tagged successfully with pid: " + + pid); + return; } - // Get the pid refs file and verify tagging process + + // Get pid and cid refs files File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); + // Move refs files to permanent location File absPathPidRefsFile = absPidRefsPath.toFile(); + File absPathCidRefsFile = absCidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + // Verify tagging process, this throws an exception if there's an issue verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info("Object with cid: " + cid - + " has been updated and tagged successfully with pid: " - + pid); - return; + logFileHashStore.info( + "Object with cid: " + cid + " has been tagged successfully with pid: " + pid); + } catch (HashStoreRefsAlreadyExistException | PidRefsFileExistsException hse) { + // These exceptions are handled by this method and should be re-thrown + throw hse; + + } catch (Exception e) { + // Revert the process for all other exceptions + unTagObject(pid, cid); + throw e; } - - // Get pid and cid refs files - File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); - // Move refs files to permanent location - File absPathPidRefsFile = absPidRefsPath.toFile(); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - // Verify tagging process, this throws an exception if there's an issue - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "Object with cid: " + cid + " has been tagged successfully with pid: " + pid); } finally { releaseObjectLockedCids(cid); releaseReferenceLockedPids(pid); From eba6b939792a008e6bb4da84c6e3ac6b4dec22dc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 27 Aug 2024 10:57:15 -0700 Subject: [PATCH 516/553] Add new custom exception 'IdentifierNotLockedException' --- .../exceptions/IdentifierNotLockedException.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/main/java/org/dataone/hashstore/exceptions/IdentifierNotLockedException.java diff --git a/src/main/java/org/dataone/hashstore/exceptions/IdentifierNotLockedException.java b/src/main/java/org/dataone/hashstore/exceptions/IdentifierNotLockedException.java new file mode 100644 index 00000000..21322d13 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/IdentifierNotLockedException.java @@ -0,0 +1,12 @@ +package org.dataone.hashstore.exceptions; + +/** + * This exception is thrown when an identifier is not locked, breaking thread safety. + */ +public class IdentifierNotLockedException extends RuntimeException { + + public IdentifierNotLockedException(String message) { + super(message); + } + +} From cb569349295035de811ca774baade6561eea3cfd Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 27 Aug 2024 11:05:34 -0700 Subject: [PATCH 517/553] Refactor 'unTagObject' with guard rails to ensure thread safety, revise access modifier for sync methods to be protected to assist with testing --- .../filehashstore/FileHashStore.java | 191 +++++++++--------- 1 file changed, 100 insertions(+), 91 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3666b704..8826ee96 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -38,6 +38,7 @@ import org.dataone.hashstore.HashStore; import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.IdentifierNotLockedException; import org.dataone.hashstore.exceptions.MissingHexDigestsException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; @@ -1711,9 +1712,11 @@ protected void storeHashStoreRefsFiles(String pid, String cid) * * @param pid Persistent or authority-based identifier * @param cid Content identifier of data object - * @throws InterruptedException When there is a synchronization issue - * @throws NoSuchAlgorithmException When there is an algorithm used that is not supported - * @throws IOException When there is an issue deleting refs files + * @throws InterruptedException When there is a synchronization issue + * @throws NoSuchAlgorithmException When there is an algorithm used that is not supported + * @throws IOException When there is an issue deleting refs files + * @throws IdentifierNotLockedException When called to untag a pid or cid that is not currently + * locked, breaking thread safety */ protected void unTagObject(String pid, String cid) throws InterruptedException, NoSuchAlgorithmException, IOException { @@ -1725,101 +1728,107 @@ protected void unTagObject(String pid, String cid) Collection deleteList = new ArrayList<>(); + // To untag a pid, the pid must be found and currently locked + // The pid will not be released until this process is over + if (!referenceLockedPids.contains(pid)) { + String errMsg = "Cannot untag pid that is not currently locked"; + logFileHashStore.error(errMsg); + throw new IdentifierNotLockedException(errMsg); + } + + // Before we begin untagging process, we look for the `cid` by calling + // `findObject` which will throw custom exceptions if there is an issue with + // the reference files, which help us determine the path to proceed with. try { - synchronizeObjectLockedPids(pid); - // Before we begin untagging process, we look for the `cid` by calling - // `findObject` which will throw custom exceptions if there is an issue with - // the reference files, which help us determine the path to proceed with. - try { - ObjectInfo objInfo = findObject(pid); - cid = objInfo.cid(); - try { - // If no exceptions are thrown, we proceed to synchronization based on the `cid` - synchronizeObjectLockedCids(cid); - // Get paths to reference files to work on - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + ObjectInfo objInfo = findObject(pid); + cid = objInfo.cid(); - // Begin deletion process - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } else { - String warnMsg = "Cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object " + "deletion."; - logFileHashStore.warn(warnMsg); - } - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); + // We must confirm that we are working on a cid that is locked + // If not, this means that this call is not thread safe. + // This `cid` will be released by the calling method. + if (!objectLockedCids.contains(cid)) { + String errMsg = "Cannot untag cid that is not currently locked"; + logFileHashStore.error(errMsg); + throw new IdentifierNotLockedException(errMsg); + } - } finally { - releaseObjectLockedCids(cid); - } + // Get paths to reference files to work on + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - } catch (OrphanPidRefsFileException oprfe) { - // `findObject` throws this exception when the cid refs file doesn't exist, - // so we only need to delete the pid refs file - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = "Cid refs file does not exist for pid: " + pid - + ". Deleted orphan pid refs file."; + // Begin deletion process + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } else { + String warnMsg = "Cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object " + "deletion."; logFileHashStore.warn(warnMsg); + } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); - } catch (OrphanRefsFilesException orfe) { - // `findObject` throws this exception when the pid and cid refs file exists, - // but the actual object being referenced by the pid does not exist - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + } catch (OrphanPidRefsFileException oprfe) { + // `findObject` throws this exception when the cid refs file doesn't exist, + // so we only need to delete the pid refs file + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); - try { - // Since we must access the cid reference file, the `cid` must be synchronized - synchronizeObjectLockedCids(cidRead); + } catch (OrphanRefsFilesException orfe) { + // `findObject` throws this exception when the pid and cid refs file exists, + // but the actual object being referenced by the pid does not exist + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = "Object with cid: " + cidRead - + " does not exist, but pid and cid reference file found for pid: " + pid - + ". Deleted pid and cid ref files."; - logFileHashStore.warn(warnMsg); + try { + // Since we must access the cid reference file, the `cid` must be synchronized + synchronizeObjectLockedCids(cidRead); - } finally { - releaseObjectLockedCids(cidRead); + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } - } catch (PidNotFoundInCidRefsFileException pnficrfe) { - // `findObject` throws this exception when both the pid and cid refs file exists - // but the pid is not found in the cid refs file. - - // Rename pid refs file for deletion - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); // Delete items FileHashStoreUtility.deleteListItems(deleteList); - String warnMsg = "Pid not found in expected cid refs file for pid: " + pid - + ". Deleted orphan pid refs file."; + String warnMsg = "Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted pid and cid ref files."; logFileHashStore.warn(warnMsg); - } catch (PidRefsFileNotFoundException prfnfe) { - // `findObject` throws this exception if the pid refs file is not found - // Check to see if pid is in the `cid refs file`and attempt to remove it - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); - if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - String errMsg = "Pid refs file not found, removed pid found in cid refs file: " - + absCidRefsPath; - logFileHashStore.warn(errMsg); - } + + } finally { + releaseObjectLockedCids(cidRead); + } + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // `findObject` throws this exception when both the pid and cid refs file exists + // but the pid is not found in the cid refs file. + + // Rename pid refs file for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + } catch (PidRefsFileNotFoundException prfnfe) { + // `findObject` throws this exception if the pid refs file is not found + // Check to see if pid is in the `cid refs file`and attempt to remove it + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + String errMsg = "Pid refs file not found, removed pid found in cid refs file: " + + absCidRefsPath; + logFileHashStore.warn(errMsg); } - } finally { - releaseObjectLockedPids(pid); } } @@ -2186,7 +2195,7 @@ protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - private static void synchronizeObjectLockedPids(String pid) throws InterruptedException { + private void synchronizeObjectLockedPids(String pid) throws InterruptedException { synchronized (objectLockedPids) { while (objectLockedPids.contains(pid)) { try { @@ -2209,7 +2218,7 @@ private static void synchronizeObjectLockedPids(String pid) throws InterruptedEx * * @param pid Content identifier */ - private static void releaseObjectLockedPids(String pid) { + private void releaseObjectLockedPids(String pid) { synchronized (objectLockedPids) { logFileHashStore.debug("Releasing objectLockedPids for pid: " + pid); objectLockedPids.remove(pid); @@ -2223,7 +2232,7 @@ private static void releaseObjectLockedPids(String pid) { * @param metadataDocId Metadata document id hash(pid+formatId) * @throws InterruptedException When an issue occurs when attempting to sync the metadata doc */ - private static void synchronizeMetadataLockedDocIds(String metadataDocId) + private void synchronizeMetadataLockedDocIds(String metadataDocId) throws InterruptedException { synchronized (metadataLockedDocIds) { while (metadataLockedDocIds.contains(metadataDocId)) { @@ -2249,7 +2258,7 @@ private static void synchronizeMetadataLockedDocIds(String metadataDocId) * * @param metadataDocId Metadata document id hash(pid+formatId) */ - private static void releaseMetadataLockedDocIds(String metadataDocId) { + private void releaseMetadataLockedDocIds(String metadataDocId) { synchronized (metadataLockedDocIds) { logFileHashStore.debug( "Releasing metadataLockedDocIds for metadata doc: " + metadataDocId); @@ -2266,7 +2275,7 @@ private static void releaseMetadataLockedDocIds(String metadataDocId) { * @param cid Content identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - private static void synchronizeObjectLockedCids(String cid) throws InterruptedException { + protected void synchronizeObjectLockedCids(String cid) throws InterruptedException { synchronized (objectLockedCids) { while (objectLockedCids.contains(cid)) { try { @@ -2289,7 +2298,7 @@ private static void synchronizeObjectLockedCids(String cid) throws InterruptedEx * * @param cid Content identifier */ - private static void releaseObjectLockedCids(String cid) { + protected void releaseObjectLockedCids(String cid) { synchronized (objectLockedCids) { logFileHashStore.debug("Releasing objectLockedCids for cid: " + cid); objectLockedCids.remove(cid); @@ -2305,7 +2314,7 @@ private static void releaseObjectLockedCids(String cid) { * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - private static void synchronizeReferenceLockedPids(String pid) throws InterruptedException { + protected void synchronizeReferenceLockedPids(String pid) throws InterruptedException { synchronized (referenceLockedPids) { while (referenceLockedPids.contains(pid)) { try { @@ -2328,7 +2337,7 @@ private static void synchronizeReferenceLockedPids(String pid) throws Interrupte * * @param pid Persistent or authority-based identifier */ - private static void releaseReferenceLockedPids(String pid) { + protected void releaseReferenceLockedPids(String pid) { synchronized (referenceLockedPids) { logFileHashStore.debug("Releasing referenceLockedPids for pid: " + pid); referenceLockedPids.remove(pid); From 6bb60de5be3529c9b27015d4d3029a5e84c8c6e3 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 27 Aug 2024 14:15:09 -0700 Subject: [PATCH 518/553] Refactor junit tests for 'unTagObject' after making changes --- .../FileHashStoreProtectedTest.java | 218 +++++++++++++----- 1 file changed, 162 insertions(+), 56 deletions(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index ea0d8cc7..4232c684 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -30,6 +30,7 @@ import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.IdentifierNotLockedException; import org.dataone.hashstore.exceptions.NonMatchingChecksumException; import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; @@ -1266,19 +1267,64 @@ public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { */ @Test public void unTagObject() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - fileHashStore.unTagObject(pid, cid); + String cid = objInfo.cid(); + fileHashStore.synchronizeReferenceLockedPids(pid); + fileHashStore.synchronizeObjectLockedCids(cid); - // Confirm refs files do not exist - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); - assertFalse(Files.exists(absCidRefsPath)); - assertFalse(Files.exists(absPidRefsPath)); + fileHashStore.unTagObject(pid, cid); + + fileHashStore.releaseReferenceLockedPids(pid); + fileHashStore.releaseObjectLockedCids(cid); + + // Confirm refs files do not exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + assertFalse(Files.exists(absCidRefsPath)); + assertFalse(Files.exists(absPidRefsPath)); + } + } + } + + /** + * Confirm IdentifierNotLockedException is thrown when pid is not locked + */ + @Test + public void unTagObject_pid_IdentifierNotLockedException() { + for (String pid : testData.pidList) { + assertThrows( + IdentifierNotLockedException.class, () -> fileHashStore.unTagObject(pid, "cid")); + } + } + + /** + * Confirm IdentifierNotLockedException is thrown when cid is not locked + */ + @Test + public void unTagObject_cid_IdentifierNotLockedException() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + // The object must be stored otherwise the unTag process cannot execute as expected + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } + + fileHashStore.synchronizeReferenceLockedPids(pid); + assertThrows( + IdentifierNotLockedException.class, () -> fileHashStore.unTagObject(pid, "cid")); + fileHashStore.releaseReferenceLockedPids(pid); + } } /** @@ -1287,23 +1333,38 @@ public void unTagObject() throws Exception { */ @Test public void unTagObject_cidWithMultiplePidReferences() throws Exception { - String pid = "dou.test.1"; - String pidTwo = "dou.test.2"; - String pidThree = "dou.test.3"; - String pidFour = "dou.test.4"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - fileHashStore.tagObject(pidTwo, cid); - fileHashStore.tagObject(pidThree, cid); - fileHashStore.tagObject(pidFour, cid); + // Get test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + String cid = testData.pidData.get(pid).get("sha256"); + + Collection pidList = new ArrayList<>(); + for (int i = 1; i < 5; i++) { + pidList.add(pid + "." + i); + } + + // The object must be stored otherwise the unTag process cannot execute as expected + for (String pidToUse : pidList) { + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pidToUse, null, null, null, -1); + } + } + + String pidToCheck = pid + ".1"; + + fileHashStore.synchronizeReferenceLockedPids(pidToCheck); + fileHashStore.synchronizeObjectLockedCids(cid); + + fileHashStore.unTagObject(pidToCheck, cid); - fileHashStore.unTagObject(pid, cid); + fileHashStore.releaseReferenceLockedPids(pidToCheck); + fileHashStore.releaseObjectLockedCids(cid); // Confirm refs files state Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + fileHashStore.getHashStoreRefsPath(pidToCheck, FileHashStore.HashStoreIdTypes.pid); assertFalse(Files.exists(absPidRefsPath)); assertTrue(Files.exists(absCidRefsPath)); @@ -1324,32 +1385,46 @@ public void unTagObject_cidWithMultiplePidReferences() throws Exception { */ @Test public void unTagObject_orphanPidRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - // Delete cid refs file to create orphaned pid refs file - Path absCidRefsPath = - fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); - Files.delete(absCidRefsPath); - assertFalse(Files.exists(absCidRefsPath)); + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - fileHashStore.unTagObject(pid, cid); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - // Confirm pid refs is deleted - Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); - assertFalse(Files.exists(absPidRefsPath)); + String cid = objInfo.cid(); + // Delete cid refs file to create orphaned pid refs file + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Files.delete(absCidRefsPath); + assertFalse(Files.exists(absCidRefsPath)); - // Confirm number of reference files - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + fileHashStore.synchronizeReferenceLockedPids(pid); + fileHashStore.synchronizeObjectLockedCids(cid); + + fileHashStore.unTagObject(pid, cid); + + fileHashStore.releaseReferenceLockedPids(pid); + fileHashStore.releaseObjectLockedCids(cid); + + // Confirm pid refs is deleted + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + assertFalse(Files.exists(absPidRefsPath)); + + // Confirm number of reference files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - assertEquals(0, pidRefsFiles.size()); - assertEquals(0, cidRefsFiles.size()); + assertEquals(0, pidRefsFiles.size()); + assertEquals(0, cidRefsFiles.size()); + } + } } /** @@ -1358,10 +1433,25 @@ public void unTagObject_orphanPidRefsFile() throws Exception { */ @Test public void unTagObject_missingRefsFiles() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - fileHashStore.unTagObject(pid, cid); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + // Store data object only + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream); + + String cid = objInfo.cid(); + fileHashStore.synchronizeReferenceLockedPids(pid); + fileHashStore.synchronizeObjectLockedCids(cid); + + fileHashStore.unTagObject(pid, cid); + + fileHashStore.releaseReferenceLockedPids(pid); + fileHashStore.releaseObjectLockedCids(cid); + } + } } /** @@ -1370,25 +1460,41 @@ public void unTagObject_missingRefsFiles() throws Exception { */ @Test public void unTagObject_missingPidRefsFile() throws Exception { - String pid = "dou.test.1"; - String pidTwo = "dou.test.2"; - String pidThree = "dou.test.3"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - fileHashStore.tagObject(pidTwo, cid); - fileHashStore.tagObject(pidThree, cid); + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + String cid = testData.pidData.get(pid).get("sha256"); + + Collection pidList = new ArrayList<>(); + for (int i = 1; i < 5; i++) { + pidList.add(pid + "." + i); + } + + // The object must be stored otherwise the unTag process cannot execute as expected + for (String pidToUse : pidList) { + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pidToUse, null, null, null, -1); + } + } + + String pidToCheck = pid + ".1"; // Delete pid refs to create scenario Path absPidRefsPath = - fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + fileHashStore.getHashStoreRefsPath(pidToCheck, FileHashStore.HashStoreIdTypes.pid); Files.delete(absPidRefsPath); assertFalse(Files.exists(absPidRefsPath)); - fileHashStore.unTagObject(pid, cid); + fileHashStore.synchronizeReferenceLockedPids(pidToCheck); + fileHashStore.synchronizeObjectLockedCids(cid); + + fileHashStore.unTagObject(pidToCheck, cid); + + fileHashStore.releaseReferenceLockedPids(pidToCheck); + fileHashStore.releaseObjectLockedCids(cid); Path absCidRefsPath = fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); - assertFalse(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); + assertFalse(fileHashStore.isStringInRefsFile(pidToCheck, absCidRefsPath)); } /** From 9d24873cc4f42cbf49525a5fb890be553ed974db Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 27 Aug 2024 14:20:01 -0700 Subject: [PATCH 519/553] Cleanup code, comments and javadocs --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 6 ------ .../hashstore/filehashstore/FileHashStoreProtectedTest.java | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 8826ee96..98f6cd91 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -552,12 +552,6 @@ public void tagObject(String pid, String cid) + " A pid can only reference one cid."; throw new PidRefsFileExistsException(errMsg); } -// } catch (Exception e) { -// // cid and pid has been released -// // Revert the process for all other exceptions -// unTagObject(pid, cid); -// throw e; -// } } @Override diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 4232c684..fbe5b1b2 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1455,8 +1455,8 @@ public void unTagObject_missingRefsFiles() throws Exception { } /** - * Check that unTagObject does not throw exception when a pid refs file and cid refs file does - * not exist + * Check that unTagObject successfully removes a pid from a cid refs file when a pid refs file + * is missing but the pid is referenced in a cid refs file */ @Test public void unTagObject_missingPidRefsFile() throws Exception { From d22659b7f5de90f47b3026d11115bc8d76f119a4 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 27 Aug 2024 14:34:13 -0700 Subject: [PATCH 520/553] Re-add 'static' access modifiers to synchronized methods --- .../hashstore/filehashstore/FileHashStore.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 98f6cd91..f925cab9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -2189,7 +2189,7 @@ protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - private void synchronizeObjectLockedPids(String pid) throws InterruptedException { + private static void synchronizeObjectLockedPids(String pid) throws InterruptedException { synchronized (objectLockedPids) { while (objectLockedPids.contains(pid)) { try { @@ -2212,7 +2212,7 @@ private void synchronizeObjectLockedPids(String pid) throws InterruptedException * * @param pid Content identifier */ - private void releaseObjectLockedPids(String pid) { + private static void releaseObjectLockedPids(String pid) { synchronized (objectLockedPids) { logFileHashStore.debug("Releasing objectLockedPids for pid: " + pid); objectLockedPids.remove(pid); @@ -2226,7 +2226,7 @@ private void releaseObjectLockedPids(String pid) { * @param metadataDocId Metadata document id hash(pid+formatId) * @throws InterruptedException When an issue occurs when attempting to sync the metadata doc */ - private void synchronizeMetadataLockedDocIds(String metadataDocId) + private static void synchronizeMetadataLockedDocIds(String metadataDocId) throws InterruptedException { synchronized (metadataLockedDocIds) { while (metadataLockedDocIds.contains(metadataDocId)) { @@ -2252,7 +2252,7 @@ private void synchronizeMetadataLockedDocIds(String metadataDocId) * * @param metadataDocId Metadata document id hash(pid+formatId) */ - private void releaseMetadataLockedDocIds(String metadataDocId) { + private static void releaseMetadataLockedDocIds(String metadataDocId) { synchronized (metadataLockedDocIds) { logFileHashStore.debug( "Releasing metadataLockedDocIds for metadata doc: " + metadataDocId); @@ -2269,7 +2269,7 @@ private void releaseMetadataLockedDocIds(String metadataDocId) { * @param cid Content identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - protected void synchronizeObjectLockedCids(String cid) throws InterruptedException { + protected static void synchronizeObjectLockedCids(String cid) throws InterruptedException { synchronized (objectLockedCids) { while (objectLockedCids.contains(cid)) { try { @@ -2292,7 +2292,7 @@ protected void synchronizeObjectLockedCids(String cid) throws InterruptedExcepti * * @param cid Content identifier */ - protected void releaseObjectLockedCids(String cid) { + protected static void releaseObjectLockedCids(String cid) { synchronized (objectLockedCids) { logFileHashStore.debug("Releasing objectLockedCids for cid: " + cid); objectLockedCids.remove(cid); @@ -2308,7 +2308,7 @@ protected void releaseObjectLockedCids(String cid) { * @param pid Persistent or authority-based identifier * @throws InterruptedException When an issue occurs when attempting to sync the pid */ - protected void synchronizeReferenceLockedPids(String pid) throws InterruptedException { + protected static void synchronizeReferenceLockedPids(String pid) throws InterruptedException { synchronized (referenceLockedPids) { while (referenceLockedPids.contains(pid)) { try { @@ -2331,7 +2331,7 @@ protected void synchronizeReferenceLockedPids(String pid) throws InterruptedExce * * @param pid Persistent or authority-based identifier */ - protected void releaseReferenceLockedPids(String pid) { + protected static void releaseReferenceLockedPids(String pid) { synchronized (referenceLockedPids) { logFileHashStore.debug("Releasing referenceLockedPids for pid: " + pid); referenceLockedPids.remove(pid); From 06bf22e9e04fd2e0feaa556c2c70f01c7723cf15 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 28 Aug 2024 15:40:28 -0700 Subject: [PATCH 521/553] Added the code to deploy the jar files to the remote maven repostiory. --- pom.xml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pom.xml b/pom.xml index f2dc363a..3e3a6eb0 100644 --- a/pom.xml +++ b/pom.xml @@ -174,5 +174,24 @@ + + + + org.apache.maven.wagon + wagon-ssh-external + 3.5.3 + + + + + + + + + dataone.org + DataONE Repository + scpexe://maven.dataone.org/var/www/maven + + \ No newline at end of file From 31d20cc9b75facd39d5ac2e6782593d19c63701b Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 28 Aug 2024 16:48:58 -0700 Subject: [PATCH 522/553] Added two profile to specify the deployment location: local or remote. --- pom.xml | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 3e3a6eb0..105e212f 100644 --- a/pom.xml +++ b/pom.xml @@ -18,6 +18,26 @@ 17 + + + + remote + + + + + + scpexe://maven.dataone.org/var/www/maven + + + + local + + file:///var/www/maven + + + + commons-logging @@ -183,15 +203,12 @@ - - - - + dataone.org DataONE Repository - scpexe://maven.dataone.org/var/www/maven + ${url} \ No newline at end of file From fbe25ae6d7dbb01d4f0e484f6eb02217295268a9 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 30 Aug 2024 11:53:54 -0700 Subject: [PATCH 523/553] Fix bug in 'storeHashStoreRefsFiles' where we could unintentionally overwrite a pid reference file and update junit test --- .../filehashstore/FileHashStore.java | 20 ++++++------------- .../FileHashStoreProtectedTest.java | 15 +++----------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f925cab9..c0fd3610 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1642,20 +1642,12 @@ protected void storeHashStoreRefsFiles(String pid, String cid) + " is missing. Missing cid refs file created and tagging completed."); return; } else { - // Check if the retrieved cid refs file exists and pid is referenced - Path retrievedAbsCidRefsPath = - getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); - if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( - pid, retrievedAbsCidRefsPath)) { - // This pid is accounted for and tagged as expected. - String errMsg = "Pid refs file already exists for pid: " + pid - + ", and the associated cid refs file contains the " - + "pid. A pid can only reference one cid."; - logFileHashStore.error(errMsg); - throw new PidRefsFileExistsException(errMsg); - } - // Orphaned pid refs file found, the retrieved cid refs file exists - // but doesn't contain the pid. Proceed to overwrite the pid refs file. + // If a pid is in use, we throw an exception immediately + String errMsg = "Pid refs file already exists for pid: " + pid + + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); } } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index fbe5b1b2..1096988c 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1197,8 +1197,7 @@ public void storeHashStoreRefsFiles_PidRefsFileExistsException() throws Exceptio } /** - * Check storeHashStoreRefsFiles overwrites an orphaned pid refs file - the 'cid' that it - * references does not exist (does not have a cid refs file) + * Check storeHashStoreRefsFiles throws exception when a pid refs file already exists */ @Test public void storeHashStoreRefsFiles_pidRefsOrphanedFile() throws Exception { @@ -1214,16 +1213,8 @@ public void storeHashStoreRefsFiles_pidRefsOrphanedFile() throws Exception { File absPathPidRefsFile = absPidRefsPath.toFile(); fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); - fileHashStore.storeHashStoreRefsFiles(pid, cid); - // There should only be 1 of each ref file - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - List pidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); - List cidRefsFiles = - FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); - - assertEquals(1, pidRefsFiles.size()); - assertEquals(1, cidRefsFiles.size()); + assertThrows(PidRefsFileExistsException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); } /** From 97d133d9205bc3f9770b84557cefc131849c27dd Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 30 Aug 2024 11:57:00 -0700 Subject: [PATCH 524/553] Refactor 'storeHashStoreRefsFiles' to always throw exception if a pid refs file already exists --- .../filehashstore/FileHashStore.java | 29 +++++-------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c0fd3610..3361d220 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1627,28 +1627,13 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throw new HashStoreRefsAlreadyExistException(errMsg); } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { - // If pid refs exists, it can only contain and reference one cid - // First, compare the cid retrieved from the pid refs file from the supplied cid - String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); - if (retrievedCid.equalsIgnoreCase(cid)) { - // The pid correctly references the cid, but the cid refs file is missing - // Create the file and verify tagging process - File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); - File absPathCidRefsFile = absCidRefsPath.toFile(); - move(cidRefsTmpFile, absPathCidRefsFile, "refs"); - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - logFileHashStore.info( - "Pid refs file exists for pid: " + pid + ", but cid refs file for: " + cid - + " is missing. Missing cid refs file created and tagging completed."); - return; - } else { - // If a pid is in use, we throw an exception immediately - String errMsg = "Pid refs file already exists for pid: " + pid - + ", and the associated cid refs file contains the " - + "pid. A pid can only reference one cid."; - logFileHashStore.error(errMsg); - throw new PidRefsFileExistsException(errMsg); - } + // If pid refs exists, the pid has already been claimed and cannot be tagged + // We throw an exception immediately + String errMsg = "Pid refs file already exists for pid: " + pid + + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file if (!isStringInRefsFile(pid, absCidRefsPath)) { From b31ae8e8c66d38ca47e96558d8dc0d01e1b874f1 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 30 Aug 2024 12:04:42 -0700 Subject: [PATCH 525/553] Refactor 'storeHashStoreRefsFiles' exception scenario to include more context --- .../filehashstore/FileHashStore.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 3361d220..842a7377 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1618,14 +1618,22 @@ protected void storeHashStoreRefsFiles(String pid, String cid) try { if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Confirm that reference files are where they are expected to be - verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); - // We throw an exception so the client is aware that everything is in place + // If both reference files exist, we confirm that reference files are where + // they are expected to be and throw an exception to inform the client that + // everything is in place - and include other issues for context String errMsg = - "Object with cid: " + cid + " already exists and is tagged with pid: " + pid; - logFileHashStore.error(errMsg); - throw new HashStoreRefsAlreadyExistException(errMsg); - + "Object with cid: " + cid + " already exists and is tagged with pid: " + + pid; + try { + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.error(errMsg); + throw new HashStoreRefsAlreadyExistException(errMsg); + + } catch (Exception e) { + String revMsg = errMsg + " . " + e.getMessage(); + logFileHashStore.error(revMsg); + throw new HashStoreRefsAlreadyExistException(revMsg); + } } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { // If pid refs exists, the pid has already been claimed and cannot be tagged // We throw an exception immediately @@ -1634,6 +1642,7 @@ protected void storeHashStoreRefsFiles(String pid, String cid) + "pid. A pid can only reference one cid."; logFileHashStore.error(errMsg); throw new PidRefsFileExistsException(errMsg); + } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { // Only update cid refs file if pid is not in the file if (!isStringInRefsFile(pid, absCidRefsPath)) { From ecd7854052f54c6beabf528dd44deb009160c97a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 30 Aug 2024 12:23:29 -0700 Subject: [PATCH 526/553] Refactor 'updateRefsFile' so that duplicate values are never added and add new junit test --- .../filehashstore/FileHashStore.java | 15 ++++++++---- .../FileHashStoreProtectedTest.java | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 842a7377..74b1da3f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1934,11 +1934,16 @@ protected void updateRefsFile(String ref, Path absRefsPath, HashStoreRefUpdateTy Collection lines = new ArrayList<>(Files.readAllLines(absRefsPath)); if (updateType.equals(HashStoreRefUpdateTypes.add)) { - lines.add(ref); - Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); - move(tmpFile, absRefsPath.toFile(), "refs"); - logFileHashStore.debug( - "Ref: " + ref + " has been added to refs file: " + absRefsPath); + if (!lines.contains(ref)) { // Check for duplicates + lines.add(ref); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absRefsPath.toFile(), "refs"); + logFileHashStore.debug( + "Ref: " + ref + " has been added to refs file: " + absRefsPath); + } else { + logFileHashStore.debug( + "Ref: " + ref + " already exists in refs file: " + absRefsPath); + } } if (updateType.equals(HashStoreRefUpdateTypes.remove)) { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 1096988c..236637a5 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1655,6 +1655,30 @@ public void updateRefsFile_add() throws Exception { assertTrue(pidAdditional_foundInCidRefFiles); } + /** + * Confirm that cid refs file does not add duplicate value + */ + @Test + public void updateRefsFile_addDuplicateValue() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + String pidAdditional = "dou.test.2"; + fileHashStore.updateRefsFile( + pidAdditional, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.add); + // Try re-adding it + fileHashStore.updateRefsFile( + pidAdditional, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.add); + + List lines = Files.readAllLines(cidRefsFilePath); + assertEquals(lines.size(), 2); + } + /** * Check that updateRefsFile removes pid from its cid refs file */ From 1eda5a591134bdd230d03ff45b0602b79c8c6758 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 30 Aug 2024 14:21:06 -0700 Subject: [PATCH 527/553] Adjust 'storeHashStoreRefs' scenario when pid refs doesn't exist but cid refs does to create pid refs file first --- .../dataone/hashstore/filehashstore/FileHashStore.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 74b1da3f..1062baa4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1644,14 +1644,14 @@ protected void storeHashStoreRefsFiles(String pid, String cid) throw new PidRefsFileExistsException(errMsg); } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { - // Only update cid refs file if pid is not in the file - if (!isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); - } // Get the pid refs file and verify tagging process File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); File absPathPidRefsFile = absPidRefsPath.toFile(); move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + // Only update cid refs file if pid is not in the file + if (!isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); + } verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); logFileHashStore.info("Object with cid: " + cid + " has been updated and tagged successfully with pid: " From 81653033ea88715f0f0286cdc5d048fb91629a17 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 30 Aug 2024 14:22:50 -0700 Subject: [PATCH 528/553] Clean up/optimize 'updateRefsFile' by combining if statements --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1062baa4..b4c51a21 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1944,9 +1944,7 @@ protected void updateRefsFile(String ref, Path absRefsPath, HashStoreRefUpdateTy logFileHashStore.debug( "Ref: " + ref + " already exists in refs file: " + absRefsPath); } - } - - if (updateType.equals(HashStoreRefUpdateTypes.remove)) { + } else if (updateType.equals(HashStoreRefUpdateTypes.remove)) { lines.remove(ref); Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); move(tmpFile, absRefsPath.toFile(), "refs"); From 604e7a5f9f3fe0b4e5cea601374b58fd5fa8055c Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Fri, 30 Aug 2024 14:49:10 -0700 Subject: [PATCH 529/553] Refactor 'unTagObject' to proceed with as much of the untagging flow as possible by wrapping relevant code blocks in try-catch statements --- .../filehashstore/FileHashStore.java | 120 ++++++++++++++---- 1 file changed, 92 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index b4c51a21..90e78aae 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1688,7 +1688,9 @@ protected void storeHashStoreRefsFiles(String pid, String cid) /** * Untags a data object in HashStore by deleting the 'pid reference file' and removing the 'pid' - * from the 'cid reference file'. This method will never delete a data object. + * from the 'cid reference file'. This method will never delete a data object. {@code + * unTagObject} will attempt to proceed with as much of the untagging process as possible and + * swallow relevant exceptions. * * @param pid Persistent or authority-based identifier * @param cid Content identifier of data object @@ -1737,26 +1739,54 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); // Begin deletion process - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } else { - String warnMsg = "Cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object " + "deletion."; - logFileHashStore.warn(warnMsg); + try { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } else { + String warnMsg = "Cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object " + "deletion."; + logFileHashStore.warn(warnMsg); + } + } catch (Exception e) { + logFileHashStore.warn( + "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath); + } + + try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (Exception e) { + logFileHashStore.warn( + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + } + + try { + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + } catch (Exception e) { + logFileHashStore.warn("Unable to delete list of refs files marked for deletion " + + "for request with pid: " + pid + " and cid: " + cid); } - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); + try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (Exception e) { + logFileHashStore.warn( + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + } + + try { + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + } catch (Exception e) { + logFileHashStore.warn("Unable to delete list of refs files marked for deletion " + + "for request with pid: " + pid + " and cid: " + cid); + } String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); @@ -1772,13 +1802,30 @@ protected void unTagObject(String pid, String cid) synchronizeObjectLockedCids(cidRead); Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + try { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } + } catch (Exception e) { + logFileHashStore.warn( + "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath); + } + + try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (Exception e) { + logFileHashStore.warn( + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + } + + try { + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + } catch (Exception e) { + logFileHashStore.warn("Unable to delete list of refs files marked for deletion " + + "for request with pid: " + pid + " and cid: " + cid); } - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); String warnMsg = "Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files."; @@ -1793,9 +1840,20 @@ protected void unTagObject(String pid, String cid) // Rename pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); + try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (Exception e) { + logFileHashStore.warn( + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + } + + try { + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + } catch (Exception e) { + logFileHashStore.warn("Unable to delete list of refs files marked for deletion " + + "for request with pid: " + pid + " and cid: " + cid); + } String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); @@ -1803,11 +1861,17 @@ protected void unTagObject(String pid, String cid) // `findObject` throws this exception if the pid refs file is not found // Check to see if pid is in the `cid refs file`and attempt to remove it Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); - if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - String errMsg = "Pid refs file not found, removed pid found in cid refs file: " - + absCidRefsPath; - logFileHashStore.warn(errMsg); + try { + if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + String errMsg = "Pid refs file not found, removed pid found in cid refs file: " + + absCidRefsPath; + logFileHashStore.warn(errMsg); + } + } catch (Exception e) { + logFileHashStore.warn( + "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath + + "for request with pid: " + pid + " and cid: " + cid); } } } From 650e822bf2b776df4dd8e6e03947e28d1b17d690 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 3 Sep 2024 10:17:56 -0700 Subject: [PATCH 530/553] Clean up 'unTagObject' and re-order process to focus on pid-related tasks --- .../filehashstore/FileHashStore.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 90e78aae..dbcac512 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1718,7 +1718,7 @@ protected void unTagObject(String pid, String cid) throw new IdentifierNotLockedException(errMsg); } - // Before we begin untagging process, we look for the `cid` by calling + // Before we begin the untagging process, we look for the `cid` by calling // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { @@ -1739,6 +1739,14 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); // Begin deletion process + try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + } catch (Exception e) { + logFileHashStore.warn( + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + } + try { updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { @@ -1753,13 +1761,6 @@ protected void unTagObject(String pid, String cid) "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath); } - try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - } catch (Exception e) { - logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); - } - try { // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); @@ -1775,6 +1776,7 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); try { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (Exception e) { logFileHashStore.warn( "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); @@ -1785,7 +1787,7 @@ protected void unTagObject(String pid, String cid) FileHashStoreUtility.deleteListItems(deleteList); } catch (Exception e) { logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid); + + "for orphaned pid refs file for pid: " + pid); } String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; @@ -1842,6 +1844,7 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); try { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + } catch (Exception e) { logFileHashStore.warn( "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); From 2421bec01a062c618f1080417fc90c48040fe294 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 3 Sep 2024 10:59:00 -0700 Subject: [PATCH 531/553] Further clean-up 'unTagObject' to proceed with as much of the untagging flow --- .../filehashstore/FileHashStore.java | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index dbcac512..1a2d909e 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1722,8 +1722,7 @@ protected void unTagObject(String pid, String cid) // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { - ObjectInfo objInfo = findObject(pid); - cid = objInfo.cid(); + findObject(pid); // We must confirm that we are working on a cid that is locked // If not, this means that this call is not thread safe. @@ -1800,6 +1799,16 @@ protected void unTagObject(String pid, String cid) String cidRead = new String(Files.readAllBytes(absPidRefsPath)); try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + } catch (Exception e) { + logFileHashStore.warn( + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + } + + try { + FileHashStoreUtility.ensureNotNull(cidRead, "cidRead"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cidRead, "cidRead"); // Since we must access the cid reference file, the `cid` must be synchronized synchronizeObjectLockedCids(cidRead); @@ -1813,29 +1822,21 @@ protected void unTagObject(String pid, String cid) logFileHashStore.warn( "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath); } - - try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - } catch (Exception e) { - logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); - } - - try { - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - } catch (Exception e) { - logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid); - } - String warnMsg = "Object with cid: " + cidRead - + " does not exist, but pid and cid reference file found for pid: " + pid - + ". Deleted pid and cid ref files."; - logFileHashStore.warn(warnMsg); - } finally { releaseObjectLockedCids(cidRead); } + + try { + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + } catch (Exception e) { + logFileHashStore.warn("Unable to delete list of refs files marked for deletion " + + "for request with pid: " + pid + " and cid: " + cid); + } + String warnMsg = "Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted pid and cid ref files."; + logFileHashStore.warn(warnMsg); } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists // but the pid is not found in the cid refs file. From eab5016e76f78038051b54e52049052ef01571ca Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 3 Sep 2024 11:06:03 -0700 Subject: [PATCH 532/553] Revise exception messaging to improve clarity in 'unTagObject' --- .../filehashstore/FileHashStore.java | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 1a2d909e..9b22f6ff 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1743,7 +1743,8 @@ protected void unTagObject(String pid, String cid) } catch (Exception e) { logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + "." + + " " + e.getMessage()); } try { @@ -1757,7 +1758,8 @@ protected void unTagObject(String pid, String cid) } } catch (Exception e) { logFileHashStore.warn( - "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath); + "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath + + ". " + e.getMessage()); } try { @@ -1765,7 +1767,8 @@ protected void unTagObject(String pid, String cid) FileHashStoreUtility.deleteListItems(deleteList); } catch (Exception e) { logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid); + + "for request with pid: " + pid + " and cid: " + cid + + ". " + e.getMessage()); } logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); @@ -1778,7 +1781,8 @@ protected void unTagObject(String pid, String cid) } catch (Exception e) { logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + + ". " + e.getMessage()); } try { @@ -1786,7 +1790,8 @@ protected void unTagObject(String pid, String cid) FileHashStoreUtility.deleteListItems(deleteList); } catch (Exception e) { logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for orphaned pid refs file for pid: " + pid); + + "for orphaned pid refs file for pid: " + pid + ". " + + e.getMessage()); } String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; @@ -1803,7 +1808,8 @@ protected void unTagObject(String pid, String cid) } catch (Exception e) { logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + ". " + + e.getMessage()); } try { @@ -1820,8 +1826,13 @@ protected void unTagObject(String pid, String cid) } } catch (Exception e) { logFileHashStore.warn( - "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath); + "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath + + ". " + e.getMessage()); } + } catch (Exception e ) { + logFileHashStore.warn( + "Unexpected exception when attempting to remove pid: " + pid + " from cid " + + "refs file for cid: " + cidRead + ". " + e.getMessage()); } finally { releaseObjectLockedCids(cidRead); } @@ -1831,7 +1842,8 @@ protected void unTagObject(String pid, String cid) FileHashStoreUtility.deleteListItems(deleteList); } catch (Exception e) { logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid); + + "for request with pid: " + pid + " and cid: " + cid + + ". " + e.getMessage()); } String warnMsg = "Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid @@ -1848,7 +1860,8 @@ protected void unTagObject(String pid, String cid) } catch (Exception e) { logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid); + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + "." + + " " + e.getMessage()); } try { @@ -1856,7 +1869,8 @@ protected void unTagObject(String pid, String cid) FileHashStoreUtility.deleteListItems(deleteList); } catch (Exception e) { logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid); + + "for request with pid: " + pid + " and cid: " + cid + + ". " + e.getMessage()); } String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; @@ -1875,7 +1889,8 @@ protected void unTagObject(String pid, String cid) } catch (Exception e) { logFileHashStore.warn( "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath - + "for request with pid: " + pid + " and cid: " + cid); + + "for request with pid: " + pid + " and cid: " + cid + ". " + + e.getMessage()); } } } From 1fad24e91d983ffa12a2447462714e42c63c71ca Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Tue, 3 Sep 2024 16:22:35 -0700 Subject: [PATCH 533/553] Add missing javadoc exception in 'findObject' --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 9b22f6ff..2d50168d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1041,10 +1041,11 @@ public String getHexDigest(String pid, String algorithm) * does not exist. * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the expected * pid is not found in the cid refs file. + * @throws PidRefsFileNotFoundException When a pid reference file is not found. */ protected ObjectInfo findObject(String pid) throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, - PidNotFoundInCidRefsFileException, OrphanRefsFilesException { + PidNotFoundInCidRefsFileException, OrphanRefsFilesException, PidRefsFileNotFoundException { logFileHashStore.debug("Finding object for pid: " + pid); FileHashStoreUtility.ensureNotNull(pid, "pid"); FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); From 1ad97c7c9469386e56206b3c02bc2df5a5f47b89 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 4 Sep 2024 08:06:12 -0700 Subject: [PATCH 534/553] Revise 'unTagObject' to check that cid retrieved equals to the cid from the call --- .../hashstore/filehashstore/FileHashStore.java | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2d50168d..68db42be 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1723,12 +1723,20 @@ protected void unTagObject(String pid, String cid) // `findObject` which will throw custom exceptions if there is an issue with // the reference files, which help us determine the path to proceed with. try { - findObject(pid); + ObjectInfo objInfo = findObject(pid); + String cidRetrieved = objInfo.cid(); + + // If the cid retrieved does not match, this untag request is invalid immediately + if (!cid.equals(cidRetrieved)) { + String errMsg = "Cid retrieved: " + cidRetrieved + " does not match untag request" + + " cid: " + cid; + logFileHashStore.error(errMsg); + throw new IdentifierNotLockedException(errMsg); - // We must confirm that we are working on a cid that is locked - // If not, this means that this call is not thread safe. - // This `cid` will be released by the calling method. - if (!objectLockedCids.contains(cid)) { + } else if (!objectLockedCids.contains(cid)) { + // If it matches, we must confirm that we are working on a cid that is locked + // If not, this means that this call is not thread safe. + // This `cid` will be released by the calling method. String errMsg = "Cannot untag cid that is not currently locked"; logFileHashStore.error(errMsg); throw new IdentifierNotLockedException(errMsg); From bca9f6ccd195ea285647ac8e4a2fae3bd3bc81fc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 4 Sep 2024 08:38:24 -0700 Subject: [PATCH 535/553] Revise 'unTagObject' OrphanRefsFileException scenario to check cid before proceeding with untagging, and add new junit tests --- .../filehashstore/FileHashStore.java | 21 ++++--- .../FileHashStoreProtectedTest.java | 62 +++++++++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 68db42be..566e0094 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1783,7 +1783,7 @@ protected void unTagObject(String pid, String cid) } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, - // so we only need to delete the pid refs file + // so we only need to delete the pid refs file (pid is already locked) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); try { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1807,11 +1807,22 @@ protected void unTagObject(String pid, String cid) logFileHashStore.warn(warnMsg); } catch (OrphanRefsFilesException orfe) { - // `findObject` throws this exception when the pid and cid refs file exists, - // but the actual object being referenced by the pid does not exist + // `findObject` throws this exception when: + // - the pid and cid refs file exists, + // - the pid is found in the cid refs file + // - but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + // If the cid retrieved does not match, this untag request is invalid immediately + if (!cid.equals(cidRead)) { + String errMsg = "Cid retrieved: " + cidRead + " does not match untag request" + + " cid: " + cid; + logFileHashStore.error(errMsg); + throw new IdentifierNotLockedException(errMsg); + + } + try { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1824,8 +1835,6 @@ protected void unTagObject(String pid, String cid) try { FileHashStoreUtility.ensureNotNull(cidRead, "cidRead"); FileHashStoreUtility.checkForNotEmptyAndValidString(cidRead, "cidRead"); - // Since we must access the cid reference file, the `cid` must be synchronized - synchronizeObjectLockedCids(cidRead); Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); try { @@ -1842,8 +1851,6 @@ protected void unTagObject(String pid, String cid) logFileHashStore.warn( "Unexpected exception when attempting to remove pid: " + pid + " from cid " + "refs file for cid: " + cidRead + ". " + e.getMessage()); - } finally { - releaseObjectLockedCids(cidRead); } try { diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 236637a5..f4da255e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1488,6 +1488,68 @@ public void unTagObject_missingPidRefsFile() throws Exception { assertFalse(fileHashStore.isStringInRefsFile(pidToCheck, absCidRefsPath)); } + /** + * Check that unTagObject successfully deletes cid and pid refs file when a data object does + * not exist. + */ + @Test + public void unTagObject_refsExistButDataObjectDoesNotExist() throws Exception { + String pid = "jtao.1700.1"; + String cid = testData.pidData.get(pid).get("sha256"); + + Collection pidList = new ArrayList<>(); + for (int i = 1; i < 5; i++) { + pidList.add(pid + "." + i); + } + + // The object must be stored otherwise the unTag process cannot execute as expected + for (String pidToUse : pidList) { + fileHashStore.tagObject(pidToUse, cid); + } + + String pidToCheck = pid + ".1"; + + fileHashStore.synchronizeReferenceLockedPids(pidToCheck); + fileHashStore.synchronizeObjectLockedCids(cid); + + fileHashStore.unTagObject(pidToCheck, cid); + + fileHashStore.releaseReferenceLockedPids(pidToCheck); + fileHashStore.releaseObjectLockedCids(cid); + + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertFalse(fileHashStore.isStringInRefsFile(pidToCheck, absCidRefsPath)); + } + + /** + * Check that unTagObject successfully deletes cid and pid refs file when a data object does + * not exist. + */ + @Test + public void unTagObject_refsExistNoObject_singlePidInCidRefs() throws Exception { + String pid = "jtao.1700.1"; + String cid = testData.pidData.get(pid).get("sha256"); + + fileHashStore.tagObject(pid, cid); + + + fileHashStore.synchronizeReferenceLockedPids(pid); + fileHashStore.synchronizeObjectLockedCids(cid); + + fileHashStore.unTagObject(pid, cid); + + fileHashStore.releaseReferenceLockedPids(pid); + fileHashStore.releaseObjectLockedCids(cid); + + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertFalse(Files.exists(absPidRefsPath)); + assertFalse(Files.exists(absCidRefsPath)); + } + /** * Check that no exception is thrown when pid and cid are tagged correctly */ From db0333a3ea43b96f33b5e28b3d2807b2f3ec067b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 4 Sep 2024 08:48:50 -0700 Subject: [PATCH 536/553] Add additional guard rails for when retrieving/reading cid values, and ensuring cid values are locked --- .../hashstore/filehashstore/FileHashStore.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 566e0094..dcb7481b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1725,6 +1725,8 @@ protected void unTagObject(String pid, String cid) try { ObjectInfo objInfo = findObject(pid); String cidRetrieved = objInfo.cid(); + FileHashStoreUtility.ensureNotNull(cidRetrieved, "cidRetrieved"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cidRetrieved, "cidRetrieved"); // If the cid retrieved does not match, this untag request is invalid immediately if (!cid.equals(cidRetrieved)) { @@ -1813,6 +1815,8 @@ protected void unTagObject(String pid, String cid) // - but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + FileHashStoreUtility.ensureNotNull(cidRead, "cidRead"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cidRead, "cidRead"); // If the cid retrieved does not match, this untag request is invalid immediately if (!cid.equals(cidRead)) { @@ -1821,6 +1825,13 @@ protected void unTagObject(String pid, String cid) logFileHashStore.error(errMsg); throw new IdentifierNotLockedException(errMsg); + } else if (!objectLockedCids.contains(cid)) { + // If it matches, we must confirm that we are working on a cid that is locked + // If not, this means that this call is not thread safe. + // This `cid` will be released by the calling method. + String errMsg = "Cannot untag cid that is not currently locked"; + logFileHashStore.error(errMsg); + throw new IdentifierNotLockedException(errMsg); } try { @@ -1833,9 +1844,6 @@ protected void unTagObject(String pid, String cid) } try { - FileHashStoreUtility.ensureNotNull(cidRead, "cidRead"); - FileHashStoreUtility.checkForNotEmptyAndValidString(cidRead, "cidRead"); - Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); try { updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); From 44ff55047d1836dd88ad9e7a640a2ade36e31f21 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 4 Sep 2024 08:58:05 -0700 Subject: [PATCH 537/553] Add missing guard rail in 'unTagObjet' PidRefsFileNotFoundException for confirming cid is locked --- .../hashstore/filehashstore/FileHashStore.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index dcb7481b..c54c63aa 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1902,7 +1902,18 @@ protected void unTagObject(String pid, String cid) } catch (PidRefsFileNotFoundException prfnfe) { // `findObject` throws this exception if the pid refs file is not found // Check to see if pid is in the `cid refs file`and attempt to remove it + + // Confirm that we are working on a cid that is locked + // If not, this means that this call is not thread safe. + // This `cid` will be released by the calling method. + if (!objectLockedCids.contains(cid)) { + String errMsg = "Cannot untag cid that is not currently locked"; + logFileHashStore.error(errMsg); + throw new IdentifierNotLockedException(errMsg); + } + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + try { if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); From cd119f548a108150e295f5e2883879bc69167858 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Wed, 4 Sep 2024 11:09:11 -0700 Subject: [PATCH 538/553] Improve exception messaging in 'unTagObject' --- .../filehashstore/FileHashStore.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index c54c63aa..6e6f5e9b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1730,8 +1730,10 @@ protected void unTagObject(String pid, String cid) // If the cid retrieved does not match, this untag request is invalid immediately if (!cid.equals(cidRetrieved)) { - String errMsg = "Cid retrieved: " + cidRetrieved + " does not match untag request" - + " cid: " + cid; + String errMsg = + "Cid retrieved: " + cidRetrieved + " does not match untag request for cid: " + + cid + " and pid: " + pid + + ". Cannot untag cid that is not currently locked."; logFileHashStore.error(errMsg); throw new IdentifierNotLockedException(errMsg); @@ -1739,7 +1741,8 @@ protected void unTagObject(String pid, String cid) // If it matches, we must confirm that we are working on a cid that is locked // If not, this means that this call is not thread safe. // This `cid` will be released by the calling method. - String errMsg = "Cannot untag cid that is not currently locked"; + String errMsg = + "Cannot untag cid: " + cid + " that is not currently locked (pid: " + pid + ")"; logFileHashStore.error(errMsg); throw new IdentifierNotLockedException(errMsg); } @@ -1820,8 +1823,10 @@ protected void unTagObject(String pid, String cid) // If the cid retrieved does not match, this untag request is invalid immediately if (!cid.equals(cidRead)) { - String errMsg = "Cid retrieved: " + cidRead + " does not match untag request" - + " cid: " + cid; + String errMsg = + "Orphan reference files found but data object does not exist. Cid read: " + + cidRead + " does not match untag request for cid: " + cid + " and pid: " + + pid + ". Cannot untag cid that is not currently locked."; logFileHashStore.error(errMsg); throw new IdentifierNotLockedException(errMsg); @@ -1829,7 +1834,8 @@ protected void unTagObject(String pid, String cid) // If it matches, we must confirm that we are working on a cid that is locked // If not, this means that this call is not thread safe. // This `cid` will be released by the calling method. - String errMsg = "Cannot untag cid that is not currently locked"; + String errMsg = + "Cannot untag cid: " + cid + " that is not currently locked (pid: " + pid + ")"; logFileHashStore.error(errMsg); throw new IdentifierNotLockedException(errMsg); } @@ -1875,9 +1881,9 @@ protected void unTagObject(String pid, String cid) logFileHashStore.warn(warnMsg); } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists - // but the pid is not found in the cid refs file. + // but the pid is not found in the cid refs file (nothing to change here) - // Rename pid refs file for deletion + // Only rename pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); try { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); From 6457087c22b90db5c932092ec9146a333d6a474a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 08:40:15 -0700 Subject: [PATCH 539/553] Revise exception type in 'unTagObject' when cid doesn't match, add todo items --- .../hashstore/filehashstore/FileHashStore.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 6e6f5e9b..2276a0a9 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1732,10 +1732,9 @@ protected void unTagObject(String pid, String cid) if (!cid.equals(cidRetrieved)) { String errMsg = "Cid retrieved: " + cidRetrieved + " does not match untag request for cid: " - + cid + " and pid: " + pid - + ". Cannot untag cid that is not currently locked."; + + cid + " and pid: " + pid; logFileHashStore.error(errMsg); - throw new IdentifierNotLockedException(errMsg); + throw new IllegalArgumentException(errMsg); } else if (!objectLockedCids.contains(cid)) { // If it matches, we must confirm that we are working on a cid that is locked @@ -1789,6 +1788,7 @@ protected void unTagObject(String pid, String cid) } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file (pid is already locked) + // TODO: Check that the cid found actually matches what has been provided Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); try { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); @@ -1820,15 +1820,16 @@ protected void unTagObject(String pid, String cid) String cidRead = new String(Files.readAllBytes(absPidRefsPath)); FileHashStoreUtility.ensureNotNull(cidRead, "cidRead"); FileHashStoreUtility.checkForNotEmptyAndValidString(cidRead, "cidRead"); + // TODO: Lots of repeated code with the basic scenario, see how to reduce code length // If the cid retrieved does not match, this untag request is invalid immediately if (!cid.equals(cidRead)) { String errMsg = "Orphan reference files found but data object does not exist. Cid read: " + cidRead + " does not match untag request for cid: " + cid + " and pid: " - + pid + ". Cannot untag cid that is not currently locked."; + + pid; logFileHashStore.error(errMsg); - throw new IdentifierNotLockedException(errMsg); + throw new IllegalArgumentException(errMsg); } else if (!objectLockedCids.contains(cid)) { // If it matches, we must confirm that we are working on a cid that is locked @@ -1882,6 +1883,7 @@ protected void unTagObject(String pid, String cid) } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists // but the pid is not found in the cid refs file (nothing to change here) + // TODO: Still need to check that the cid found matches // Only rename pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); @@ -1907,7 +1909,7 @@ protected void unTagObject(String pid, String cid) logFileHashStore.warn(warnMsg); } catch (PidRefsFileNotFoundException prfnfe) { // `findObject` throws this exception if the pid refs file is not found - // Check to see if pid is in the `cid refs file`and attempt to remove it + // Check to see if pid is in the `cid refs file` and attempt to remove it // Confirm that we are working on a cid that is locked // If not, this means that this call is not thread safe. From f772797668998f0a27e369b024786806f37dde7e Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 09:46:20 -0700 Subject: [PATCH 540/553] Update and add a new junit for revised exceptions --- .../FileHashStoreProtectedTest.java | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index f4da255e..f5a3c486 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -1302,6 +1302,30 @@ public void unTagObject_pid_IdentifierNotLockedException() { */ @Test public void unTagObject_cid_IdentifierNotLockedException() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + // The object must be stored otherwise the unTag process cannot execute as expected + ObjectMetadata objInfo = null; + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } + String cid = objInfo.cid(); + + FileHashStore.synchronizeReferenceLockedPids(pid); + assertThrows(IdentifierNotLockedException.class, + () -> fileHashStore.unTagObject(pid, cid)); + FileHashStore.releaseReferenceLockedPids(pid); + } + } + + /** + * Confirm IllegalArgumentException is thrown when cid retrieved does not match what has been + * provided. + */ + @Test + public void unTagObject_cid_doesNotMatchFindObject() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); @@ -1312,9 +1336,11 @@ public void unTagObject_cid_IdentifierNotLockedException() throws Exception { } fileHashStore.synchronizeReferenceLockedPids(pid); + fileHashStore.synchronizeObjectLockedCids("does_not_match"); assertThrows( - IdentifierNotLockedException.class, () -> fileHashStore.unTagObject(pid, "cid")); + IllegalArgumentException.class, () -> fileHashStore.unTagObject(pid, "cid")); fileHashStore.releaseReferenceLockedPids(pid); + fileHashStore.releaseObjectLockedCids("does_not_match"); } } From 5a53f98263eee991cc3cf93a141e1fb3c011f3cc Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 10:01:41 -0700 Subject: [PATCH 541/553] Refactor 'unTagObject' by extracting method 'addAndRenamePidRefsFileToDeleteList' --- .../filehashstore/FileHashStore.java | 56 ++++++++----------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2276a0a9..58bfd0f3 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1751,14 +1751,7 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); // Begin deletion process - try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - } catch (Exception e) { - logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + "." - + " " + e.getMessage()); - } + addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); try { updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); @@ -1789,15 +1782,9 @@ protected void unTagObject(String pid, String cid) // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file (pid is already locked) // TODO: Check that the cid found actually matches what has been provided - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - } catch (Exception e) { - logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + - ". " + e.getMessage()); - } + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); try { // Delete items @@ -1841,14 +1828,7 @@ protected void unTagObject(String pid, String cid) throw new IdentifierNotLockedException(errMsg); } - try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - } catch (Exception e) { - logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + ". " - + e.getMessage()); - } + addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); try { Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); @@ -1887,14 +1867,7 @@ protected void unTagObject(String pid, String cid) // Only rename pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - try { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); - - } catch (Exception e) { - logFileHashStore.warn( - "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + "." - + " " + e.getMessage()); - } + addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); try { // Delete items @@ -1938,6 +1911,25 @@ protected void unTagObject(String pid, String cid) } } + /** + * Renames a given path and adds it to a list to delete. + * + * @param pid Persistent identifier for exception messaging + * @param deleteList List to add renamed file + * @param absPidRefsPath Path of file to rename for deletion + */ + private static void addAndRenamePidRefsFileToDeleteList( + String pid, Collection deleteList, Path absPidRefsPath) { + try { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + + } catch (Exception e) { + logFileHashStore.warn( + "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + "." + " " + + e.getMessage()); + } + } + /** * Verifies that the reference files for the given pid and cid exist and contain the expected * values. From fe772b5731d517f238c31c96608da0d1aa66431a Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 10:17:16 -0700 Subject: [PATCH 542/553] Refactor 'unTagObject' by extracting method 'deleteListOfFilesRenamedForDeletion' --- .../filehashstore/FileHashStore.java | 57 ++++++++----------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 58bfd0f3..f58cb80d 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1768,14 +1768,7 @@ protected void unTagObject(String pid, String cid) ". " + e.getMessage()); } - try { - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - } catch (Exception e) { - logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid - + ". " + e.getMessage()); - } + deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); } catch (OrphanPidRefsFileException oprfe) { @@ -1786,14 +1779,7 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - try { - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - } catch (Exception e) { - logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for orphaned pid refs file for pid: " + pid + ". " - + e.getMessage()); - } + deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); @@ -1848,18 +1834,12 @@ protected void unTagObject(String pid, String cid) + "refs file for cid: " + cidRead + ". " + e.getMessage()); } - try { - // Delete all related/relevant items with the least amount of delay - FileHashStoreUtility.deleteListItems(deleteList); - } catch (Exception e) { - logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid - + ". " + e.getMessage()); - } + deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); String warnMsg = "Object with cid: " + cidRead + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files."; logFileHashStore.warn(warnMsg); + } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists // but the pid is not found in the cid refs file (nothing to change here) @@ -1869,17 +1849,11 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - try { - // Delete items - FileHashStoreUtility.deleteListItems(deleteList); - } catch (Exception e) { - logFileHashStore.warn("Unable to delete list of refs files marked for deletion " - + "for request with pid: " + pid + " and cid: " + cid - + ". " + e.getMessage()); - } + deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); + } catch (PidRefsFileNotFoundException prfnfe) { // `findObject` throws this exception if the pid refs file is not found // Check to see if pid is in the `cid refs file` and attempt to remove it @@ -1911,6 +1885,25 @@ protected void unTagObject(String pid, String cid) } } + /** + * Deletes all the file paths contained in a given 'deleteList' + * + * @param pid Persistent identifier, used for logging + * @param cid Content identifier, used for logging + * @param deleteList List of file paths to delete + */ + private static void deleteListOfFilesRenamedForDeletion( + String pid, String cid, Collection deleteList) { + try { + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + } catch (Exception e) { + logFileHashStore.warn("Unable to delete list of refs files marked for deletion " + + "for request with pid: " + pid + " and cid: " + cid + ". " + + e.getMessage()); + } + } + /** * Renames a given path and adds it to a list to delete. * From 8264ed6c731e6d854cbb227434569f4f7a41df0b Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 10:38:14 -0700 Subject: [PATCH 543/553] Refactor 'unTagObject' by extracting method 'removePidFromCidRefsAndDetermineDeletion' --- .../filehashstore/FileHashStore.java | 60 +++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index f58cb80d..4eb97d64 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1753,20 +1753,7 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - try { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } else { - String warnMsg = "Cid referenced by pid: " + pid - + " is not empty (refs exist for cid). Skipping object " + "deletion."; - logFileHashStore.warn(warnMsg); - } - } catch (Exception e) { - logFileHashStore.warn( - "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath + - ". " + e.getMessage()); - } + removePidFromCidRefsAndDetermineDeletion(pid, deleteList, absCidRefsPath); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); @@ -1816,23 +1803,8 @@ protected void unTagObject(String pid, String cid) addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - try { - Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); - try { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - if (Files.size(absCidRefsPath) == 0) { - deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); - } - } catch (Exception e) { - logFileHashStore.warn( - "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath - + ". " + e.getMessage()); - } - } catch (Exception e ) { - logFileHashStore.warn( - "Unexpected exception when attempting to remove pid: " + pid + " from cid " - + "refs file for cid: " + cidRead + ". " + e.getMessage()); - } + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + removePidFromCidRefsAndDetermineDeletion(pid, deleteList, absCidRefsPath); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); String warnMsg = "Object with cid: " + cidRead @@ -1885,6 +1857,32 @@ protected void unTagObject(String pid, String cid) } } + /** + * Removes a pid from a given cid refs file (if it's found) and checks to see if the cid refs is + * empty before renaming this file for deletion. + * + * @param pid Persistent identifier + * @param deleteList If cid refs file needs to be deleted, list to add to + * @param absCidRefsPath Path of the cid refs file + */ + private void removePidFromCidRefsAndDetermineDeletion( + String pid, Collection deleteList, Path absCidRefsPath) { + try { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } else { + String warnMsg = "Cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object " + "deletion."; + logFileHashStore.warn(warnMsg); + } + } catch (Exception e) { + logFileHashStore.warn( + "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath + ". " + + e.getMessage()); + } + } + /** * Deletes all the file paths contained in a given 'deleteList' * From 919f0655304d70c2aa4147fae06a61104e43b109 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 10:52:58 -0700 Subject: [PATCH 544/553] Refactor 'unTagObject' by extracting method 'validateCidAndCheckLocked' --- .../filehashstore/FileHashStore.java | 80 +++++++++---------- 1 file changed, 37 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 4eb97d64..e75decb6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1724,27 +1724,11 @@ protected void unTagObject(String pid, String cid) // the reference files, which help us determine the path to proceed with. try { ObjectInfo objInfo = findObject(pid); - String cidRetrieved = objInfo.cid(); - FileHashStoreUtility.ensureNotNull(cidRetrieved, "cidRetrieved"); - FileHashStoreUtility.checkForNotEmptyAndValidString(cidRetrieved, "cidRetrieved"); + String cidToCheck = objInfo.cid(); + FileHashStoreUtility.ensureNotNull(cidToCheck, "cidRetrieved"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cidToCheck, "cidRetrieved"); - // If the cid retrieved does not match, this untag request is invalid immediately - if (!cid.equals(cidRetrieved)) { - String errMsg = - "Cid retrieved: " + cidRetrieved + " does not match untag request for cid: " - + cid + " and pid: " + pid; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - - } else if (!objectLockedCids.contains(cid)) { - // If it matches, we must confirm that we are working on a cid that is locked - // If not, this means that this call is not thread safe. - // This `cid` will be released by the calling method. - String errMsg = - "Cannot untag cid: " + cid + " that is not currently locked (pid: " + pid + ")"; - logFileHashStore.error(errMsg); - throw new IdentifierNotLockedException(errMsg); - } + validateCidAndCheckLocked(pid, cid, cidToCheck); // Get paths to reference files to work on Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); @@ -1777,37 +1761,20 @@ protected void unTagObject(String pid, String cid) // - the pid is found in the cid refs file // - but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - FileHashStoreUtility.ensureNotNull(cidRead, "cidRead"); - FileHashStoreUtility.checkForNotEmptyAndValidString(cidRead, "cidRead"); + String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); + FileHashStoreUtility.ensureNotNull(cidToCheck, "cidRead"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cidToCheck, "cidRead"); // TODO: Lots of repeated code with the basic scenario, see how to reduce code length - // If the cid retrieved does not match, this untag request is invalid immediately - if (!cid.equals(cidRead)) { - String errMsg = - "Orphan reference files found but data object does not exist. Cid read: " - + cidRead + " does not match untag request for cid: " + cid + " and pid: " - + pid; - logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); - - } else if (!objectLockedCids.contains(cid)) { - // If it matches, we must confirm that we are working on a cid that is locked - // If not, this means that this call is not thread safe. - // This `cid` will be released by the calling method. - String errMsg = - "Cannot untag cid: " + cid + " that is not currently locked (pid: " + pid + ")"; - logFileHashStore.error(errMsg); - throw new IdentifierNotLockedException(errMsg); - } + validateCidAndCheckLocked(pid, cid, cidToCheck); addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + Path absCidRefsPath = getHashStoreRefsPath(cidToCheck, HashStoreIdTypes.cid); removePidFromCidRefsAndDetermineDeletion(pid, deleteList, absCidRefsPath); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); - String warnMsg = "Object with cid: " + cidRead + String warnMsg = "Object with cid: " + cidToCheck + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files."; logFileHashStore.warn(warnMsg); @@ -1857,6 +1824,33 @@ protected void unTagObject(String pid, String cid) } } + /** + * Check that the supplied cid matches the cid retrieved/read before performing untagging + * + * @param pid Persistent identifier for logging + * @param cid Cid to confirm + * @param cidToCheck Cid that was retrieved or read + */ + private static void validateCidAndCheckLocked(String pid, String cid, String cidToCheck) { + // If the cid retrieved does not match, this untag request is invalid immediately + if (!cid.equals(cidToCheck)) { + String errMsg = + "Cid retrieved: " + cidToCheck + " does not match untag request for cid: " + + cid + " and pid: " + pid; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + + } else if (!objectLockedCids.contains(cid)) { + // If it matches, we must confirm that we are working on a cid that is locked + // If not, this means that this call is not thread safe. + // This `cid` will be released by the calling method. + String errMsg = + "Cannot untag cid: " + cid + " that is not currently locked (pid: " + pid + ")"; + logFileHashStore.error(errMsg); + throw new IdentifierNotLockedException(errMsg); + } + } + /** * Removes a pid from a given cid refs file (if it's found) and checks to see if the cid refs is * empty before renaming this file for deletion. From fb9a072eacb3451fd67946acb0077617fe8c7fca Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 14:16:39 -0700 Subject: [PATCH 545/553] Refactor 'unTagObject' to always confirm cid before proceeding and resolve todo items --- .../filehashstore/FileHashStore.java | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e75decb6..e1c34973 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1725,9 +1725,6 @@ protected void unTagObject(String pid, String cid) try { ObjectInfo objInfo = findObject(pid); String cidToCheck = objInfo.cid(); - FileHashStoreUtility.ensureNotNull(cidToCheck, "cidRetrieved"); - FileHashStoreUtility.checkForNotEmptyAndValidString(cidToCheck, "cidRetrieved"); - validateCidAndCheckLocked(pid, cid, cidToCheck); // Get paths to reference files to work on @@ -1736,21 +1733,22 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - removePidFromCidRefsAndDetermineDeletion(pid, deleteList, absCidRefsPath); - deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); } catch (OrphanPidRefsFileException oprfe) { // `findObject` throws this exception when the cid refs file doesn't exist, // so we only need to delete the pid refs file (pid is already locked) - // TODO: Check that the cid found actually matches what has been provided - Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); + String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); + validateCidAndCheckLocked(pid, cid, cidToCheck); + // Begin deletion process + addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); @@ -1761,19 +1759,15 @@ protected void unTagObject(String pid, String cid) // - the pid is found in the cid refs file // - but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); - FileHashStoreUtility.ensureNotNull(cidToCheck, "cidRead"); - FileHashStoreUtility.checkForNotEmptyAndValidString(cidToCheck, "cidRead"); - // TODO: Lots of repeated code with the basic scenario, see how to reduce code length - validateCidAndCheckLocked(pid, cid, cidToCheck); + // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - - Path absCidRefsPath = getHashStoreRefsPath(cidToCheck, HashStoreIdTypes.cid); removePidFromCidRefsAndDetermineDeletion(pid, deleteList, absCidRefsPath); - deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + String warnMsg = "Object with cid: " + cidToCheck + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files."; @@ -1782,13 +1776,14 @@ protected void unTagObject(String pid, String cid) } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists // but the pid is not found in the cid refs file (nothing to change here) - // TODO: Still need to check that the cid found matches - - // Only rename pid refs file for deletion Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); + String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); + validateCidAndCheckLocked(pid, cid, cidToCheck); + // Begin deletion process + addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; logFileHashStore.warn(warnMsg); @@ -1807,7 +1802,6 @@ protected void unTagObject(String pid, String cid) } Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); - try { if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); @@ -1832,6 +1826,8 @@ protected void unTagObject(String pid, String cid) * @param cidToCheck Cid that was retrieved or read */ private static void validateCidAndCheckLocked(String pid, String cid, String cidToCheck) { + FileHashStoreUtility.ensureNotNull(cidToCheck, "cidToCheck"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cidToCheck, "cidToCheck"); // If the cid retrieved does not match, this untag request is invalid immediately if (!cid.equals(cidToCheck)) { String errMsg = From c2be8e8d73fcd0d5caf32c6cccc9c69de6a60227 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 14:31:06 -0700 Subject: [PATCH 546/553] Refactor 'unTagObject' further by extracting relevant code to 'removePidFromCidRefsAndDetermineDeletion' --- .../filehashstore/FileHashStore.java | 37 +++++++------------ 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index e1c34973..052db151 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1727,13 +1727,11 @@ protected void unTagObject(String pid, String cid) String cidToCheck = objInfo.cid(); validateCidAndCheckLocked(pid, cid, cidToCheck); - // Get paths to reference files to work on - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - removePidFromCidRefsAndDetermineDeletion(pid, deleteList, absCidRefsPath); + removePidFromCidRefsAndDetermineDeletion(pid, cid, deleteList); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); @@ -1759,13 +1757,12 @@ protected void unTagObject(String pid, String cid) // - the pid is found in the cid refs file // - but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); validateCidAndCheckLocked(pid, cid, cidToCheck); // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - removePidFromCidRefsAndDetermineDeletion(pid, deleteList, absCidRefsPath); + removePidFromCidRefsAndDetermineDeletion(pid, cid, deleteList); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); String warnMsg = "Object with cid: " + cidToCheck @@ -1801,20 +1798,12 @@ protected void unTagObject(String pid, String cid) throw new IdentifierNotLockedException(errMsg); } - Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); - try { - if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { - updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); - String errMsg = "Pid refs file not found, removed pid found in cid refs file: " - + absCidRefsPath; - logFileHashStore.warn(errMsg); - } - } catch (Exception e) { - logFileHashStore.warn( - "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath - + "for request with pid: " + pid + " and cid: " + cid + ". " - + e.getMessage()); - } + removePidFromCidRefsAndDetermineDeletion(pid, cid, deleteList); + deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + + String errMsg = + "Pid refs file not found, removed pid from cid refs file for cid: " + cid; + logFileHashStore.warn(errMsg); } } @@ -1851,13 +1840,15 @@ private static void validateCidAndCheckLocked(String pid, String cid, String cid * Removes a pid from a given cid refs file (if it's found) and checks to see if the cid refs is * empty before renaming this file for deletion. * - * @param pid Persistent identifier - * @param deleteList If cid refs file needs to be deleted, list to add to - * @param absCidRefsPath Path of the cid refs file + * @param pid Persistent identifier + * @param cid Content Identifier + * @param deleteList If cid refs file needs to be deleted, list to add to */ private void removePidFromCidRefsAndDetermineDeletion( - String pid, Collection deleteList, Path absCidRefsPath) { + String pid, String cid, Collection deleteList) { + Path absCidRefsPath = null; try { + absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); From 93df442c018deb231d616595a786417a0ba75a17 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 14:35:57 -0700 Subject: [PATCH 547/553] Rename 'removePidFromCidRefsAndDetermineDeletion' method to 'removePidAndHandleCidDeletion' --- .../dataone/hashstore/filehashstore/FileHashStore.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 052db151..db10c04b 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1731,7 +1731,7 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - removePidFromCidRefsAndDetermineDeletion(pid, cid, deleteList); + removePidAndHandleCidDeletion(pid, cid, deleteList); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); @@ -1762,7 +1762,7 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - removePidFromCidRefsAndDetermineDeletion(pid, cid, deleteList); + removePidAndHandleCidDeletion(pid, cid, deleteList); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); String warnMsg = "Object with cid: " + cidToCheck @@ -1798,7 +1798,7 @@ protected void unTagObject(String pid, String cid) throw new IdentifierNotLockedException(errMsg); } - removePidFromCidRefsAndDetermineDeletion(pid, cid, deleteList); + removePidAndHandleCidDeletion(pid, cid, deleteList); deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); String errMsg = @@ -1844,7 +1844,7 @@ private static void validateCidAndCheckLocked(String pid, String cid, String cid * @param cid Content Identifier * @param deleteList If cid refs file needs to be deleted, list to add to */ - private void removePidFromCidRefsAndDetermineDeletion( + private void removePidAndHandleCidDeletion( String pid, String cid, Collection deleteList) { Path absCidRefsPath = null; try { From a07b5b370e6376880a57435e07876b9df9cb8169 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 14:37:21 -0700 Subject: [PATCH 548/553] Rename 'deleteListOfFilesRenamedForDeletion' method to 'deleteMarkedFiles' --- .../hashstore/filehashstore/FileHashStore.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index db10c04b..eeabeeea 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1732,7 +1732,7 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); removePidAndHandleCidDeletion(pid, cid, deleteList); - deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + deleteMarkedFiles(pid, cid, deleteList); logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); @@ -1745,7 +1745,7 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + deleteMarkedFiles(pid, cid, deleteList); String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; @@ -1763,7 +1763,7 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); removePidAndHandleCidDeletion(pid, cid, deleteList); - deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + deleteMarkedFiles(pid, cid, deleteList); String warnMsg = "Object with cid: " + cidToCheck + " does not exist, but pid and cid reference file found for pid: " + pid @@ -1779,7 +1779,7 @@ protected void unTagObject(String pid, String cid) // Begin deletion process addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); - deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + deleteMarkedFiles(pid, cid, deleteList); String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; @@ -1799,7 +1799,7 @@ protected void unTagObject(String pid, String cid) } removePidAndHandleCidDeletion(pid, cid, deleteList); - deleteListOfFilesRenamedForDeletion(pid, cid, deleteList); + deleteMarkedFiles(pid, cid, deleteList); String errMsg = "Pid refs file not found, removed pid from cid refs file for cid: " + cid; @@ -1871,7 +1871,7 @@ private void removePidAndHandleCidDeletion( * @param cid Content identifier, used for logging * @param deleteList List of file paths to delete */ - private static void deleteListOfFilesRenamedForDeletion( + private static void deleteMarkedFiles( String pid, String cid, Collection deleteList) { try { // Delete all related/relevant items with the least amount of delay From d1e280fc4b617f94e5ade080d39dfa6317d7a6fb Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 14:38:41 -0700 Subject: [PATCH 549/553] Rename 'addAndRenamePidRefsFileToDeleteList' method to 'markPidRefsFileForDeletion' --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index eeabeeea..baf1fffc 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1730,7 +1730,7 @@ protected void unTagObject(String pid, String cid) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); // Begin deletion process - addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); + markPidRefsFileForDeletion(pid, deleteList, absPidRefsPath); removePidAndHandleCidDeletion(pid, cid, deleteList); deleteMarkedFiles(pid, cid, deleteList); @@ -1744,7 +1744,7 @@ protected void unTagObject(String pid, String cid) validateCidAndCheckLocked(pid, cid, cidToCheck); // Begin deletion process - addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); + markPidRefsFileForDeletion(pid, deleteList, absPidRefsPath); deleteMarkedFiles(pid, cid, deleteList); String warnMsg = "Cid refs file does not exist for pid: " + pid @@ -1761,7 +1761,7 @@ protected void unTagObject(String pid, String cid) validateCidAndCheckLocked(pid, cid, cidToCheck); // Begin deletion process - addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); + markPidRefsFileForDeletion(pid, deleteList, absPidRefsPath); removePidAndHandleCidDeletion(pid, cid, deleteList); deleteMarkedFiles(pid, cid, deleteList); @@ -1778,7 +1778,7 @@ protected void unTagObject(String pid, String cid) validateCidAndCheckLocked(pid, cid, cidToCheck); // Begin deletion process - addAndRenamePidRefsFileToDeleteList(pid, deleteList, absPidRefsPath); + markPidRefsFileForDeletion(pid, deleteList, absPidRefsPath); deleteMarkedFiles(pid, cid, deleteList); String warnMsg = "Pid not found in expected cid refs file for pid: " + pid @@ -1890,7 +1890,7 @@ private static void deleteMarkedFiles( * @param deleteList List to add renamed file * @param absPidRefsPath Path of file to rename for deletion */ - private static void addAndRenamePidRefsFileToDeleteList( + private static void markPidRefsFileForDeletion( String pid, Collection deleteList, Path absPidRefsPath) { try { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); From 9406648e73f44860afb3af6ce835b2d6a8b1e19f Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 14:40:05 -0700 Subject: [PATCH 550/553] Rename 'validateCidAndCheckLocked' method to 'validateAndCheckCidLock' --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index baf1fffc..7dafdcd4 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1725,7 +1725,7 @@ protected void unTagObject(String pid, String cid) try { ObjectInfo objInfo = findObject(pid); String cidToCheck = objInfo.cid(); - validateCidAndCheckLocked(pid, cid, cidToCheck); + validateAndCheckCidLock(pid, cid, cidToCheck); Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); @@ -1741,7 +1741,7 @@ protected void unTagObject(String pid, String cid) // so we only need to delete the pid refs file (pid is already locked) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); - validateCidAndCheckLocked(pid, cid, cidToCheck); + validateAndCheckCidLock(pid, cid, cidToCheck); // Begin deletion process markPidRefsFileForDeletion(pid, deleteList, absPidRefsPath); @@ -1758,7 +1758,7 @@ protected void unTagObject(String pid, String cid) // - but the actual object being referenced by the pid does not exist Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); - validateCidAndCheckLocked(pid, cid, cidToCheck); + validateAndCheckCidLock(pid, cid, cidToCheck); // Begin deletion process markPidRefsFileForDeletion(pid, deleteList, absPidRefsPath); @@ -1775,7 +1775,7 @@ protected void unTagObject(String pid, String cid) // but the pid is not found in the cid refs file (nothing to change here) Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); String cidToCheck = new String(Files.readAllBytes(absPidRefsPath)); - validateCidAndCheckLocked(pid, cid, cidToCheck); + validateAndCheckCidLock(pid, cid, cidToCheck); // Begin deletion process markPidRefsFileForDeletion(pid, deleteList, absPidRefsPath); @@ -1814,7 +1814,7 @@ protected void unTagObject(String pid, String cid) * @param cid Cid to confirm * @param cidToCheck Cid that was retrieved or read */ - private static void validateCidAndCheckLocked(String pid, String cid, String cidToCheck) { + private static void validateAndCheckCidLock(String pid, String cid, String cidToCheck) { FileHashStoreUtility.ensureNotNull(cidToCheck, "cidToCheck"); FileHashStoreUtility.checkForNotEmptyAndValidString(cidToCheck, "cidToCheck"); // If the cid retrieved does not match, this untag request is invalid immediately From 28cc6a5dbc29b110da2b57447fd8b30b7765310d Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 14:59:51 -0700 Subject: [PATCH 551/553] Change warning log levels to error level for 'unTagObject' related methods --- .../hashstore/filehashstore/FileHashStore.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 7dafdcd4..738c2044 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1749,7 +1749,7 @@ protected void unTagObject(String pid, String cid) String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; - logFileHashStore.warn(warnMsg); + logFileHashStore.error(warnMsg); } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when: @@ -1768,7 +1768,7 @@ protected void unTagObject(String pid, String cid) String warnMsg = "Object with cid: " + cidToCheck + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files."; - logFileHashStore.warn(warnMsg); + logFileHashStore.error(warnMsg); } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists @@ -1783,7 +1783,7 @@ protected void unTagObject(String pid, String cid) String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; - logFileHashStore.warn(warnMsg); + logFileHashStore.error(warnMsg); } catch (PidRefsFileNotFoundException prfnfe) { // `findObject` throws this exception if the pid refs file is not found @@ -1803,7 +1803,7 @@ protected void unTagObject(String pid, String cid) String errMsg = "Pid refs file not found, removed pid from cid refs file for cid: " + cid; - logFileHashStore.warn(errMsg); + logFileHashStore.error(errMsg); } } @@ -1855,10 +1855,10 @@ private void removePidAndHandleCidDeletion( } else { String warnMsg = "Cid referenced by pid: " + pid + " is not empty (refs exist for cid). Skipping object " + "deletion."; - logFileHashStore.warn(warnMsg); + logFileHashStore.error(warnMsg); } } catch (Exception e) { - logFileHashStore.warn( + logFileHashStore.error( "Unable to remove pid: " + pid + " from cid refs file: " + absCidRefsPath + ". " + e.getMessage()); } @@ -1877,7 +1877,7 @@ private static void deleteMarkedFiles( // Delete all related/relevant items with the least amount of delay FileHashStoreUtility.deleteListItems(deleteList); } catch (Exception e) { - logFileHashStore.warn("Unable to delete list of refs files marked for deletion " + logFileHashStore.error("Unable to delete list of refs files marked for deletion " + "for request with pid: " + pid + " and cid: " + cid + ". " + e.getMessage()); } @@ -1896,7 +1896,7 @@ private static void markPidRefsFileForDeletion( deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); } catch (Exception e) { - logFileHashStore.warn( + logFileHashStore.error( "Unable to delete pid refs file: " + absPidRefsPath + " for pid: " + pid + "." + " " + e.getMessage()); } From eea90de587cf25024ad8ed191886b283b3f02934 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 15:21:56 -0700 Subject: [PATCH 552/553] Further revise logging levels for 'unTagObject' --- .../dataone/hashstore/filehashstore/FileHashStore.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 738c2044..343106d6 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1749,7 +1749,7 @@ protected void unTagObject(String pid, String cid) String warnMsg = "Cid refs file does not exist for pid: " + pid + ". Deleted orphan pid refs file."; - logFileHashStore.error(warnMsg); + logFileHashStore.warn(warnMsg); } catch (OrphanRefsFilesException orfe) { // `findObject` throws this exception when: @@ -1768,7 +1768,7 @@ protected void unTagObject(String pid, String cid) String warnMsg = "Object with cid: " + cidToCheck + " does not exist, but pid and cid reference file found for pid: " + pid + ". Deleted pid and cid ref files."; - logFileHashStore.error(warnMsg); + logFileHashStore.warn(warnMsg); } catch (PidNotFoundInCidRefsFileException pnficrfe) { // `findObject` throws this exception when both the pid and cid refs file exists @@ -1783,7 +1783,7 @@ protected void unTagObject(String pid, String cid) String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + ". Deleted orphan pid refs file."; - logFileHashStore.error(warnMsg); + logFileHashStore.warn(warnMsg); } catch (PidRefsFileNotFoundException prfnfe) { // `findObject` throws this exception if the pid refs file is not found @@ -1803,7 +1803,7 @@ protected void unTagObject(String pid, String cid) String errMsg = "Pid refs file not found, removed pid from cid refs file for cid: " + cid; - logFileHashStore.error(errMsg); + logFileHashStore.warn(errMsg); } } @@ -1855,7 +1855,7 @@ private void removePidAndHandleCidDeletion( } else { String warnMsg = "Cid referenced by pid: " + pid + " is not empty (refs exist for cid). Skipping object " + "deletion."; - logFileHashStore.error(warnMsg); + logFileHashStore.info(warnMsg); } } catch (Exception e) { logFileHashStore.error( From 4d343cb1a1d5f5c02df06ccd10fcd14ff3541c29 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 5 Sep 2024 15:28:55 -0700 Subject: [PATCH 553/553] Revise inaccurate variable name --- .../org/dataone/hashstore/filehashstore/FileHashStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 343106d6..cc293616 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -1853,9 +1853,9 @@ private void removePidAndHandleCidDeletion( if (Files.size(absCidRefsPath) == 0) { deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); } else { - String warnMsg = "Cid referenced by pid: " + pid + String infoMsg = "Cid referenced by pid: " + pid + " is not empty (refs exist for cid). Skipping object " + "deletion."; - logFileHashStore.info(warnMsg); + logFileHashStore.info(infoMsg); } } catch (Exception e) { logFileHashStore.error(