From ff7135a67d9a857543ae09f9dececcc1562ca009 Mon Sep 17 00:00:00 2001 From: Dou Mok Date: Thu, 29 Jun 2023 14:06:06 -0700 Subject: [PATCH] Add public API method 'getHexDigest()' plus new junit tests and clean up code --- .../org/dataone/hashstore/HashAddress.java | 10 +- .../java/org/dataone/hashstore/HashStore.java | 22 ++- .../dataone/hashstore/HashStoreFactory.java | 4 +- .../filehashstore/FileHashStore.java | 64 ++++++- .../FileHashStoreInterfaceTest.java | 172 ++++++++++-------- 5 files changed, 177 insertions(+), 95 deletions(-) diff --git a/src/main/java/org/dataone/hashstore/HashAddress.java b/src/main/java/org/dataone/hashstore/HashAddress.java index 095f0e2d..fdd6de70 100644 --- a/src/main/java/org/dataone/hashstore/HashAddress.java +++ b/src/main/java/org/dataone/hashstore/HashAddress.java @@ -11,11 +11,11 @@ * know the underlying file system details. */ public class HashAddress { - private String id; - private String relPath; - private Path absPath; - private boolean isDuplicate; - private Map hexDigests; + private final String id; + private final String relPath; + private final Path absPath; + private final boolean isDuplicate; + private final Map hexDigests; /** * Creates a new instance of HashAddress with the given properties. 
diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index d3463bd6..dbc0105c 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -105,7 +105,7 @@ String storeMetadata(InputStream metadata, String pid, String formatId) * @throws FileNotFoundException When requested pid has no associated object * @throws IOException I/O error when creating InputStream to * object - * @throws NoSuchAlgorithmException When algorithm used to calcualte object + * @throws NoSuchAlgorithmException When algorithm used to calculate object * address is not supported */ InputStream retrieveObject(String pid) @@ -123,21 +123,21 @@ InputStream retrieveObject(String pid) * associated object * @throws IOException I/O error when creating InputStream to * metadata - * @throws NoSuchAlgorithmException When algorithm used to calcualte metadata + * @throws NoSuchAlgorithmException When algorithm used to calculate metadata * address is not supported */ InputStream retrieveMetadata(String pid, String formatId) throws Exception; /** * The 'deleteObject' method deletes an object permanently from disk using a - * given persistent identifier and any empty subdirecetories. + * given persistent identifier and any empty subdirectories. 
* * @param pid Authority-based identifier - * @return + * @return True if successful * @throws IllegalArgumentException When pid is null or empty * @throws FileNotFoundException When requested pid has no associated object * @throws IOException I/O error when deleting empty directories - * @throws NoSuchAlgorithmException When algorithm used to calcualte object + * @throws NoSuchAlgorithmException When algorithm used to calculate object * address is not supported */ boolean deleteObject(String pid) throws Exception; @@ -149,11 +149,11 @@ InputStream retrieveObject(String pid) * * @param pid Authority-based identifier * @param formatId Metadata namespace/format - * @return + * @return True if successful * @throws IllegalArgumentException When pid or formatId is null or empty * @throws FileNotFoundException When requested pid has no metadata * @throws IOException I/O error when deleting empty directories - * @throws NoSuchAlgorithmException When algorithm used to calcualte object + * @throws NoSuchAlgorithmException When algorithm used to calculate object * address is not supported */ boolean deleteMetadata(String pid, String formatId) throws Exception; @@ -164,8 +164,12 @@ InputStream retrieveObject(String pid) * * @param pid Authority-based identifier * @param algorithm Algorithm of desired hex digest - * @return - * @throws Exception TODO: Add specific exceptions + * @return String hex digest of requested pid + * @throws IllegalArgumentException When pid or formatId is null or empty + * @throws FileNotFoundException When requested pid object does not exist + * @throws IOException I/O error when calculating hex digests + * @throws NoSuchAlgorithmException When algorithm used to calculate object + * address is not supported */ String getHexDigest(String pid, String algorithm) throws Exception; } diff --git a/src/main/java/org/dataone/hashstore/HashStoreFactory.java b/src/main/java/org/dataone/hashstore/HashStoreFactory.java index adea9e7e..4b106336 100644 --- 
a/src/main/java/org/dataone/hashstore/HashStoreFactory.java +++ b/src/main/java/org/dataone/hashstore/HashStoreFactory.java @@ -26,8 +26,8 @@ public class HashStoreFactory { * (int) * storeAlgorithm * - * @return - * @throws HashStoreFactoryException When HashStore fails to initialize due to + * @return HashStore instance ready to store objects and metadata + * @throws HashStoreFactoryException When HashStore fails to initialize due to * permissions or class-related issues * @throws IOException When there is an issue with properties */ diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index 2c3bac45..1d6d371f 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -302,7 +302,7 @@ protected void checkConfigurationEquality(String propertyName, Object suppliedVa * @param storeWidth Width of store * @param storeAlgorithm Algorithm to use to calculate the hex digest * for the - * permanent address of a data sobject + * permanent address of a data object * @param storeMetadataNamespace default formatId of hashstore metadata * @return String that representing the contents of 'hashstore.yaml' */ @@ -533,7 +533,7 @@ public InputStream retrieveObject(String pid) } // Get permanent address of the pid by calculating its sha-256 hex digest - String objectCid = this.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String objectCid = this.getPidHexDigest(pid, this.OBJECT_STORE_ALGORITHM); String objShardString = this.getHierarchicalPathString(this.DIRECTORY_DEPTH, this.DIRECTORY_WIDTH, objectCid); Path objHashAddressPath = this.OBJECT_STORE_DIRECTORY.resolve(objShardString); @@ -579,7 +579,7 @@ public InputStream retrieveMetadata(String pid, String formatId) throws Exceptio } // Get permanent address of the pid by calculating its sha-256 hex digest - String metadataCid = 
this.getPidHexDigest(pid + formatId, OBJECT_STORE_ALGORITHM); + String metadataCid = this.getPidHexDigest(pid + formatId, this.OBJECT_STORE_ALGORITHM); String metadataShardString = this.getHierarchicalPathString(this.DIRECTORY_DEPTH, this.DIRECTORY_WIDTH, metadataCid); Path metadataHashAddressPath = this.METADATA_STORE_DIRECTORY.resolve(metadataShardString); @@ -620,7 +620,7 @@ public boolean deleteObject(String pid) } // Get permanent address of the pid by calculating its sha-256 hex digest - String objectCid = this.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String objectCid = this.getPidHexDigest(pid, this.OBJECT_STORE_ALGORITHM); String objShardString = this.getHierarchicalPathString(this.DIRECTORY_DEPTH, this.DIRECTORY_WIDTH, objectCid); Path objHashAddressPath = this.OBJECT_STORE_DIRECTORY.resolve(objShardString); @@ -673,7 +673,7 @@ public boolean deleteMetadata(String pid, String formatId) } // Get permanent address of the pid by calculating its sha-256 hex digest - String metadataCid = this.getPidHexDigest(pid + formatId, OBJECT_STORE_ALGORITHM); + String metadataCid = this.getPidHexDigest(pid + formatId, this.OBJECT_STORE_ALGORITHM); String metadataCidShardString = this.getHierarchicalPathString(this.DIRECTORY_DEPTH, this.DIRECTORY_WIDTH, metadataCid); Path metadataCidPath = this.METADATA_STORE_DIRECTORY.resolve(metadataCidShardString); @@ -710,9 +710,57 @@ public boolean deleteMetadata(String pid, String formatId) } @Override - public String getHexDigest(String pid, String algorithm) throws Exception { - // TODO: Implement method - return null; + public String getHexDigest(String pid, String algorithm) + throws NoSuchAlgorithmException, FileNotFoundException, IOException { + logFileHashStore.debug("FileHashStore.getHexDigest - Called to calculate hex digest for pid: " + pid); + + if (pid == null || pid.trim().isEmpty()) { + String errMsg = "FileHashStore.getHexDigest - pid cannot be null or empty, pid: " + pid; + logFileHashStore.error(errMsg); + 
throw new IllegalArgumentException(errMsg); + } + this.validateAlgorithm(algorithm); + + // Get permanent address of the pid by calculating its sha-256 hex digest + String objectCid = this.getPidHexDigest(pid, this.OBJECT_STORE_ALGORITHM); + String objShardString = this.getHierarchicalPathString(this.DIRECTORY_DEPTH, this.DIRECTORY_WIDTH, + objectCid); + Path objHashAddressPath = this.OBJECT_STORE_DIRECTORY.resolve(objShardString); + + // Check to see if object exists + if (!Files.exists(objHashAddressPath)) { + String errMsg = "FileHashStore.getHexDigest - File does not exist for pid: " + pid + + " with object address: " + objHashAddressPath; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } + + // If so, calculate hex digest/checksum + MessageDigest mdObject = MessageDigest.getInstance(algorithm); + try { + InputStream dataStream = Files.newInputStream(objHashAddressPath); + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = dataStream.read(buffer)) != -1) { + mdObject.update(buffer, 0, bytesRead); + + } + // Close datastream + dataStream.close(); + + } catch (IOException ioe) { + String errMsg = "FileHashStore.getHexDigest - Unexpected IOException encountered: " + + ioe.getMessage(); + logFileHashStore.error(errMsg); + throw ioe; + + } + + String mdObjectHexDigest = DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); + logFileHashStore + .info("FileHashStore.getHexDigest - Hex digest calculated for pid: " + pid + ", with hex digest value: " + + mdObjectHexDigest); + return mdObjectHexDigest; } /** diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index 4b84d5d2..72b4e6dc 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -664,12 +664,8 @@ 
public void retrieveObject() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveObject_pidNull() throws Exception { - try { - InputStream pidInputStream = fileHashStore.retrieveObject(null); - pidInputStream.close(); - } catch (Exception e) { - throw e; - } + InputStream pidInputStream = fileHashStore.retrieveObject(null); + pidInputStream.close(); } /** @@ -677,12 +673,8 @@ public void retrieveObject_pidNull() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveObject_pidEmpty() throws Exception { - try { - InputStream pidInputStream = fileHashStore.retrieveObject(""); - pidInputStream.close(); - } catch (Exception e) { - throw e; - } + InputStream pidInputStream = fileHashStore.retrieveObject(""); + pidInputStream.close(); } /** @@ -690,12 +682,8 @@ public void retrieveObject_pidEmpty() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveObject_pidEmptySpaces() throws Exception { - try { - InputStream pidInputStream = fileHashStore.retrieveObject(" "); - pidInputStream.close(); - } catch (Exception e) { - throw e; - } + InputStream pidInputStream = fileHashStore.retrieveObject(" "); + pidInputStream.close(); } /** @@ -703,12 +691,8 @@ public void retrieveObject_pidEmptySpaces() throws Exception { */ @Test(expected = FileNotFoundException.class) public void retrieveObject_pidNotFound() throws Exception { - try { - InputStream pidInputStream = fileHashStore.retrieveObject("dou.2023.hs.1"); - pidInputStream.close(); - } catch (Exception e) { - throw e; - } + InputStream pidInputStream = fileHashStore.retrieveObject("dou.2023.hs.1"); + pidInputStream.close(); } /** @@ -781,14 +765,10 @@ public void retrieveMetadata() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveMetadata_pidNull() throws Exception { - try { - String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); - InputStream 
pidInputStream = fileHashStore.retrieveMetadata(null, storeFormatId); - pidInputStream.close(); + String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata(null, storeFormatId); + pidInputStream.close(); - } catch (Exception e) { - throw e; - } } /** @@ -796,14 +776,10 @@ public void retrieveMetadata_pidNull() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveMetadata_pidEmpty() throws Exception { - try { - String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata("", storeFormatId); - pidInputStream.close(); + String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata("", storeFormatId); + pidInputStream.close(); - } catch (Exception e) { - throw e; - } } /** @@ -811,14 +787,10 @@ public void retrieveMetadata_pidEmpty() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveMetadata_pidEmptySpaces() throws Exception { - try { - String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata(" ", storeFormatId); - pidInputStream.close(); + String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata(" ", storeFormatId); + pidInputStream.close(); - } catch (Exception e) { - throw e; - } } /** @@ -826,13 +798,9 @@ public void retrieveMetadata_pidEmptySpaces() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveMetadata_formatNull() throws Exception { - try { - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", null); - pidInputStream.close(); + InputStream pidInputStream = 
fileHashStore.retrieveMetadata("dou.2023.hs.1", null); + pidInputStream.close(); - } catch (Exception e) { - throw e; - } } /** @@ -840,13 +808,9 @@ public void retrieveMetadata_formatNull() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveMetadata_formatEmpty() throws Exception { - try { - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", ""); - pidInputStream.close(); + InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", ""); + pidInputStream.close(); - } catch (Exception e) { - throw e; - } } /** @@ -854,13 +818,9 @@ public void retrieveMetadata_formatEmpty() throws Exception { */ @Test(expected = IllegalArgumentException.class) public void retrieveMetadata_formatEmptySpaces() throws Exception { - try { - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", " "); - pidInputStream.close(); + InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", " "); + pidInputStream.close(); - } catch (Exception e) { - throw e; - } } /** @@ -868,14 +828,10 @@ public void retrieveMetadata_formatEmptySpaces() throws Exception { */ @Test(expected = FileNotFoundException.class) public void retrieveMetadata_pidNotFound() throws Exception { - try { - String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", storeFormatId); - pidInputStream.close(); + String storeFormatId = (String) this.fhsProperties.get("storeMetadataNamespace"); + InputStream pidInputStream = fileHashStore.retrieveMetadata("dou.2023.hs.1", storeFormatId); + pidInputStream.close(); - } catch (Exception e) { - throw e; - } } /** @@ -927,7 +883,7 @@ public void retrieveMetadata_verifyContent() throws Exception { } /** - * Confirm that deleteObject deletes object and empty sub directories + * Confirm that deleteObject deletes object and empty subdirectories */ @Test public void 
deleteObject() throws Exception { @@ -1080,4 +1036,78 @@ public void deleteMetadata_formatIdEmptySpaces() throws Exception { String pid = "dou.2023.hashstore.1"; fileHashStore.deleteMetadata(pid, " "); } + + /** + * Confirm correct checksum/hex digest returned + */ + @Test + public void getHexDigest() throws Exception { + for (String pid : testData.pidList) { + // Store file first + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + HashAddress objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null); + + // Then get the checksum + String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); + String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); + String objSha256Checksum = objInfo.getHexDigests().get("SHA-256"); + assertEquals(pidHexDigest, sha256DigestFromTestData); + assertEquals(pidHexDigest, objSha256Checksum); + } + } + + /** + * Confirm getHexDigest throws exception when file is not found + */ + @Test(expected = FileNotFoundException.class) + public void getHexDigest_pidNotFound() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + fileHashStore.getHexDigest(pidFormatted, "SHA-256"); + } + } + + /** + * Confirm getHexDigest throws exception when pid is null + */ + @Test(expected = IllegalArgumentException.class) + public void getHexDigest_pidNull() throws Exception { + fileHashStore.getHexDigest(null, "SHA-256"); + } + + /** + * Confirm getHexDigest throws exception when pid is empty + */ + @Test(expected = IllegalArgumentException.class) + public void getHexDigest_pidEmpty() throws Exception { + fileHashStore.getHexDigest("", "SHA-256"); + } + + /** + * Confirm getHexDigest throws exception when pid contains only spaces + */ + @Test(expected = IllegalArgumentException.class) + public void getHexDigest_pidEmptySpaces() throws Exception { + 
fileHashStore.getHexDigest(" ", "SHA-256"); + } + + /** + * Confirm getHexDigest throws exception when unsupported algorithm supplied + */ + @Test(expected = NoSuchAlgorithmException.class) + public void getHexDigest_badAlgo() throws Exception { + for (String pid : testData.pidList) { + // Store object first + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + InputStream dataStream = Files.newInputStream(testDataFile); + fileHashStore.storeObject(dataStream, pid, null, null, null); + + fileHashStore.getHexDigest(pid, "BLAKE2S"); + } + } }