diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 253c70fc..3941404b 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -21,10 +21,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: - java-version: '11' + java-version: '17' distribution: 'temurin' cache: maven - name: Build with Maven diff --git a/.idea/.gitignore b/.idea/.gitignore index 26d33521..4de73123 100644 --- a/.idea/.gitignore +++ b/.idea/.gitignore @@ -1,3 +1,4 @@ # Default ignored files /shelf/ /workspace.xml +/inspectionProfiles diff --git a/.idea/misc.xml b/.idea/misc.xml index d5cd6143..67e1e611 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -8,5 +8,5 @@ - + \ No newline at end of file diff --git a/README.md b/README.md index 3cac7b18..82ccfd02 100644 --- a/README.md +++ b/README.md @@ -7,36 +7,55 @@ - Contact us: support@dataone.org - [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) -HashStore is a server-side java library implementing a content-based identifier file system for storing and accessing data and metadata for DataONE services. The package is used in DataONE system components that need direct, filesystem-based access to data objects, their system metadata, and extended metadata about the objects. This package is a core component of the [DataONE federation](https://dataone.org), and supports large-scale object storage for a variety of repositories, including the [KNB Data Repository](http://knb.ecoinformatics.org), the [NSF Arctic Data Center](https://arcticdata.io/catalog/), the [DataONE search service](https://search.dataone.org), and other repositories. - -DataONE in general, and HashStore in particular, are open source, community projects. We [welcome contributions](https://github.com/DataONEorg/hashstore-java/blob/main/CONTRIBUTING.md) in many forms, including code, graphics, documentation, bug reports, testing, etc. 
Use the [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) to discuss these contributions with us. +HashStore is a server-side java library that implements an object storage file system for storing +and accessing data and metadata for DataONE services. The package is used in DataONE system +components that need direct, filesystem-based access to data objects, their system metadata, and +extended metadata about the objects. This package is a core component of +the [DataONE federation](https://dataone.org), and supports large-scale object storage for a variety +of repositories, including the [KNB Data Repository](http://knb.ecoinformatics.org), +the [NSF Arctic Data Center](https://arcticdata.io/catalog/), +the [DataONE search service](https://search.dataone.org), and other repositories. + +DataONE in general, and HashStore in particular, are open source, community projects. +We [welcome contributions](https://github.com/DataONEorg/hashstore-java/blob/main/CONTRIBUTING.md) +in many forms, including code, graphics, documentation, bug reports, testing, etc. Use +the [DataONE discussions](https://github.com/DataONEorg/dataone/discussions) to discuss these +contributions with us. ## Documentation -Documentation is a work in progress, and can be found on the [Metacat repository](https://github.com/NCEAS/metacat/blob/feature-1436-storage-and-indexing/docs/user/metacat/source/storage-subsystem.rst#physical-file-layout) as part of the storage redesign planning. Future updates will include documentation here as the package matures. +Documentation is a work in progress, and can be found on +the [Metacat repository](https://github.com/NCEAS/metacat/blob/feature-1436-storage-and-indexing/docs/user/metacat/source/storage-subsystem.rst#physical-file-layout) +as part of the storage redesign planning. Future updates will include documentation here as the +package matures. 
## HashStore Overview -HashStore is a content-addressable file management system that utilizes the content identifier of an object to address files. The system stores both objects, references (refs) and metadata in its respective directories and provides an API for interacting with the store. HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected usage of HashStore. +HashStore is an object storage system that provides persistent file-based storage using content +hashes to de-duplicate data. The system stores both objects, references (refs) and metadata in its +respective directories and utilizes an identifier-based API for interacting with the store. +HashStore storage classes (like `FileHashStore`) must implement the HashStore interface to ensure +the expected usage of HashStore. + +### Public API Methods -###### Public API Methods - storeObject -- verifyObject - tagObject -- findObject - storeMetadata - retrieveObject - retrieveMetadata - deleteObject +- deleteIfInvalidObject - deleteMetadata - getHexDigest -For details, please see the HashStore interface (HashStore.java) +For details, please see the HashStore interface [HashStore.java](https://github.com/DataONEorg/hashstore-java/blob/main/src/main/java/org/dataone/hashstore/HashStore.java) +### How do I create a HashStore? -###### How do I create a HashStore?
+To create or interact with a HashStore, instantiate a HashStore object with the following set of +properties: -To create or interact with a HashStore, instantiate a HashStore object with the following set of properties: - storePath - storeDepth - storeWidth @@ -53,112 +72,176 @@ storeProperties.setProperty("storeDepth", "3"); storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata" ); // Instantiate a HashStore HashStore hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); // Store an object -hashStore.storeObject(stream, pid) +hashStore.storeObject(stream, pid); // ... ``` +### What does HashStore look like? -###### Working with objects (store, retrieve, delete) +```sh +# Example layout in HashStore with a single file stored along with its metadata and reference files. +# This uses a store depth of 3 (number of nested levels/directories - e.g. '/4d/19/81/' within +# 'objects', see below), with a width of 2 (number of characters used in directory name - e.g. "4d", +# "19" etc.) 
and "SHA-256" as its default store algorithm +## Notes: +## - Objects are stored using their content identifier as the file address +## - The reference file for each pid contains a single cid +## - The reference file for each cid contains multiple pids each on its own line +## - There are two metadata docs under the metadata directory for the pid (sysmeta, annotations) + +.../metacat/hashstore +├── hashstore.yaml +└── objects +| └── 4d +| └── 19 +| └── 81 +| └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c +└── metadata +| └── 0d +| └── 55 +| └── 55 +| └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e +| └── 323e0799524cec4c7e14d31289cefd884b563b5c052f154a066de5ec1e477da7 +| └── sha256(pid+formatId_annotations) +└── refs + ├── cids + | └── 4d + | └── 19 + | └── 81 + | └── 71eef969d553d4c9537b1811a7b078f9a3804fc978a761bc014c05972c + └── pids + └── 0d + └── 55 + └── 55 + └── 5ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e +``` -In HashStore, objects are first saved as temporary files while their content identifiers are calculated. Once the default hash algorithm list and their hashes are generated, objects are stored in their permanent location using the store's algorithm's corresponding hash value, the store depth and the store width. Lastly, reference files are created for the object so that they can be found and retrieved given an identifier (ex. persistent identifier (pid)). Note: Objects are also stored once and only once. +### Working with objects (store, retrieve, delete) + +In HashStore, objects are first saved as temporary files while their content identifiers are +calculated. Once the default hash algorithm list and their hashes are generated, objects are stored +in their permanent location using the store's algorithm's corresponding hash value, the store depth +and the store width. Lastly, objects are 'tagged' with a given identifier (ex. persistent +identifier (pid)). 
This process produces reference files, which allow objects to be found and +retrieved with a given identifier. +- Note 1: An identifier can only be used once +- Note 2: Each object is stored once and only once using its content identifier (a checksum generated + from using a hashing algorithm). Clients that attempt to store duplicate objects will receive + the expected ObjectMetadata - with HashStore handling the de-duplication process under the hood. + +By calling the various interface methods for `storeObject`, the calling app/client can validate, +store and tag an object simultaneously if the relevant data is available. In the absence of an +identifier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. +The client is then expected to call `deleteIfInvalidObject` when the relevant metadata is available to +confirm that the object is what is expected. And to finalize the process (to make the object +discoverable), the client calls `tagObject`. In summary, there are two expected paths to store an +object: -By calling the various interface methods for `storeObject`, the calling app/client can validate, store and tag an object simultaneously if the relevant data is available. In the absence of an identfiier (ex. persistent identifier (pid)), `storeObject` can be called to solely store an object. The client is then expected to call `verifyObject` when the relevant metadata is available to confirm that the object has been stored as expected. And to finalize the process (to make the object discoverable), the client calls `tagObject``.
In summary, there are two expected paths to store an object: ```java // All-in-one process which stores, validates and tags an object -objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize) +objectMetadata objInfo = storeObject(InputStream, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize); // Manual Process // Store object -objectMetadata objInfo = storeObject(InputStream) -// Validate object, throws exceptions if there is a mismatch and deletes the associated file -verifyObject(objInfo, checksum, checksumAlgorithn, objSize) +objectMetadata objInfo = storeObject(InputStream); +// Validate object, if the parameters do not match, the data object associated with the objInfo +// supplied will be deleted +deleteIfInvalidObject(objInfo, checksum, checksumAlgorithn, objSize); // Tag object, makes the object discoverable (find, retrieve, delete) -tagObject(pid, cid) +tagObject(pid, cid); ``` **How do I retrieve an object if I have the pid?** -- To retrieve an object, call the Public API method `retrieveObject` which opens a stream to the object if it exists. -**How do I find an object or check that it exists if I have the pid?** -- To find the location of the object, call the Public API method `findObject` which will return the content identifier (cid) of the object. -- This cid can then be used to locate the object on disk by following HashStore's store configuration. +- To retrieve an object, call the Public API method `retrieveObject` which opens a stream to the + object if it exists. **How do I delete an object if I have the pid?** -- To delete an object, call the Public API method `deleteObject` which will delete the object and its associated references and reference files where relevant. -- Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so that the shared reference files are not unintentionally modified concurrently. 
An object that is in the process of being deleted should not be tagged, and vice versa. These calls have been implemented to occur sequentially to improve clarity in the event of an unexpected conflict or issue. - - -###### Working with metadata (store, retrieve, delete) -HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. To differentiate between metadata documents for a given object, HashStore includes the 'formatId' (format or namespace of the metadata) when generating the address of the metadata document to store (the hash of the 'pid' + 'formatId'). By default, calling `storeMetadata` will use HashStore's default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to store multiple metadata files about an object, the client app is expected to provide a 'formatId' that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). +- To delete an object, all its associated reference files and its metadata, call the Public API + method `deleteObject()` with `idType` 'pid'. If an `idType` is not given (ex. + calling `deleteObject(String pid)`), the `idType` will be assumed to be a 'pid' +- To delete only an object, call `deleteObject()` with `idType` 'cid' which will remove the object + if it is not referenced by any pids. +- Note, `deleteObject` and `tagObject` calls are synchronized on their content identifier values so + that the shared reference files are not unintentionally modified concurrently. An object that is + in the process of being deleted should not be tagged, and vice versa. These calls have been + implemented to occur sequentially to improve clarity in the event of an unexpected conflict or + issue. + +### Working with metadata (store, retrieve, delete) + +HashStore's '/metadata' directory holds all metadata for objects stored in HashStore. 
All metadata +documents related to a 'pid' are stored in a directory determined by calculating the hash of the +pid (based on the store's algorithm). Each specific metadata document is then stored by calculating +the hash of its associated `pid+formatId`. By default, calling `storeMetadata` will use HashStore's +default metadata namespace as the 'formatId' when storing metadata. Should the calling app wish to +store multiple metadata files about an object, the client app is expected to provide a 'formatId' +that represents an object format for the metadata type (ex. `storeMetadata(stream, pid, formatId)`). **How do I retrieve a metadata file?** -- To find a metadata object, call the Public API method `retrieveMetadata` which returns a stream to the metadata file that's been stored with the default metadata namespace if it exists. -- If there are multiple metadata objects, a 'formatId' must be specified when calling `retrieveMetadata` (ex. `retrieveMetadata(pid, formatId)`) + +- To find a metadata object, call the Public API method `retrieveMetadata` which returns a stream to + the metadata file that's been stored with the default metadata namespace if it exists. +- If there are multiple metadata objects, a 'formatId' must be specified when + calling `retrieveMetadata` (ex. `retrieveMetadata(pid, formatId)`) **How do I delete a metadata file?** -- Like `retrieveMetadata`, call the Public API method `deleteMetadata` which will delete the metadata object associated with the given pid. -- If there are multiple metadata objects, a 'formatId' must be specified when calling `deleteMetadata` to ensure the expected metadata object is deleted. +- Like `retrieveMetadata`, call the Public API method `deleteMetadata(String pid, String formatId)` + which will delete the metadata object associated with the given pid. +- To delete all metadata objects related to a given 'pid', call `deleteMetadata(String pid)` + +### What are HashStore reference files? 
-###### What are HashStore reference files? +HashStore assumes that every object to store has a respective identifier. This identifier is then +used when storing, retrieving and deleting an object. In order to facilitate this process, we create +two types of reference files: -HashStore assumes that every object to store has a respective identifier. This identifier is then used when storing, retrieving and deleting an object. In order to facilitate this process, we create two types of reference files: -- pid (persistent identifier) reference files +- pid (persistent identifier) reference files - cid (content identifier) reference files -These reference files are implemented in HashStore underneath the hood with no expectation for modification from the calling app/client. The one and only exception to this process when the calling client/app does not have an identifier, and solely stores an objects raw bytes in HashStore (calling `storeObject(InputStream)`). +These reference files are implemented in HashStore underneath the hood with no expectation for +modification from the calling app/client. The one and only exception to this process is when the +calling client/app does not have an identifier, and solely stores an object's raw bytes in +HashStore (calling `storeObject(InputStream)`). **'pid' Reference Files** + - Pid (persistent identifier) reference files are created when storing an object with an identifier. - Pid reference files are located in HashStores '/refs/pid' directory -- If an identifier is not available at the time of storing an object, the calling app/client must create this association between a pid and the object it represents by calling `tagObject` separately. -- Each pid reference file contains a string that represents the content identifier of the object it references -- Like how objects are stored once and only once, there is also only one pid reference file for each object.
+- If an identifier is not available at the time of storing an object, the calling app/client must + create this association between a pid and the object it represents by calling `tagObject` + separately. +- Each pid reference file contains a string that represents the content identifier of the object it + references +- Like how objects are stored once and only once, there is also only one pid reference file for each + object. **'cid' Reference Files** -- Cid (content identifier) reference files are created at the same time as pid reference files when storing an object with an identifier. -- Cid reference files are located in HashStore's '/refs/cid' directory -- A cid reference file is a list of all the pids that reference a cid, delimited by a new line ("\n") character - - -###### What does HashStore look like? - -``` -# Example layout in HashStore with a single file stored along with its metadata and reference files. -# This uses a store depth of 3, with a width of 2 and "SHA-256" as its default store algorithm -## Notes: -## - Objects are stored using their content identifier as the file address -## - The reference file for each pid contains a single cid -## - The reference file for each cid contains multiple pids each on its own line - -.../metacat/hashstore/ -└─ objects - └─ /d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 -└─ metadata - └─ /15/8d/7e/55c36a810d7c14479c9...b20d7df66768b04 -└─ refs - └─ pid/0d/55/5e/d77052d7e166017f779...7230bcf7abcef65e - └─ cid/d5/95/3b/d802fa74edea72eb941...00d154a727ed7c2 -hashstore.yaml -``` +- Cid (content identifier) reference files are created at the same time as pid reference files when + storing an object with an identifier. 
+- Cid reference files are located in HashStore's '/refs/cid' directory +- A cid reference file is a list of all the pids that reference a cid, delimited by a new line ("\n") + character -## Development build +## Development Build HashStore is a Java package, and built using the [Maven](https://maven.apache.org/) build tool. -To install `hashstore` locally, install Java and Maven on your local machine, +To install `HashStore-java` locally, install Java and Maven on your local machine, and then install or build the package with `mvn install` or `mvn package`, respectively. -We also maintain a parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). +We also maintain a +parallel [Python-based version of HashStore](https://github.com/DataONEorg/hashstore). ## HashStore HashStoreClient Usage @@ -168,38 +251,35 @@ We also maintain a parallel [Python-based version of HashStore](https://github.c $ mvn clean package -Dmaven.test.skip=true # Get help -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -h +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -h -# Step 2: +# Step 2: Determine where your hashstore should live (ex. 
`/var/hashstore`) ## Create a HashStore (long option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=http://ns.dataone.org/service/types/v2.0 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient --createhashstore --storepath=/path/to/store --storedepth=3 --storewidth=2 --storealgo=SHA-256 --storenamespace=https://ns.dataone.org/service/types/v2.0#SystemMetadata ## Create a HashStore (short option) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -chs -store /path/to/store -dp 3 -wp 2 -ap SHA-256 -nsp https://ns.dataone.org/service/types/v2.0#SystemMetadata # Get the checksum of a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 - -# Find an object in HashStore (returns its content identifer if it exists) -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -findobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -getchecksum -pid testpid1 -algo SHA-256 # Store a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storeobject -path /path/to/data.ext -pid testpid1 # Store a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store 
/path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -storemetadata -path /path/to/metadata.ext -pid testpid1 -format_id https://ns.dataone.org/service/types/v2.0#SystemMetadata # Retrieve a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrieveobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrieveobject -pid testpid1 # Retrieve a metadata object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrievemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -retrievemetadata -pid testpid1 -format_id https://ns.dataone.org/service/types/v2.0#SystemMetadata # Delete a data object -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deleteobject -pid testpid1 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deleteobject -pid testpid1 # Delete a metadata file -$ java -cp ./target/hashstore-1.0-SNAPSHOT.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deletemetadata -pid testpid1 -format_id http://ns.dataone.org/service/types/v2 +$ java -cp ./target/hashstore-1.0-SNAPSHOT-shaded.jar org.dataone.hashstore.HashStoreClient -store /path/to/store -deletemetadata -pid testpid1 -format_id https://ns.dataone.org/service/types/v2.0#SystemMetadata ``` ## License @@ -225,9 +305,12 @@ limitations under the License. 
Work on this package was supported by: - DataONE Network -- Arctic Data Center: NSF-PLR grant #2042102 to M. B. Jones, A. Budden, M. Schildhauer, and J. Dozier +- Arctic Data Center: NSF-PLR grant #2042102 to M. B. Jones, A. Budden, M. Schildhauer, and J. + Dozier -Additional support was provided for collaboration by the National Center for Ecological Analysis and Synthesis, a Center funded by the University of California, Santa Barbara, and the State of California. +Additional support was provided for collaboration by the National Center for Ecological Analysis and +Synthesis, a Center funded by the University of California, Santa Barbara, and the State of +California. [![DataONE_footer](https://user-images.githubusercontent.com/6643222/162324180-b5cf0f5f-ae7a-4ca6-87c3-9733a2590634.png)](https://dataone.org) diff --git a/pom.xml b/pom.xml index 532e1c7b..f2dc363a 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ org.postgresql postgresql - 42.4.3 + 42.7.2 @@ -71,8 +71,8 @@ maven-compiler-plugin 3.8.1 - 8 - 8 + 17 + 17 @@ -86,6 +86,8 @@ shade + ${basedir}/target/${project.name}-${project.version}-shaded.jar + ${basedir} diff --git a/src/main/java/org/dataone/hashstore/HashStore.java b/src/main/java/org/dataone/hashstore/HashStore.java index 98b6dd5c..b374d94f 100644 --- a/src/main/java/org/dataone/hashstore/HashStore.java +++ b/src/main/java/org/dataone/hashstore/HashStore.java @@ -5,296 +5,264 @@ import java.io.InputStream; import java.security.NoSuchAlgorithmException; -import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; -import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; /** * HashStore is a content-addressable file management system that 
utilizes the content identifier of * an object to address files. The system stores both objects, references (refs) and metadata in its * respective directories and provides an API for interacting with the store. HashStore storage - * classes (like `FileHashStore`) must implement the HashStore interface to ensure the expected - * usage of the system. + * classes (like {@code FileHashStore}) must implement the HashStore interface to ensure the + * expected usage of the system. */ public interface HashStore { - /** - * The `storeObject` method is responsible for the atomic storage of objects to disk using a - * given InputStream. Upon successful storage, the method returns a (ObjectMetadata) object - * containing relevant file information, such as the file's id (which can be used to locate - * the object on disk), the file's size, and a hex digest dict of algorithms and checksums. - * Storing an object with `store_object` also tags an object (creating references) which - * allow the object to be discoverable. - * - * `storeObject` also ensures that an object is stored only once by synchronizing multiple - * calls and rejecting calls to store duplicate objects. Note, calling `storeObject` without - * a pid is a possibility, but should only store the object without tagging the object. It - * is then the caller's responsibility to finalize the process by calling `tagObject` after - * verifying the correct object is stored. - * - * The file's id is determined by calculating the object's content identifier based on the - * store's default algorithm, which is also used as the permanent address of the file. The - * file's identifier is then sharded using the store's configured depth and width, delimited - * by '/' and concatenated to produce the final permanent address and is stored in the - * `./[storePath]/objects/` directory. 
- * - * By default, the hex digest map includes the following hash algorithms: MD5, SHA-1, - * SHA-256, SHA-384, SHA-512 - which are the most commonly used algorithms in dataset - * submissions to DataONE and the Arctic Data Center. If an additional algorithm is - * provided, the `storeObject` method checks if it is supported and adds it to the hex - * digests dict along with its corresponding hex digest. An algorithm is considered - * "supported" if it is recognized as a valid hash algorithm in - * `java.security.MessageDigest` class. - * - * Similarly, if a file size and/or checksum & checksumAlgorithm value are provided, - * `storeObject` validates the object to ensure it matches the given arguments before moving - * the file to its permanent address. - * - * @param object Input stream to file - * @param pid Authority-based identifier - * @param additionalAlgorithm Additional hex digest to include in hexDigests - * @param checksum Value of checksum to validate against - * @param checksumAlgorithm Algorithm of checksum submitted - * @param objSize Expected size of object to validate after storing - * @return ObjectMetadata object encapsulating file information - * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is - * invalid - * @throws IOException I/O Error when writing file, generating checksums - * and/or moving file - * @throws PidRefsFileExistsException If a pid refs file already exists, meaning the pid is - * already referencing a file. - * @throws RuntimeException Thrown when there is an issue with permissions, - * illegal arguments (ex. 
empty pid) or null pointers - * @throws InterruptedException When tagging pid and cid process is interrupted - */ - public ObjectMetadata storeObject( - InputStream object, String pid, String additionalAlgorithm, String checksum, - String checksumAlgorithm, long objSize - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; + /** + * The {@code storeObject} method is responsible for the atomic storage of objects to disk using + * a given InputStream. Upon successful storage, the method returns an {@code ObjectMetadata} + * object containing relevant file information, such as the file's id (which can be used by a + * system administrator -- but not by an API client -- to locate the object on disk), the file's + * size, and a hex digest dict of algorithms and checksums. Storing an object with + * {@code store_object} also tags an object (creating references) which allow the object to be + * discoverable. + * + * {@code storeObject} also ensures that an object is stored only once by synchronizing multiple + * calls and rejecting calls to store duplicate objects. Note, calling {@code storeObject} + * without a pid is a possibility, but should only store the object without tagging the object. + * It is then the caller's responsibility to finalize the process by calling {@code tagObject} + * after verifying the correct object is stored. + * + * The file's id is determined by calculating the object's content identifier based on the + * store's default algorithm, which is also used as the permanent address of the file. The + * file's identifier is then sharded using the store's configured depth and width, delimited by + * '/' and concatenated to produce the final permanent address and is stored in the + * {@code ./[storePath]/objects/} directory.
+ * + * By default, the hex digest map includes the following hash algorithms: MD5, SHA-1, SHA-256, + * SHA-384, SHA-512 - which are the most commonly used algorithms in dataset submissions to + * DataONE and the Arctic Data Center. If an additional algorithm is provided, the + * {@code storeObject} method checks if it is supported and adds it to the hex digests dict + * along with its corresponding hex digest. An algorithm is considered "supported" if it is + * recognized as a valid hash algorithm in {@code java.security.MessageDigest} class. + * + * Similarly, if a file size and/or checksum & checksumAlgorithm value are provided, + * {@code storeObject} validates the object to ensure it matches the given arguments before + * moving the file to its permanent address. + * + * @param object Input stream to file + * @param pid Authority-based identifier + * @param additionalAlgorithm Additional hex digest to include in hexDigests + * @param checksum Value of checksum to validate against + * @param checksumAlgorithm Algorithm of checksum submitted + * @param objSize Expected size of object to validate after storing + * @return ObjectMetadata object encapsulating file information + * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is invalid + * @throws IOException I/O Error when writing file, generating checksums and/or + * moving file + * @throws PidRefsFileExistsException If a pid refs file already exists, meaning the pid is + * already referencing a file. + * @throws RuntimeException Thrown when there is an issue with permissions, illegal + * arguments (ex.
empty pid) or null pointers + * @throws InterruptedException When tagging pid and cid process is interrupted + */ + ObjectMetadata storeObject( + InputStream object, String pid, String additionalAlgorithm, String checksum, + String checksumAlgorithm, long objSize) + throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, RuntimeException, + InterruptedException; - /** - * @see #storeObject(InputStream, String, String, String, String, long) - */ - public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, - IOException, PidRefsFileExistsException, RuntimeException, InterruptedException; + /** + * Store an object only without reference files. + * + * @see #storeObject(InputStream, String, String, String, String, long) + */ + ObjectMetadata storeObject(InputStream object) + throws NoSuchAlgorithmException, IOException, RuntimeException, + InterruptedException; - /** - * @see #storeObject(InputStream, String, String, String, String, long) - */ - public ObjectMetadata storeObject( - InputStream object, String pid, String checksum, String checksumAlgorithm, - long objSize - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; + /** + * Creates references that allow objects stored in HashStore to be discoverable. Retrieving, + * deleting or calculating a hex digest of an object is based on a pid argument; and to proceed, + * we must be able to find the object associated with the pid. 
+ * + * @param pid Authority-based identifier + * @param cid Content-identifier (hash identifier) + * @throws IOException Failure to create tmp file + * @throws PidRefsFileExistsException When pid refs file already exists + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address does not + * exist + * @throws FileNotFoundException If refs file is missing during verification + * @throws InterruptedException When tagObject is waiting to execute but is interrupted + */ + void tagObject(String pid, String cid) + throws IOException, PidRefsFileExistsException, NoSuchAlgorithmException, + FileNotFoundException, InterruptedException; - /** - * @see #storeObject(InputStream, String, String, String, String, long) - */ - public ObjectMetadata storeObject( - InputStream object, String pid, String checksum, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; + /** + * Confirms that an ObjectMetadata's content is equal to the given values. This method throws an + * exception if there are any issues, and attempts to remove the data object if it is determined + * to be invalid. 
+ * + * @param objectInfo ObjectMetadata object with values + * @param checksum Value of checksum to validate against + * @param checksumAlgorithm Algorithm of checksum submitted + * @param objSize Expected size of object to validate after storing + * @throws NonMatchingObjSizeException Given size =/= objMeta size value + * @throws NonMatchingChecksumException Given checksum =/= objMeta checksum value + * @throws UnsupportedHashAlgorithmException Given algo is not found or supported + * @throws NoSuchAlgorithmException When 'deleteInvalidObject' is true and an algo used + * to get a cid refs file is not supported + * @throws InterruptedException When 'deleteInvalidObject' is true and an issue + * with coordinating deleting objects occurs + * @throws IOException Issue with recalculating supported algo for + * checksum not found + */ + void deleteIfInvalidObject( + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) + throws NonMatchingObjSizeException, NonMatchingChecksumException, + UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, + IOException; - /** - * @see #storeObject(InputStream, String, String, String, String, long) - */ - public ObjectMetadata storeObject( - InputStream object, String pid, String additionalAlgorithm - ) throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; + /** + * Adds/updates metadata (ex. {@code sysmeta}) to the HashStore by using a given InputStream, a + * persistent identifier ({@code pid}) and metadata format ({@code formatId}). All metadata + * documents for a given pid will be stored in the directory (under ../metadata) that is + * determined by calculating the hash of the given pid, with the document name being the hash of + * the metadata format ({@code formatId}). + * + * Note, multiple calls to store the same metadata content will all be accepted, but is not + * guaranteed to execute sequentially. 
+ * + * @param metadata Input stream to metadata document + * @param pid Authority-based identifier + * @param formatId Metadata namespace/format + * @return Path to metadata content identifier (string representing metadata address) + * @throws IOException When there is an error writing the metadata document + * @throws IllegalArgumentException Invalid values like null for metadata, or empty pids and + * formatIds + * @throws FileNotFoundException When temp metadata file is not found + * @throws InterruptedException metadataLockedIds synchronization issue + * @throws NoSuchAlgorithmException Algorithm used to calculate permanent address is not + * supported + */ + String storeMetadata(InputStream metadata, String pid, String formatId) + throws IOException, IllegalArgumentException, FileNotFoundException, InterruptedException, + NoSuchAlgorithmException; - /** - * @see #storeObject(InputStream, String, String, String, String, long) - */ - public ObjectMetadata storeObject(InputStream object, String pid, long objSize) - throws NoSuchAlgorithmException, IOException, PidRefsFileExistsException, - RuntimeException, InterruptedException; + /** + * If the '(InputStream metadata, String pid)' signature is used, the metadata format stored + * will default to {@code sysmeta}. + * + * @see #storeMetadata(InputStream, String, String) + */ + String storeMetadata(InputStream metadata, String pid) + throws IOException, IllegalArgumentException, InterruptedException, + NoSuchAlgorithmException; - /** - * Creates references that allow objects stored in HashStore to be discoverable. Retrieving, - * deleting or calculating a hex digest of an object is based on a pid argument; and to - * proceed, we must be able to find the object associated with the pid. 
- * - * @param pid Authority-based identifier - * @param cid Content-identifier (hash identifier) - * @throws IOException Failure to create tmp file - * @throws PidRefsFileExistsException When pid refs file already exists - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs address - * does not exist - * @throws FileNotFoundException If refs file is missing during verification - * @throws InterruptedException When tagObject is waiting to execute but is - * interrupted - */ - public void tagObject(String pid, String cid) throws IOException, - PidRefsFileExistsException, NoSuchAlgorithmException, FileNotFoundException, - InterruptedException; + /** + * Returns an InputStream to an object from HashStore using a given persistent identifier. + * + * @param pid Authority-based identifier + * @return Object InputStream + * @throws IllegalArgumentException When pid is null or empty + * @throws FileNotFoundException When requested pid has no associated object + * @throws IOException I/O error when creating InputStream to object + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + */ + InputStream retrieveObject(String pid) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException; - /** - * Confirms that an ObjectMetadata's content is equal to the given values. If it is not - * equal, it will return False - otherwise True. 
- * - * @param objectInfo ObjectMetadata object with values - * @param checksum Value of checksum to validate against - * @param checksumAlgorithm Algorithm of checksum submitted - * @param objSize Expected size of object to validate after storing - * @throws IOException An issue with deleting the object when there is a - * mismatch - * @throws NoSuchAlgorithmException If checksum algorithm (and its respective checksum) is - * not in objectInfo - * @throws IllegalArgumentException An expected value does not match - */ - public boolean verifyObject( - ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize - ) throws IOException, NoSuchAlgorithmException, IllegalArgumentException; + /** + * Returns an InputStream to the metadata content of a given pid and metadata namespace from + * HashStore. + * + * @param pid Authority-based identifier + * @param formatId Metadata namespace/format + * @return Metadata InputStream + * @throws IllegalArgumentException When pid/formatId is null or empty + * @throws FileNotFoundException When requested pid+formatId has no associated object + * @throws IOException I/O error when creating InputStream to metadata + * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not + * supported + */ + InputStream retrieveMetadata(String pid, String formatId) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException; - /** - * Checks whether an object referenced by a pid exists and returns the content identifier. - * - * @param pid Authority-based identifier - * @return Content identifier (cid) - * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs - * file's absolute address is not valid - * @throws IOException Unable to read from a pid refs file or pid refs - * file does not exist - * @throws OrphanPidRefsFileException When pid refs file exists and the cid found - * inside does not exist. 
- * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the - * expected pid is not found in the cid refs file. - */ - public String findObject(String pid) throws NoSuchAlgorithmException, IOException, - OrphanPidRefsFileException, PidNotFoundInCidRefsFileException; + /** + * @see #retrieveMetadata(String, String) + * + * If {@code retrieveMetadata} is called with signature (String pid), the metadata document + * retrieved will be the given pid's 'sysmeta' + */ + InputStream retrieveMetadata(String pid) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException; - /** - * Adds/updates metadata (ex. `sysmeta`) to the HashStore by using a given InputStream, a - * persistent identifier (`pid`) and metadata format (`formatId`). The permanent address of - * the stored metadata document is determined by calculating the SHA-256 hex digest of the - * provided `pid` + `formatId`. - * - * Note, multiple calls to store the same metadata content will all be accepted, but is not - * guaranteed to execute sequentially. - * - * @param metadata Input stream to metadata document - * @param pid Authority-based identifier - * @param formatId Metadata namespace/format - * @return Metadata content identifier (string representing metadata address) - * @throws IOException When there is an error writing the metadata document - * @throws IllegalArgumentException Invalid values like null for metadata, or empty pids and - * formatIds - * @throws FileNotFoundException When temp metadata file is not found - * @throws InterruptedException metadataLockedIds synchronization issue - * @throws NoSuchAlgorithmException Algorithm used to calculate permanent address is not - * supported - */ - public String storeMetadata(InputStream metadata, String pid, String formatId) - throws IOException, IllegalArgumentException, FileNotFoundException, - InterruptedException, NoSuchAlgorithmException; + /** + * Deletes an object and all relevant associated files (ex. 
system metadata, reference files, + * etc.) based on a given pid. If other pids still reference the pid's associated object, the + * object will not be deleted. + * + * @param pid Authority-based identifier + * @throws IllegalArgumentException When pid is null or empty + * @throws IOException I/O error when deleting empty directories, + * modifying/deleting reference files + * @throws NoSuchAlgorithmException When algorithm used to calculate an object or metadata's + * address is not supported + * @throws InterruptedException When deletion synchronization is interrupted + */ + void deleteObject(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException; - /** - * @see #storeMetadata(InputStream, String, String) - */ - public String storeMetadata(InputStream metadata, String pid) throws IOException, - IllegalArgumentException, FileNotFoundException, InterruptedException, - NoSuchAlgorithmException; + /** + * Deletes a metadata document (ex. {@code sysmeta}) permanently from HashStore using a given + * persistent identifier and its respective metadata namespace. + * + * @param pid Authority-based identifier + * @param formatId Metadata namespace/format + * @throws IllegalArgumentException When pid or formatId is null or empty + * @throws IOException I/O error when deleting metadata or empty directories + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + * @throws InterruptedException Issue with synchronization on metadata doc + */ + void deleteMetadata(String pid, String formatId) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException; - /** - * Returns an InputStream to an object from HashStore using a given persistent identifier. 
- * - * @param pid Authority-based identifier - * @return Object InputStream - * @throws IllegalArgumentException When pid is null or empty - * @throws FileNotFoundException When requested pid has no associated object - * @throws IOException I/O error when creating InputStream to object - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - */ - public InputStream retrieveObject(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * Deletes all metadata related for the given 'pid' from HashStore + * + * @param pid Authority-based identifier + * @throws IllegalArgumentException If pid is invalid + * @throws IOException I/O error when deleting metadata or empty directories + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + * @throws InterruptedException Issue with synchronization on metadata doc + */ + void deleteMetadata(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException; - /** - * Returns an InputStream to the metadata content of a given pid and metadata namespace from - * HashStore. 
- * - * @param pid Authority-based identifier - * @param formatId Metadata namespace/format - * @return Metadata InputStream - * @throws IllegalArgumentException When pid/formatId is null or empty - * @throws FileNotFoundException When requested pid+formatId has no associated object - * @throws IOException I/O error when creating InputStream to metadata - * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not - * supported - */ - public InputStream retrieveMetadata(String pid, String formatId) - throws IllegalArgumentException, FileNotFoundException, IOException, - NoSuchAlgorithmException; - - /** - * @see #retrieveMetadata(String, String) - */ - public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; - - /** - * Deletes an object (and its empty subdirectories) permanently from HashStore using a given - * persistent identifier. - * - * @param pid Authority-based identifier - * @throws IllegalArgumentException When pid is null or empty - * @throws FileNotFoundException When requested pid has no associated object - * @throws IOException I/O error when deleting empty directories, - * modifying/deleting reference files - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - * @throws InterruptedException When deletion synchronization is interrupted - */ - public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException; - - /** - * Delete an object based on its content identifier, with a flag to confirm intention. - * - * Note: This overload method should only be called when an issue arises during the storage - * of an object without a pid, and after verifying (via `verifyObject`) that the object is - * not what is expected. 
- * - * @param cid Content identifier - * @param deleteCid Boolean to confirm - */ - public void deleteObject(String cid, boolean deleteCid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; - - /** - * Deletes a metadata document (ex. `sysmeta`) permanently from HashStore using a given - * persistent identifier and its respective metadata namespace. - * - * @param pid Authority-based identifier - * @param formatId Metadata namespace/format - * @throws IllegalArgumentException When pid or formatId is null or empty - * @throws IOException I/O error when deleting empty directories - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - */ - public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; - - /** - * @see #deleteMetadata(String, String) - */ - public void deleteMetadata(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; - - /** - * Calculates the hex digest of an object that exists in HashStore using a given persistent - * identifier and hash algorithm. - * - * @param pid Authority-based identifier - * @param algorithm Algorithm of desired hex digest - * @return String hex digest of requested pid - * @throws IllegalArgumentException When pid or formatId is null or empty - * @throws FileNotFoundException When requested pid object does not exist - * @throws IOException I/O error when calculating hex digests - * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not - * supported - */ - public String getHexDigest(String pid, String algorithm) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * Calculates the hex digest of an object that exists in HashStore using a given persistent + * identifier and hash algorithm. 
+ * + * @param pid Authority-based identifier + * @param algorithm Algorithm of desired hex digest + * @return String hex digest of requested pid + * @throws IllegalArgumentException When pid or formatId is null or empty + * @throws FileNotFoundException When requested pid object does not exist + * @throws IOException I/O error when calculating hex digests + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + */ + String getHexDigest(String pid, String algorithm) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException; } diff --git a/src/main/java/org/dataone/hashstore/HashStoreClient.java b/src/main/java/org/dataone/hashstore/HashStoreClient.java index ceb31efb..084ef873 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreClient.java +++ b/src/main/java/org/dataone/hashstore/HashStoreClient.java @@ -11,8 +11,8 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Properties; import java.sql.Connection; @@ -21,7 +21,6 @@ import java.sql.Statement; import org.dataone.hashstore.filehashstore.FileHashStoreUtility; - import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -35,8 +34,8 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; /** - * HashStoreClient is a command line interface that allows a developer to create a new HashStore or - * interact directly with an existing HashStore. See 'README.md' for usage examples. + * HashStoreClient is a development tool used to create a new HashStore or interact directly with an + * existing HashStore through the command line. See 'README.md' for usage examples. 
*/ public class HashStoreClient { private static HashStore hashStore; @@ -44,7 +43,7 @@ public class HashStoreClient { /** * Entry point to the HashStore Client interface. - * + * * @param args Command line arguments * @throws Exception General exception class to catch all exceptions. See the HashStore * interface for details. @@ -84,15 +83,14 @@ public static void main(String[] args) throws Exception { String storeNameSpace = cmd.getOptionValue("nsp"); createNewHashStore( - storePath, storeDepth, storeWidth, storeAlgorithm, storeNameSpace - ); + storePath, storeDepth, storeWidth, storeAlgorithm, storeNameSpace); } else { storePath = Paths.get(cmd.getOptionValue("store")); Path hashstoreYaml = storePath.resolve("hashstore.yaml"); if (!Files.exists(hashstoreYaml)) { - String errMsg = "HashStoreClient - Missing hashstore.yaml at storePath (" - + storePath - + "), please create a store with '-chs'. Use '-h' to see options."; + String errMsg = + "HashStoreClient - Missing hashstore.yaml at storePath (" + storePath + + "), please create a store with '-chs'. Use '-h' to see options."; throw new FileNotFoundException(errMsg); } initializeHashStore(storePath); @@ -100,13 +98,14 @@ public static void main(String[] args) throws Exception { // Parse remaining options if (cmd.hasOption("knbvm")) { System.out.println( - "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore.yaml." 
- ); + "HashStoreClient - Testing with KNBVM, checking pgdb.yaml & hashstore" + + ".yaml."); Path pgdbYaml = storePath.resolve("pgdb.yaml"); if (!Files.exists(pgdbYaml)) { - String errMsg = "HashStoreClient - Missing pgdb.yaml at storePath (" - + storePath + "), please manually create it with the following keys: " - + "db_user, db_password, db_host, db_port, db_name"; + String errMsg = + "HashStoreClient - Missing pgdb.yaml at storePath (" + storePath + + "), please manually create it with the following keys: " + + "db_user, db_password, db_host, db_port, db_name"; throw new FileNotFoundException(errMsg); } @@ -125,36 +124,27 @@ public static void main(String[] args) throws Exception { String originDirectory = cmd.getOptionValue("sdir"); String numObjects = cmd.getOptionValue("nobj"); String sizeOfFilesToSkip = cmd.getOptionValue("gbskip"); - FileHashStoreUtility.ensureNotNull(objType, "-stype", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull( - action, "-sts, -rav, -dfs", "HashStoreClient" - ); + FileHashStoreUtility.ensureNotNull(objType, "-stype"); + FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir"); + FileHashStoreUtility.ensureNotNull(action, "-sts, -rav, -dfs"); testWithKnbvm(action, objType, originDirectory, numObjects, sizeOfFilesToSkip); } else if (cmd.hasOption("getchecksum")) { String pid = cmd.getOptionValue("pid"); String algo = cmd.getOptionValue("algo"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(algo, "-algo", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(algo, "-algo"); String hexDigest = hashStore.getHexDigest(pid, algo); System.out.println(hexDigest); - } else if (cmd.hasOption("findobject")) { - String pid = cmd.getOptionValue("pid"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - - String cid = 
hashStore.findObject(pid); - System.out.println(cid); - } else if (cmd.hasOption("storeobject")) { System.out.println("Storing object"); String pid = cmd.getOptionValue("pid"); Path path = Paths.get(cmd.getOptionValue("path")); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(path, "-path", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(path, "-path"); String additional_algo = null; if (cmd.hasOption("algo")) { @@ -176,20 +166,20 @@ public static void main(String[] args) throws Exception { } InputStream pidObjStream = Files.newInputStream(path); - ObjectMetadata objInfo = hashStore.storeObject( - pidObjStream, pid, additional_algo, checksum, checksum_algo, size - ); + ObjectMetadata objInfo = + hashStore.storeObject(pidObjStream, pid, additional_algo, checksum, + checksum_algo, size); pidObjStream.close(); System.out.println("Object Info for pid (" + pid + "):"); - System.out.println(objInfo.getHexDigests()); + System.out.println(objInfo.hexDigests()); } else if (cmd.hasOption("storemetadata")) { String pid = cmd.getOptionValue("pid"); Path path = Paths.get(cmd.getOptionValue("path")); String formatId = cmd.getOptionValue("format_id"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(path, "-path", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(path, "-path"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId"); InputStream pidObjStream = Files.newInputStream(path); String metadataCid = hashStore.storeMetadata(pidObjStream, pid, formatId); @@ -199,7 +189,7 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("retrieveobject")) { String pid = cmd.getOptionValue("pid"); - FileHashStoreUtility.ensureNotNull(pid, 
"-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); InputStream objStream = hashStore.retrieveObject(pid); byte[] buffer = new byte[1000]; @@ -213,15 +203,14 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("retrievemetadata")) { String pid = cmd.getOptionValue("pid"); String formatId = cmd.getOptionValue("format_id"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId"); InputStream metadataStream = hashStore.retrieveMetadata(pid, formatId); byte[] buffer = new byte[1000]; int bytesRead = metadataStream.read(buffer, 0, buffer.length); - String metadataPreview = new String( - buffer, 0, bytesRead, StandardCharsets.UTF_8 - ); + String metadataPreview = + new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); metadataStream.close(); System.out.println(metadataPreview); String retrieveMetadataMsg = "...\n<-- Truncated for Display Purposes -->"; @@ -229,21 +218,20 @@ public static void main(String[] args) throws Exception { } else if (cmd.hasOption("deleteobject")) { String pid = cmd.getOptionValue("pid"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + hashStore.deleteObject(pid); System.out.println("Object for pid (" + pid + ") has been deleted."); } else if (cmd.hasOption("deletemetadata")) { String pid = cmd.getOptionValue("pid"); String formatId = cmd.getOptionValue("format_id"); - FileHashStoreUtility.ensureNotNull(pid, "-pid", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(formatId, "-formatId", "HashStoreClient"); + FileHashStoreUtility.ensureNotNull(pid, "-pid"); + FileHashStoreUtility.ensureNotNull(formatId, "-formatId"); hashStore.deleteMetadata(pid, formatId); - System.out.println( - "Metadata for pid (" 
+ pid + ") and namespace (" + formatId - + ") has been deleted." - ); + System.out.println("Metadata for pid (" + pid + ") and namespace (" + formatId + + ") has been deleted."); } else { System.out.println("HashStoreClient - No options found, use -h for help."); } @@ -272,91 +260,64 @@ private static Options addHashStoreClientOptions() { options.addOption("dp", "storedepth", true, "Depth of HashStore to create."); options.addOption("wp", "storewidth", true, "Width of HashStore to create."); options.addOption( - "ap", "storealgo", true, "Algorithm used for calculating file addresses in a HashStore." - ); + "ap", "storealgo", true, + "Algorithm used for calculating file addresses in a HashStore."); options.addOption( - "nsp", "storenamespace", true, "Default metadata namespace in a HashStore." - ); + "nsp", "storenamespace", true, "Default metadata namespace in a HashStore."); // Public API options + options.addOption("getchecksum", "client_getchecksum", false, + "Flag to get the hex digest of a data object in a HashStore."); options.addOption( - "getchecksum", "client_getchecksum", false, - "Flag to get the hex digest of a data object in a HashStore." - ); + "storeobject", "client_storeobject", false, "Flag to store objs to a HashStore."); options.addOption( - "findobject", "client_findobject", false, - "Flag to get the hex digest of a data object in a HashStore." - ); + "storemetadata", "client_storemetadata", false, + "Flag to store metadata to a HashStore"); + options.addOption("retrieveobject", "client_retrieveobject", false, + "Flag to retrieve objs from a HashStore."); + options.addOption("retrievemetadata", "client_retrievemetadata", false, + "Flag to retrieve metadata objs from a HashStore."); options.addOption( - "storeobject", "client_storeobject", false, "Flag to store objs to a HashStore." 
- ); - options.addOption( - "storemetadata", "client_storemetadata", false, "Flag to store metadata to a HashStore" - ); - options.addOption( - "retrieveobject", "client_retrieveobject", false, - "Flag to retrieve objs from a HashStore." - ); - options.addOption( - "retrievemetadata", "client_retrievemetadata", false, - "Flag to retrieve metadata objs from a HashStore." - ); - options.addOption( - "deleteobject", "client_deleteobject", false, "Flag to delete objs from a HashStore." - ); - options.addOption( - "deletemetadata", "client_deletemetadata", false, - "Flag to delete metadata objs from a HashStore." - ); + "deleteobject", "client_deleteobject", false, "Flag to delete objs from a HashStore."); + options.addOption("deletemetadata", "client_deletemetadata", false, + "Flag to delete metadata objs from a HashStore."); options.addOption("pid", "pidguid", true, "PID or GUID of object/metadata."); options.addOption("path", "filepath", true, "Path to object/metadata."); - options.addOption( - "algo", "objectalgo", true, - "Algorithm to use when calling '-getchecksum' or '-storeobject' flag." - ); + options.addOption("algo", "objectalgo", true, + "Algorithm to use when calling '-getchecksum' or '-storeobject' flag."); options.addOption("checksum", "obj_checksum", true, "Checksum of object to store."); options.addOption( - "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied." - ); + "checksum_algo", "obj_checksum_algo", true, "Algorithm of checksum supplied."); options.addOption("size", "obj_size", true, "Size of object to store/validate."); - options.addOption( - "format_id", "metadata_format", true, - "Format_id/namespace of metadata to store, retrieve or delete." - ); - // knbvm (test.arcticdata.io) options. Note: In order to test with knbvm, you must manually create + options.addOption("format_id", "metadata_format", true, + "Format_id/namespace of metadata to store, retrieve or delete."); + // knbvm (test.arcticdata.io) options. 
Note: In order to test with knbvm, you must + // manually create // a `pgdb.yaml` file with the respective JDBC values to access a Metacat db. options.addOption( - "knbvm", "knbvmtestadc", false, "(knbvm) Flag to specify testing with knbvm." - ); + "knbvm", "knbvmtestadc", false, "(knbvm) Flag to specify testing with knbvm."); + options.addOption("nobj", "numberofobj", true, + "(knbvm) Option to specify number of objects to retrieve from a Metacat" + + " db."); options.addOption( - "nobj", "numberofobj", true, - "(knbvm) Option to specify number of objects to retrieve from a Metacat db." - ); + "gbskip", "gbsizetoskip", true, + "(knbvm) Option to specify the size of objects to skip."); + options.addOption("sdir", "storedirectory", true, + "(knbvm) Option to specify the directory of objects to convert."); options.addOption( - "gbskip", "gbsizetoskip", true, "(knbvm) Option to specify the size of objects to skip." - ); + "stype", "storetype", true, "(knbvm) Option to specify 'objects' or 'metadata'"); options.addOption( - "sdir", "storedirectory", true, - "(knbvm) Option to specify the directory of objects to convert." - ); + "sts", "storetohs", false, "(knbvm) Test flag to store objs to a HashStore"); + options.addOption("rav", "retandval", false, + "(knbvm) Test flag to retrieve and validate objs from a HashStore."); options.addOption( - "stype", "storetype", true, "(knbvm) Option to specify 'objects' or 'metadata'" - ); - options.addOption( - "sts", "storetohs", false, "(knbvm) Test flag to store objs to a HashStore" - ); - options.addOption( - "rav", "retandval", false, - "(knbvm) Test flag to retrieve and validate objs from a HashStore." 
- ); - options.addOption( - "dfs", "delfromhs", false, "(knbvm) Test flag to delete objs from a HashStore" - ); + "dfs", "delfromhs", false, "(knbvm) Test flag to delete objs from a HashStore"); + options.addOption("hsr", "hsservicerequest", false, "Dev option to test threading."); return options; } /** * Create a new HashStore with the given properties. - * + * * @param storePath Path to HashStore. * @param storeDepth Depth of store. * @param storeWidth Width of store. @@ -366,13 +327,12 @@ private static Options addHashStoreClientOptions() { */ private static void createNewHashStore( String storePath, String storeDepth, String storeWidth, String storeAlgorithm, - String storeNameSpace - ) throws HashStoreFactoryException, IOException { - FileHashStoreUtility.ensureNotNull(storePath, "storePath", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeDepth, "storeDepth", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeWidth, "storeWidth", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeAlgorithm, "storeAlgorithm", "HashStoreClient"); - FileHashStoreUtility.ensureNotNull(storeNameSpace, "storeNameSpace", "HashStoreClient"); + String storeNameSpace) throws IOException { + FileHashStoreUtility.ensureNotNull(storePath, "storePath"); + FileHashStoreUtility.ensureNotNull(storeDepth, "storeDepth"); + FileHashStoreUtility.ensureNotNull(storeWidth, "storeWidth"); + FileHashStoreUtility.ensureNotNull(storeAlgorithm, "storeAlgorithm"); + FileHashStoreUtility.ensureNotNull(storeNameSpace, "storeNameSpace"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", storePath); @@ -405,8 +365,7 @@ private static HashMap loadHashStoreYaml(Path storePath) { hsProperties.put("storeWidth", hashStoreYamlProperties.get("store_width")); hsProperties.put("storeAlgorithm", hashStoreYamlProperties.get("store_algorithm")); hsProperties.put( - "storeMetadataNamespace", hashStoreYamlProperties.get("store_metadata_namespace") - 
); + "storeMetadataNamespace", hashStoreYamlProperties.get("store_metadata_namespace")); } catch (IOException ioe) { ioe.printStackTrace(); @@ -418,14 +377,14 @@ private static HashMap loadHashStoreYaml(Path storePath) { /** * Initialize HashStore to use in client app. HashStore must already exist or an exception will * be thrown. - * + * * @param storePath Path to store. * @throws HashStoreFactoryException If unable to initialize HashStore. * @throws IOException If 'hashstore.yaml' cannot be loaded. * @throws FileNotFoundException When 'hashstore.yaml' is missing. */ - private static void initializeHashStore(Path storePath) throws HashStoreFactoryException, - IOException { + private static void initializeHashStore(Path storePath) + throws HashStoreFactoryException, IOException { // Load properties and get HashStore HashMap hsProperties = loadHashStoreYaml(storePath); Properties storeProperties = new Properties(); @@ -433,11 +392,9 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE storeProperties.setProperty("storeDepth", hsProperties.get("storeDepth").toString()); storeProperties.setProperty("storeWidth", hsProperties.get("storeWidth").toString()); storeProperties.setProperty( - "storeAlgorithm", hsProperties.get("storeAlgorithm").toString() - ); + "storeAlgorithm", hsProperties.get("storeAlgorithm").toString()); storeProperties.setProperty( - "storeMetadataNamespace", hsProperties.get("storeMetadataNamespace").toString() - ); + "storeMetadataNamespace", hsProperties.get("storeMetadataNamespace").toString()); // Get HashStore String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; @@ -449,19 +406,18 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE /** * Entry point for working with test data found in knbvm (test.arcticdata.io) - * + * * @param actionFlag String representing a knbvm test-related method to call. * @param objType "data" (objects) or "documents" (metadata). 
* @param originDir Directory path of given objType - * @param numObjects Number of rows to retrieve from metacat db, - * if null, will retrieve all rows. + * @param numObjects Number of rows to retrieve from metacat db, if null, will retrieve + * all rows. * @param sizeOfFilesToSkip Size of files in GB to skip * @throws IOException Related to accessing config files or objects */ private static void testWithKnbvm( String actionFlag, String objType, String originDir, String numObjects, - String sizeOfFilesToSkip - ) throws IOException { + String sizeOfFilesToSkip) throws IOException { // Load metacat db yaml // Note: In order to test with knbvm, you must manually create a `pgdb.yaml` file with the // respective JDBC values to access a Metacat db. @@ -502,7 +458,7 @@ private static void testWithKnbvm( // For each row, get guid, docid, rev, checksum and checksum_algorithm // and create a List to loop over - List> resultObjList = new ArrayList<>(); + Collection> resultObjList = new ArrayList<>(); while (resultSet.next()) { String guid = resultSet.getString("guid"); String docid = resultSet.getString("docid"); @@ -516,8 +472,8 @@ private static void testWithKnbvm( boolean skipFile = false; if (sizeOfFilesToSkip != null) { // Calculate the size of requested gb to skip in bytes - long gbFilesToSkip = Integer.parseInt(sizeOfFilesToSkip) * (1024L * 1024 - * 1024); + long gbFilesToSkip = + Integer.parseInt(sizeOfFilesToSkip) * (1024L * 1024 * 1024); if (setItemSize > gbFilesToSkip) { skipFile = true; } @@ -527,8 +483,7 @@ private static void testWithKnbvm( Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev); if (Files.exists(setItemFilePath)) { System.out.println( - "File exists (" + setItemFilePath + ")! Adding to resultObjList." - ); + "File exists (" + setItemFilePath + ")! 
Adding to resultObjList."); Map resultObj = new HashMap<>(); resultObj.put("pid", guid); resultObj.put("algorithm", formattedChecksumAlgo); @@ -572,11 +527,11 @@ private static void testWithKnbvm( /** * Store objects to a HashStore with a checksum and checksum algorithm - * + * * @param resultObjList List containing items with the following properties: 'pid', 'path', * 'algorithm', 'checksum' */ - private static void storeObjsWithChecksumFromDb(List> resultObjList) { + private static void storeObjsWithChecksumFromDb(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -587,14 +542,13 @@ private static void storeObjsWithChecksumFromDb(List> result // Store object System.out.println("Storing object for guid: " + guid); - hashStore.storeObject(objStream, guid, checksum, algorithm); + hashStore.storeObject(objStream, guid, null, checksum, algorithm, -1); } catch (PidRefsFileExistsException poee) { String errMsg = "Unexpected Error: " + poee.fillInStackTrace(); try { logExceptionToFile( - guid, errMsg, "java/store_obj_errors/PidRefsFileExistsException" - ); + guid, errMsg, "java/store_obj_errors/PidRefsFileExistsException"); } catch (Exception e) { e.printStackTrace(); } @@ -629,11 +583,11 @@ private static void storeObjsWithChecksumFromDb(List> result /** * Retrieve objects from a HashStore and validate its contents by comparing checksums. 
- * + * * @param resultObjList List containing items with the following properties: 'pid', 'algorithm', * 'checksum' */ - private static void retrieveAndValidateObjs(List> resultObjList) { + private static void retrieveAndValidateObjs(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -690,10 +644,10 @@ private static void retrieveAndValidateObjs(List> resultObjL /** * Deletes a list of objects from a HashStore - * + * * @param resultObjList List containing items with the following property: 'pid' */ - private static void deleteObjectsFromStore(List> resultObjList) { + private static void deleteObjectsFromStore(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -733,11 +687,11 @@ private static void deleteObjectsFromStore(List> resultObjLi /** * Store a list containing info about metadata to a HashStore - * + * * @param resultObjList List containing items that have the following properties: 'pid', 'path' * and 'namespace' */ - private static void storeMetadataFromDb(List> resultObjList) { + private static void storeMetadataFromDb(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -779,11 +733,11 @@ private static void storeMetadataFromDb(List> resultObjList) /** * Retrieve metadata from a HashStore and validate its contents by comparing checksums. 
- * + * * @param resultObjList List containing items with the following properties: 'pid', 'namespace', * 'algorithm', 'checksum' */ - private static void retrieveAndValidateMetadata(List> resultObjList) { + private static void retrieveAndValidateMetadata(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -798,9 +752,8 @@ private static void retrieveAndValidateMetadata(List> result // Get hex digest System.out.println("Calculating hex digest with algorithm: " + algorithm); - String streamDigest = FileHashStoreUtility.calculateHexDigest( - metadataStream, algorithm - ); + String streamDigest = + FileHashStoreUtility.calculateHexDigest(metadataStream, algorithm); metadataStream.close(); // If checksums don't match, write a .txt file @@ -809,8 +762,7 @@ private static void retrieveAndValidateMetadata(List> result + ". Checksums do not match, checksum from db: " + checksum + ". Calculated digest: " + streamDigest + ". Algorithm: " + algorithm; logExceptionToFile( - guid, errMsg, "java/retrieve_metadata_errors/checksum_mismatch" - ); + guid, errMsg, "java/retrieve_metadata_errors/checksum_mismatch"); } else { System.out.println("Checksums match!"); } @@ -845,10 +797,10 @@ private static void retrieveAndValidateMetadata(List> result /** * Deletes a list of metadata from a HashStore - * + * * @param resultObjList List containing items with the following property: 'pid' */ - private static void deleteMetadataFromStore(List> resultObjList) { + private static void deleteMetadataFromStore(Collection> resultObjList) { resultObjList.parallelStream().forEach(item -> { String guid = null; try { @@ -892,7 +844,7 @@ private static void deleteMetadataFromStore(List> resultObjL /** * Format an algorithm string value to be compatible with MessageDigest class - * + * * @param value Algorithm value to format * @return Formatted algorithm value */ @@ -913,7 +865,7 @@ private static String formatAlgo(String value) { /** * Log a plain 
text file with the guid/pid as the file name with a message. - * + * * @param guid Pid/guid for which an exception was encountered. * @param errMsg Message to write into text file. * @param directory Directory within HashStore to log error (txt) files. @@ -927,10 +879,8 @@ private static void logExceptionToFile(String guid, String errMsg, String direct Path objectErrorTxtFile = errorDirectory.resolve(guid + ".txt"); try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(objectErrorTxtFile), StandardCharsets.UTF_8 - ) - )) { + new OutputStreamWriter(Files.newOutputStream(objectErrorTxtFile), + StandardCharsets.UTF_8))) { writer.write(errMsg); } catch (Exception e) { diff --git a/src/main/java/org/dataone/hashstore/HashStoreFactory.java b/src/main/java/org/dataone/hashstore/HashStoreFactory.java index 013a9d6f..d428cc80 100644 --- a/src/main/java/org/dataone/hashstore/HashStoreFactory.java +++ b/src/main/java/org/dataone/hashstore/HashStoreFactory.java @@ -18,12 +18,11 @@ public class HashStoreFactory { /** * Factory method to generate a HashStore - * + * * @param classPackage String of the package name, ex. 
* "org.dataone.hashstore.filehashstore.FileHashStore" * @param storeProperties Properties object with the following keys: storePath, storeDepth, * storeWidth, storeAlgorithm, storeMetadataNamespace - * * @return HashStore instance ready to store objects and metadata * @throws HashStoreFactoryException When HashStore failÏs to initialize due to permissions or * class-related issues @@ -52,34 +51,35 @@ public static HashStore getHashStore(String classPackage, Properties storeProper hashstore = (HashStore) constructor.newInstance(storeProperties); } catch (ClassNotFoundException cnfe) { - String errMsg = "HashStoreFactory - Unable to find 'FileHashStore' classPackage: " - + classPackage + " - " + cnfe.fillInStackTrace(); + String errMsg = + "HashStoreFactory - Unable to find 'FileHashStore' classPackage: " + classPackage + + " - " + cnfe.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (NoSuchMethodException nsme) { - String errMsg = "HashStoreFactory - Constructor not found for 'FileHashStore': " - + classPackage + " - " + nsme.fillInStackTrace(); + String errMsg = + "HashStoreFactory - Constructor not found for 'FileHashStore': " + classPackage + + " - " + nsme.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (IllegalAccessException iae) { String errMsg = "HashStoreFactory - Executing method does not have access to the definition of" - + " the specified class , field, method or constructor. " + iae - .fillInStackTrace(); + + " the specified class , field, method or constructor. 
" + iae.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (InstantiationException ie) { String errMsg = "HashStoreFactory - Error instantiating 'FileHashStore'" - + "(likely related to `.newInstance()`): " + ie.fillInStackTrace(); + + "(likely related to `.newInstance()`): " + ie.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); } catch (InvocationTargetException ite) { - String errMsg = "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite - .fillInStackTrace(); + String errMsg = + "HashStoreFactory - Error creating 'FileHashStore' instance: " + ite.getCause(); logHashStore.error(errMsg); throw new HashStoreFactoryException(errMsg); diff --git a/src/main/java/org/dataone/hashstore/ObjectMetadata.java b/src/main/java/org/dataone/hashstore/ObjectMetadata.java index 9347a7c7..8d6c360d 100644 --- a/src/main/java/org/dataone/hashstore/ObjectMetadata.java +++ b/src/main/java/org/dataone/hashstore/ObjectMetadata.java @@ -3,54 +3,10 @@ import java.util.Map; /** - * ObjectMetadata is a class that models a unique identifier for an object in the HashStore. It - * encapsulates information about a file's content identifier (cid), size, and associated hash - * digest values. By using ObjectMetadata objects, client code can easily obtain metadata of a store - * object in HashStore without needing to know the underlying file system details. + * ObjectMetadata is a record that that contains metadata about an object in the HashStore. It + * encapsulates information about a file's authority-based/persistent identifier (pid), content + * identifier (cid), size, and associated hash digest values. */ -public class ObjectMetadata { - private final String cid; - private final long size; - private final Map hexDigests; +public record ObjectMetadata(String pid, String cid, long size, Map hexDigests) { - /** - * Creates a new instance of ObjectMetadata with the given properties. 
- * - * @param cid Unique identifier for the file - * @param size Size of stored file - * @param hexDigests A map of hash algorithm names to their hex-encoded digest values for the - * file - */ - public ObjectMetadata(String cid, long size, Map hexDigests) { - this.cid = cid; - this.size = size; - this.hexDigests = hexDigests; - } - - /** - * Return the cid (content identifier) of the file - * - * @return cid - */ - public String getCid() { - return cid; - } - - /** - * Return the size of the file - * - * @return size - */ - public long getSize() { - return size; - } - - /** - * Return a map of hex digests (checksums) - * - * @return hexDigests - */ - public Map getHexDigests() { - return hexDigests; - } -} +} \ No newline at end of file diff --git a/src/main/java/org/dataone/hashstore/exceptions/CidNotFoundInPidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/CidNotFoundInPidRefsFileException.java new file mode 100644 index 00000000..7eb5b299 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/CidNotFoundInPidRefsFileException.java @@ -0,0 +1,12 @@ +package org.dataone.hashstore.exceptions; + +/** + * Custom exception class for FileHashStore when the expected cid is not found in the pid refs file. 
+ */ +public class CidNotFoundInPidRefsFileException extends IllegalArgumentException { + + public CidNotFoundInPidRefsFileException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java b/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java new file mode 100644 index 00000000..5b7a19b3 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/HashStoreRefsAlreadyExistException.java @@ -0,0 +1,14 @@ +package org.dataone.hashstore.exceptions; + +import java.nio.file.FileAlreadyExistsException; + +/** + * Custom exception thrown when called to tag a pid and cid, and reference files already exist + */ +public class HashStoreRefsAlreadyExistException extends FileAlreadyExistsException { + + public HashStoreRefsAlreadyExistException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java b/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java new file mode 100644 index 00000000..e100b702 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/HashStoreServiceException.java @@ -0,0 +1,10 @@ +package org.dataone.hashstore.exceptions; + +/** + * An exception that encapsulates errors from the HashStore Runnable Test Class + */ +public class HashStoreServiceException extends Exception { + public HashStoreServiceException(String message) { + super(message); + } +} \ No newline at end of file diff --git a/src/main/java/org/dataone/hashstore/exceptions/MissingHexDigestsException.java b/src/main/java/org/dataone/hashstore/exceptions/MissingHexDigestsException.java new file mode 100644 index 00000000..46cd1c00 --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/MissingHexDigestsException.java @@ -0,0 +1,14 @@ +package org.dataone.hashstore.exceptions; + +import java.util.NoSuchElementException; + +/** + * An exception 
thrown when hexDigests from a supplied ObjectMetadata object is empty. + */ +public class MissingHexDigestsException extends NoSuchElementException { + + public MissingHexDigestsException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java new file mode 100644 index 00000000..3704144e --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingChecksumException.java @@ -0,0 +1,13 @@ +package org.dataone.hashstore.exceptions; + +/** + * An exception thrown when a checksum does not match what is expected. + */ + +public class NonMatchingChecksumException extends IllegalArgumentException { + + public NonMatchingChecksumException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java new file mode 100644 index 00000000..c42cf99c --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/NonMatchingObjSizeException.java @@ -0,0 +1,14 @@ +package org.dataone.hashstore.exceptions; + +/** + * An exception thrown when a data object size does not match what is expected. 
+ */ + +public class NonMatchingObjSizeException extends IllegalArgumentException { + + public NonMatchingObjSizeException(String message) { + super(message); + } + +} + diff --git a/src/main/java/org/dataone/hashstore/exceptions/OrphanRefsFilesException.java b/src/main/java/org/dataone/hashstore/exceptions/OrphanRefsFilesException.java new file mode 100644 index 00000000..e8d4cb6b --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/OrphanRefsFilesException.java @@ -0,0 +1,14 @@ +package org.dataone.hashstore.exceptions; + +import java.io.IOException; + +/** + * Custom exception class for FileHashStore when both a pid and cid reference file is found + * but object does not exist. + */ +public class OrphanRefsFilesException extends IOException { + public OrphanRefsFilesException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java b/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java index 2cd9d4b6..d635f304 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/PidNotFoundInCidRefsFileException.java @@ -5,7 +5,7 @@ /** * Custom exception class for FileHashStore when a pid is not found in a cid refs file. 
*/ -public class PidNotFoundInCidRefsFileException extends IOException { +public class PidNotFoundInCidRefsFileException extends IllegalArgumentException { public PidNotFoundInCidRefsFileException(String message) { super(message); } diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java index 586d0f1f..57aae024 100644 --- a/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java +++ b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileExistsException.java @@ -3,7 +3,8 @@ import java.io.IOException; /** - * Custom exception class for FileHashStore pidObjects + * Custom exception class thrown when a pid refs file already exists (a single pid can only ever + * reference one cid) */ public class PidRefsFileExistsException extends IOException { public PidRefsFileExistsException(String message) { diff --git a/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileNotFoundException.java b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileNotFoundException.java new file mode 100644 index 00000000..0c116bfe --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/PidRefsFileNotFoundException.java @@ -0,0 +1,10 @@ +package org.dataone.hashstore.exceptions; + +import java.io.FileNotFoundException; + +public class PidRefsFileNotFoundException extends FileNotFoundException { + public PidRefsFileNotFoundException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java new file mode 100644 index 00000000..f196c77d --- /dev/null +++ b/src/main/java/org/dataone/hashstore/exceptions/UnsupportedHashAlgorithmException.java @@ -0,0 +1,13 @@ +package org.dataone.hashstore.exceptions; + +/** + * An exception thrown when a given algorithm is not 
supported by FileHashStore java + */ + +public class UnsupportedHashAlgorithmException extends IllegalArgumentException { + + public UnsupportedHashAlgorithmException(String message) { + super(message); + } + +} diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java index a3afe905..1b4d4715 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStore.java @@ -21,10 +21,10 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Properties; import com.fasterxml.jackson.databind.ObjectMapper; @@ -36,9 +36,17 @@ import org.apache.commons.logging.LogFactory; import org.dataone.hashstore.ObjectMetadata; import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.MissingHexDigestsException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; +import org.dataone.hashstore.exceptions.OrphanRefsFilesException; import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; +import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; /** * FileHashStore is a HashStore adapter class that manages the storage of objects and metadata to a @@ -48,9 +56,10 @@ public class FileHashStore implements HashStore { private 
static final Log logFileHashStore = LogFactory.getLog(FileHashStore.class); private static final int TIME_OUT_MILLISEC = 1000; - private static final ArrayList objectLockedIds = new ArrayList<>(100); - private static final ArrayList metadataLockedIds = new ArrayList<>(100); - private static final ArrayList referenceLockedCids = new ArrayList<>(100); + private static final Collection objectLockedCids = new ArrayList<>(100); + private static final Collection objectLockedPids = new ArrayList<>(100); + private static final Collection metadataLockedDocIds = new ArrayList<>(100); + private static final Collection referenceLockedPids = new ArrayList<>(100); private final Path STORE_ROOT; private final int DIRECTORY_DEPTH; private final int DIRECTORY_WIDTH; @@ -67,9 +76,13 @@ public class FileHashStore implements HashStore { public static final String HASHSTORE_YAML = "hashstore.yaml"; - public static final String[] SUPPORTED_HASH_ALGORITHMS = {"MD2", "MD5", "SHA-1", "SHA-256", - "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; + public static final String[] SUPPORTED_HASH_ALGORITHMS = + {"MD2", "MD5", "SHA-1", "SHA-256", "SHA-384", "SHA-512", "SHA-512/224", "SHA-512/256"}; + + /** + * The default hash algorithms included in the ObjectMetadata when storing objects. + */ enum DefaultHashAlgorithms { MD5("MD5"), SHA_1("SHA-1"), SHA_256("SHA-256"), SHA_384("SHA-384"), SHA_512("SHA-512"); @@ -84,16 +97,47 @@ public String getName() { } } + /** + * The two different type of HashStore identifiers + */ + public enum HashStoreIdTypes { + cid, pid + } + + /** + * The configuration properties for a HashStore + */ enum HashStoreProperties { storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace } /** - * Constructor to initialize HashStore, properties are required. 
+ * When working with refs files, we either add or remove values + */ + enum HashStoreRefUpdateTypes { + add, remove + } + + /** + * Record object to encapsulate information when searching for an existing HashStore object + * based on a given persistent identifier {@code pid} * - * Note: HashStore is not responsible for ensuring that the given store path is accurate. It - * will only check for an existing configuration, directories or objects at the supplied store - * path before initializing. + * @param cid Content identifier of the data object of a given pid + * @param cidObjectPath Path to the data object + * @param cidRefsPath Path to the data object's reference file + * @param pidRefsPath Path to the pid's that references the data object + * @param sysmetaPath Path to the pid's system metadata if available + */ + record ObjectInfo(String cid, String cidObjectPath, String cidRefsPath, String pidRefsPath, + String sysmetaPath) { + } + + /** + * Constructor to initialize FileHashStore, properties are required. FileHashStore is not + * responsible for ensuring that the given store path is accurate. Upon initialization, if an + * existing config file (hashstore.yaml) is present, it will confirm that it is accurate against + * the supplied properties. If not, FileHashSTore will check for 'hashstore' specific + * directories at the supplied store path before initializing. 
* * @param hashstoreProperties Properties object with the following keys: storePath, storeDepth, * storeWidth, storeAlgorithm, storeMetadataNamespace @@ -101,35 +145,25 @@ enum HashStoreProperties { * @throws IOException Issue with creating directories * @throws NoSuchAlgorithmException Unsupported store algorithm */ - public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException { - logFileHashStore.info("FileHashStore - Call received to instantiate FileHashStore"); - FileHashStoreUtility.ensureNotNull( - hashstoreProperties, "hashstoreProperties", "FileHashStore - constructor" - ); + public FileHashStore(Properties hashstoreProperties) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException { + logFileHashStore.info("Initializing FileHashStore"); + FileHashStoreUtility.ensureNotNull(hashstoreProperties, "hashstoreProperties"); // Get properties - // Note - Paths.get() throws NullPointerException if arg is null - Path storePath = Paths.get( - hashstoreProperties.getProperty(HashStoreProperties.storePath.name()) - ); + Path storePath = + Paths.get(hashstoreProperties.getProperty(HashStoreProperties.storePath.name())); int storeDepth = Integer.parseInt( - hashstoreProperties.getProperty(HashStoreProperties.storeDepth.name()) - ); + hashstoreProperties.getProperty(HashStoreProperties.storeDepth.name())); int storeWidth = Integer.parseInt( - hashstoreProperties.getProperty(HashStoreProperties.storeWidth.name()) - ); - String storeAlgorithm = hashstoreProperties.getProperty( - HashStoreProperties.storeAlgorithm.name() - ); - String storeMetadataNamespace = hashstoreProperties.getProperty( - HashStoreProperties.storeMetadataNamespace.name() - ); - - // Check given properties and/with existing HashStore + hashstoreProperties.getProperty(HashStoreProperties.storeWidth.name())); + String storeAlgorithm = + hashstoreProperties.getProperty(HashStoreProperties.storeAlgorithm.name()); + String 
storeMetadataNamespace = + hashstoreProperties.getProperty(HashStoreProperties.storeMetadataNamespace.name()); + verifyHashStoreProperties( - storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace - ); + storePath, storeDepth, storeWidth, storeAlgorithm, storeMetadataNamespace); // HashStore configuration has been reviewed, proceed with initialization STORE_ROOT = storePath; @@ -137,20 +171,16 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep DIRECTORY_WIDTH = storeWidth; OBJECT_STORE_ALGORITHM = storeAlgorithm; DEFAULT_METADATA_NAMESPACE = storeMetadataNamespace; - // Resolve object/metadata/refs directories OBJECT_STORE_DIRECTORY = storePath.resolve("objects"); METADATA_STORE_DIRECTORY = storePath.resolve("metadata"); REFS_STORE_DIRECTORY = storePath.resolve("refs"); - // Resolve tmp object/metadata directory paths, this is where objects are - // created before they are moved to their permanent address OBJECT_TMP_FILE_DIRECTORY = OBJECT_STORE_DIRECTORY.resolve("tmp"); METADATA_TMP_FILE_DIRECTORY = METADATA_STORE_DIRECTORY.resolve("tmp"); REFS_TMP_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("tmp"); - REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("pid"); - REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("cid"); + REFS_PID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("pids"); + REFS_CID_FILE_DIRECTORY = REFS_STORE_DIRECTORY.resolve("cids"); try { - // Physically create object & metadata store and tmp directories Files.createDirectories(OBJECT_STORE_DIRECTORY); Files.createDirectories(METADATA_STORE_DIRECTORY); Files.createDirectories(REFS_STORE_DIRECTORY); @@ -159,37 +189,29 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep Files.createDirectories(REFS_TMP_FILE_DIRECTORY); Files.createDirectories(REFS_PID_FILE_DIRECTORY); Files.createDirectories(REFS_CID_FILE_DIRECTORY); - logFileHashStore.debug("FileHashStore - Created store and store tmp directories."); + 
logFileHashStore.debug("Created store and store tmp directories."); } catch (IOException ioe) { - logFileHashStore.fatal( - "FileHashStore - Failed to initialize FileHashStore - unable to create" - + " directories. Exception: " + ioe.getMessage() - ); + logFileHashStore.fatal("Failed to initialize FileHashStore - unable to create" + + " directories. Exception: " + ioe.getMessage()); throw ioe; } logFileHashStore.debug( - "FileHashStore - HashStore initialized. Store Depth: " + DIRECTORY_DEPTH - + ". Store Width: " + DIRECTORY_WIDTH + ". Store Algorithm: " - + OBJECT_STORE_ALGORITHM + ". Store Metadata Namespace: " - + DEFAULT_METADATA_NAMESPACE - ); + "HashStore initialized. Store Depth: " + DIRECTORY_DEPTH + ". Store Width: " + + DIRECTORY_WIDTH + ". Store Algorithm: " + OBJECT_STORE_ALGORITHM + + ". Store Metadata Namespace: " + DEFAULT_METADATA_NAMESPACE); // Write configuration file 'hashstore.yaml' to store HashStore properties Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); if (!Files.exists(hashstoreYaml)) { - String hashstoreYamlContent = buildHashStoreYamlString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, DEFAULT_METADATA_NAMESPACE - ); + String hashstoreYamlContent = + buildHashStoreYamlString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, OBJECT_STORE_ALGORITHM, + DEFAULT_METADATA_NAMESPACE); writeHashStoreYaml(hashstoreYamlContent); - logFileHashStore.info( - "FileHashStore - 'hashstore.yaml' written to storePath: " + hashstoreYaml - ); + logFileHashStore.info("hashstore.yaml written to storePath: " + hashstoreYaml); } else { logFileHashStore.info( - "FileHashStore - 'hashstore.yaml' exists and has been verified." - + " Initializing FileHashStore." - ); + "hashstore.yaml exists and has been verified." 
+ " Initializing FileHashStore."); } } @@ -197,95 +219,89 @@ public FileHashStore(Properties hashstoreProperties) throws IllegalArgumentExcep /** * Determines whether FileHashStore can instantiate by validating a set of arguments and - * throwing exceptions. HashStore will not instantiate if an existing configuration file's - * properties (`hashstore.yaml`) are different from what is supplied - or if an object store - * exists at the given path, but it is missing the `hashstore.yaml` config file. - * - * If `hashstore.yaml` exists, it will retrieve its properties and compare them with the given - * values; and if there is a mismatch, an exception will be thrown. If not, it will look to see - * if any directories/files exist in the given store path and throw an exception if any file or - * directory is found. + * throwing exceptions. If HashStore configuration file ({@code hashstore.yaml}) exists, it will + * retrieve its properties and compare them with the given values; and if there is a mismatch, + * an exception will be thrown. If not, it will look to see if any relevant HashStore + * directories exist (i.e. '/objects', '/metadata', '/refs') in the given store path and throw + * an exception if any of those directories exist. 
* * @param storePath Path where HashStore will store objects * @param storeDepth Depth of directories * @param storeWidth Width of directories * @param storeAlgorithm Algorithm to use when calculating object addresses - * @param storeMetadataNamespace Default metadata namespace (`formatId`) + * @param storeMetadataNamespace Default metadata namespace ({@code formatId}) * @throws NoSuchAlgorithmException If algorithm supplied is not supported - * @throws IOException If `hashstore.yaml` config file cannot be retrieved/opened + * @throws IOException If {@code hashstore.yaml} config file cannot be + * retrieved/opened + * @throws IllegalArgumentException If depth or width is less than 0 + * @throws IllegalStateException If dirs/objects exist, but HashStore config is missing */ protected void verifyHashStoreProperties( Path storePath, int storeDepth, int storeWidth, String storeAlgorithm, - String storeMetadataNamespace - ) throws NoSuchAlgorithmException, IOException { + String storeMetadataNamespace) + throws NoSuchAlgorithmException, IOException, IllegalArgumentException, + IllegalStateException { if (storeDepth <= 0 || storeWidth <= 0) { - String errMsg = "FileHashStore - Depth and width must be greater than 0." + " Depth: " - + storeDepth + ". Width: " + storeWidth; + String errMsg = + "Depth and width must be > than 0. Depth: " + storeDepth + ". 
Width: " + storeWidth; logFileHashStore.fatal(errMsg); throw new IllegalArgumentException(errMsg); } - // Ensure algorithm supplied is not empty, not null and supported validateAlgorithm(storeAlgorithm); - // Review metadata format (formatId) - FileHashStoreUtility.ensureNotNull( - storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" - ); - FileHashStoreUtility.checkForEmptyString( - storeMetadataNamespace, "storeMetadataNamespace", "FileHashStore - constructor" - ); + FileHashStoreUtility.ensureNotNull(storeMetadataNamespace, "storeMetadataNamespace"); + FileHashStoreUtility.checkForNotEmptyAndValidString( + storeMetadataNamespace, "storeMetadataNamespace"); // Check to see if configuration exists before initializing Path hashstoreYamlPredictedPath = Paths.get(storePath + "/hashstore.yaml"); if (Files.exists(hashstoreYamlPredictedPath)) { - logFileHashStore.debug("FileHashStore - 'hashstore.yaml' found, verifying properties."); + logFileHashStore.debug("hashstore.yaml found, checking properties."); HashMap hsProperties = loadHashStoreYaml(storePath); int existingStoreDepth = (int) hsProperties.get(HashStoreProperties.storeDepth.name()); int existingStoreWidth = (int) hsProperties.get(HashStoreProperties.storeWidth.name()); - String existingStoreAlgorithm = (String) hsProperties.get( - HashStoreProperties.storeAlgorithm.name() - ); - String existingStoreMetadataNs = (String) hsProperties.get( - HashStoreProperties.storeMetadataNamespace.name() - ); - - // Verify properties when 'hashstore.yaml' found - checkConfigurationEquality("store depth", storeDepth, existingStoreDepth); - checkConfigurationEquality("store width", storeWidth, existingStoreWidth); - checkConfigurationEquality("store algorithm", storeAlgorithm, existingStoreAlgorithm); - checkConfigurationEquality( - "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs - ); + String existingStoreAlgorithm = + (String) 
hsProperties.get(HashStoreProperties.storeAlgorithm.name()); + String existingStoreMetadataNs = + (String) hsProperties.get(HashStoreProperties.storeMetadataNamespace.name()); + + FileHashStoreUtility.checkObjectEquality("store depth", storeDepth, existingStoreDepth); + FileHashStoreUtility.checkObjectEquality("store width", storeWidth, existingStoreWidth); + FileHashStoreUtility.checkObjectEquality("store algorithm", storeAlgorithm, + existingStoreAlgorithm); + FileHashStoreUtility.checkObjectEquality( + "store metadata namespace", storeMetadataNamespace, existingStoreMetadataNs); + logFileHashStore.info("hashstore.yaml found and HashStore verified"); } else { - // Check if HashStore exists at the given store path (and is missing config) - logFileHashStore.debug( - "FileHashStore - 'hashstore.yaml' not found, check store path for" - + " objects and directories." - ); - + // Check if HashStore related folders exist at the given store path + logFileHashStore.debug("hashstore.yaml not found, checking store path for" + + " `/objects`, `/metadata` and `/refs` directories."); if (Files.isDirectory(storePath)) { - if (!FileHashStoreUtility.isDirectoryEmpty(storePath)) { - String errMsg = "FileHashStore - Missing 'hashstore.yaml' but directories" - + " and/or objects found."; - logFileHashStore.fatal(errMsg); - throw new IllegalStateException(errMsg); - + Path[] conflictingDirectories = + {storePath.resolve("objects"), storePath.resolve("metadata"), + storePath.resolve("refs")}; + for (Path dir : conflictingDirectories) { + if (Files.exists(dir) && Files.isDirectory(dir)) { + String errMsg = "FileHashStore - Unable to initialize HashStore." + + "`hashstore.yaml` is not found but potential conflicting" + + " directory exists: " + dir + ". 
Please choose a new folder or" + + " delete the conflicting directory and try again."; + logFileHashStore.fatal(errMsg); + throw new IllegalStateException(errMsg); + } } } - logFileHashStore.debug( - "FileHashStore - 'hashstore.yaml' not found and store path" - + " not yet initialized." - ); + logFileHashStore.debug("hashstore.yaml not found. Supplied properties accepted."); } } /** - * Get the properties of HashStore from 'hashstore.yaml' + * Get the properties of HashStore from an existing 'hashstore.yaml' * * @param storePath Path to root of store * @return HashMap of the properties - * @throws IOException If `hashstore.yaml` doesn't exist + * @throws IOException If {@code hashstore.yaml} doesn't exist */ protected HashMap loadHashStoreYaml(Path storePath) throws IOException { Path hashStoreYamlPath = storePath.resolve(HASHSTORE_YAML); @@ -296,27 +312,19 @@ protected HashMap loadHashStoreYaml(Path storePath) throws IOExc try { HashMap hashStoreYamlProperties = om.readValue(hashStoreYamlFile, HashMap.class); hsProperties.put( - HashStoreProperties.storeDepth.name(), hashStoreYamlProperties.get("store_depth") - ); + HashStoreProperties.storeDepth.name(), hashStoreYamlProperties.get("store_depth")); hsProperties.put( - HashStoreProperties.storeWidth.name(), hashStoreYamlProperties.get("store_width") - ); + HashStoreProperties.storeWidth.name(), hashStoreYamlProperties.get("store_width")); hsProperties.put( - HashStoreProperties.storeAlgorithm.name(), hashStoreYamlProperties.get( - "store_algorithm" - ) - ); + HashStoreProperties.storeAlgorithm.name(), + hashStoreYamlProperties.get("store_algorithm")); hsProperties.put( - HashStoreProperties.storeMetadataNamespace.name(), hashStoreYamlProperties.get( - "store_metadata_namespace" - ) - ); + HashStoreProperties.storeMetadataNamespace.name(), + hashStoreYamlProperties.get("store_metadata_namespace")); } catch (IOException ioe) { logFileHashStore.fatal( - "FileHashStore.getHashStoreYaml() - Unable to retrieve 
'hashstore.yaml'." - + " IOException: " + ioe.getMessage() - ); + " Unable to retrieve 'hashstore.yaml'. IOException: " + ioe.getMessage()); throw ioe; } @@ -327,46 +335,22 @@ protected HashMap loadHashStoreYaml(Path storePath) throws IOExc * Write a 'hashstore.yaml' file to STORE_ROOT * * @param yamlString Content of the HashStore configuration - * @throws IOException If unable to write `hashstore.yaml` + * @throws IOException If unable to write {@code hashstore.yaml} */ protected void writeHashStoreYaml(String yamlString) throws IOException { Path hashstoreYaml = STORE_ROOT.resolve(HASHSTORE_YAML); try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter(Files.newOutputStream(hashstoreYaml), StandardCharsets.UTF_8) - )) { + new OutputStreamWriter(Files.newOutputStream(hashstoreYaml), StandardCharsets.UTF_8))) { writer.write(yamlString); } catch (IOException ioe) { logFileHashStore.fatal( - "FileHashStore.writeHashStoreYaml() - Unable to write 'hashstore.yaml'." - + " IOException: " + ioe.getMessage() - ); + "Unable to write 'hashstore.yaml'. IOException: " + ioe.getMessage()); throw ioe; } } - /** - * Checks the equality of a supplied value with an existing value for a specific configuration - * property. 
- * - * @param propertyName The name of the config property being checked - * @param suppliedValue The value supplied for the config property - * @param existingValue The existing value of the config property - * @throws IllegalArgumentException If the supplied value is not equal to the existing value - */ - protected void checkConfigurationEquality( - String propertyName, Object suppliedValue, Object existingValue - ) { - if (!Objects.equals(suppliedValue, existingValue)) { - String errMsg = "FileHashStore.checkConfigurationEquality() - Supplied " + propertyName - + ": " + suppliedValue + " does not match the existing configuration value: " - + existingValue; - logFileHashStore.fatal(errMsg); - throw new IllegalArgumentException(errMsg); - } - } - /** * Build the string content of the configuration file for HashStore - 'hashstore.yaml' * @@ -378,39 +362,40 @@ protected void checkConfigurationEquality( * @return String that representing the contents of 'hashstore.yaml' */ protected String buildHashStoreYamlString( - int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace - ) { - - return String.format( - "# Default configuration variables for HashStore\n\n" - + "############### Directory Structure ###############\n" - + "# Desired amount of directories when sharding an object to " - + "form the permanent address\n" - + "store_depth: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP " + "NEW HASHSTORE\n" - + "# Width of directories created when sharding an object to " - + "form the permanent address\n" - + "store_width: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP " + "NEW HASHSTORE\n" - + "# Example:\n" + "# Below, objects are shown listed in directories that are # " - + "levels deep (DIR_DEPTH=3),\n" - + "# with each directory consisting of 2 characters " + "(DIR_WIDTH=2).\n" - + "# /var/filehashstore/objects\n" + "# ├── 7f\n" + "# │ └── 5c\n" - + "# │ └── c1\n" + "# │ └── " - + 
"8f0b04e812a3b4c8f686ce34e6fec558804bf61e54b176742a7f6368d6\n\n" - + "############### Format of the Metadata ###############\n" - + "store_sysmeta_namespace: \"http://ns.dataone" + ".org/service/types/v2.0\"\n\n" - + "############### Hash Algorithms ###############\n" - + "# Hash algorithm to use when calculating object's hex digest " - + "for the permanent address\n" + "store_algorithm: \"%s\"\n" - + "############### Hash Algorithms ###############\n" - + "# Hash algorithm to use when calculating object's hex digest " - + "for the permanent address\n" + "store_metadata_namespace: \"%s\"\n" - + "# The default algorithm list includes the hash algorithms " - + "calculated when storing an\n" - + "# object to disk and returned to the caller after successful " + "storage.\n" - + "store_default_algo_list:\n" + "- \"MD5\"\n" + "- \"SHA-1\"\n" + "- \"SHA-256\"\n" - + "- \"SHA-384\"\n" + "- \"SHA-512\"\n", storeDepth, storeWidth, storeAlgorithm, - storeMetadataNamespace - ); + int storeDepth, int storeWidth, String storeAlgorithm, String storeMetadataNamespace) { + return String.format(""" + # Default configuration variables for HashStore + + ############### Directory Structure ############### + # Desired amount of directories when sharding an object to form the permanent address + store_depth: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP NEW HASHSTORE + # Width of directories created when sharding an object to form the permanent address + store_width: %d # WARNING: DO NOT CHANGE UNLESS SETTING UP NEW HASHSTORE + # Example: + # Below, objects are shown listed in directories that are # levels deep (DIR_DEPTH=3), + # with each directory consisting of 2 characters (DIR_WIDTH=2). 
/**
 * Stores a data object for the given pid and returns its {@code ObjectMetadata}.
 *
 * <p>Validation is performed up front; the actual store (and per-pid synchronization) is
 * delegated to {@code syncPutObject}. Note that validation order is significant: it
 * determines which exception surfaces first when multiple arguments are invalid.</p>
 *
 * @param object              Input stream of the data object; closed by this method via
 *                            try-with-resources
 * @param pid                 Persistent identifier to store the object under
 * @param additionalAlgorithm Extra hash algorithm name; may be null, but if supplied it
 *                            must be a non-empty, supported algorithm
 * @param checksum            Expected checksum value, passed through to syncPutObject
 * @param checksumAlgorithm   Algorithm of the supplied checksum; may be null, but if
 *                            supplied it must be a non-empty, supported algorithm
 * @param objSize             Expected object size; -1 is a sentinel meaning "no size
 *                            supplied" and skips the positivity check here
 * @throws NoSuchAlgorithmException If a supplied algorithm is not supported
 * @throws IOException              If an I/O error occurs while storing
 * @throws RuntimeException         On invalid arguments or duplicate in-progress requests
 * @throws InterruptedException     If the store is interrupted while synchronizing
 */
@Override
public ObjectMetadata storeObject(
    InputStream object, String pid, String additionalAlgorithm, String checksum,
    String checksumAlgorithm, long objSize)
    throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException {
    logFileHashStore.debug("Storing data object for pid: " + pid);
    // Validate input parameters
    FileHashStoreUtility.ensureNotNull(object, "object");
    FileHashStoreUtility.ensureNotNull(pid, "pid");
    FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid");
    // Validate algorithms if not null or empty, throws exception if not supported
    if (additionalAlgorithm != null) {
        FileHashStoreUtility.checkForNotEmptyAndValidString(
            additionalAlgorithm, "additionalAlgorithm");
        validateAlgorithm(additionalAlgorithm);
    }
    if (checksumAlgorithm != null) {
        FileHashStoreUtility.checkForNotEmptyAndValidString(
            checksumAlgorithm, "checksumAlgorithm");
        validateAlgorithm(checksumAlgorithm);
    }
    if (objSize != -1) {
        FileHashStoreUtility.checkPositive(objSize);
    }
    // try-with-resources guarantees the input stream is closed on exit
    try (object) {
        return syncPutObject(
            object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize);
    }
    // Close stream
}
Already in progress."; - logFileHashStore.warn(errMsg); - throw new RuntimeException(errMsg); + String checksumAlgorithm, long objSize) + throws NoSuchAlgorithmException, IOException, RuntimeException, InterruptedException { + try { + // Lock pid for thread safety, transaction control and atomic writing + // An object is stored once and only once + synchronized (objectLockedPids) { + if (objectLockedPids.contains(pid)) { + String errMsg = "Duplicate object request encountered for pid: " + pid + + ". Already in progress."; + logFileHashStore.warn(errMsg); + throw new RuntimeException(errMsg); + } + logFileHashStore.debug("Synchronizing objectLockedPids for pid: " + pid); + objectLockedPids.add(pid); } - logFileHashStore.debug( - "FileHashStore.storeObject - Synchronizing objectLockedIds for pid: " + pid - ); - objectLockedIds.add(pid); - } - try { logFileHashStore.debug( - "FileHashStore.syncPutObject - called .putObject() to store pid: " + pid - + ". additionalAlgorithm: " + additionalAlgorithm + ". checksum: " + checksum - + ". checksumAlgorithm: " + checksumAlgorithm - ); + "putObject() called to store pid: " + pid + ". additionalAlgorithm: " + + additionalAlgorithm + ". checksum: " + checksum + ". checksumAlgorithm: " + + checksumAlgorithm); // Store object - ObjectMetadata objInfo = putObject( - object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize - ); + ObjectMetadata objInfo = + putObject(object, pid, additionalAlgorithm, checksum, checksumAlgorithm, objSize); // Tag object - String cid = objInfo.getCid(); + String cid = objInfo.cid(); tagObject(pid, cid); logFileHashStore.info( - "FileHashStore.syncPutObject - Object stored for pid: " + pid - + ". Permanent address: " + getRealPath(pid, "object", null) - ); + "Object stored for pid: " + pid + " at " + getHashStoreDataObjectPath(pid)); return objInfo; } catch (NoSuchAlgorithmException nsae) { - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". 
NoSuchAlgorithmException: " + nsae.getMessage(); + String errMsg = + "Unable to store object for pid: " + pid + ". NoSuchAlgorithmException: " + + nsae.getMessage(); logFileHashStore.error(errMsg); throw nsae; } catch (PidRefsFileExistsException prfee) { - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". PidRefsFileExistsException: " + prfee.getMessage(); + String errMsg = + "Unable to store object for pid: " + pid + ". PidRefsFileExistsException: " + + prfee.getMessage(); logFileHashStore.error(errMsg); throw prfee; } catch (IOException ioe) { // Covers AtomicMoveNotSupportedException, FileNotFoundException - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". IOException: " + ioe.getMessage(); + String errMsg = + "Unable to store object for pid: " + pid + ". IOException: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; } catch (RuntimeException re) { // Covers SecurityException, IllegalArgumentException, NullPointerException - String errMsg = "FileHashStore.syncPutObject - Unable to store object for pid: " + pid - + ". Runtime Exception: " + re.getMessage(); + String errMsg = "Unable to store object for pid: " + pid + ". 
Runtime Exception: " + + re.getMessage(); logFileHashStore.error(errMsg); throw re; } finally { // Release lock - synchronized (objectLockedIds) { - logFileHashStore.debug( - "FileHashStore.syncPutObject - Releasing objectLockedIds for pid: " + pid - ); - objectLockedIds.remove(pid); - objectLockedIds.notifyAll(); - } + releaseObjectLockedPids(pid); } } @@ -537,279 +505,84 @@ private ObjectMetadata syncPutObject( * Overload method for storeObject with just an InputStream */ @Override - public ObjectMetadata storeObject(InputStream object) throws NoSuchAlgorithmException, - IOException, PidRefsFileExistsException, RuntimeException { + public ObjectMetadata storeObject(InputStream object) + throws NoSuchAlgorithmException, IOException, RuntimeException, + InterruptedException { // 'putObject' is called directly to bypass the pid synchronization implemented to - // efficiently handle duplicate object store requests. Since there is no pid, calling - // 'storeObject' would unintentionally create a bottleneck for all requests without a - // pid (they would be executed sequentially). This scenario occurs when metadata about - // the object (ex. form data including the pid, checksum, checksum algorithm, etc.) is - // unavailable. + // efficiently handle object store requests without a pid. This scenario occurs when + // metadata about the object (ex. form data including the pid, checksum, checksum + // algorithm, etc.) is unavailable. // // Note: This method does not tag the object to make it discoverable, so the client can - // call 'verifyObject' (optional) to check that the object is valid, and 'tagObject' - // (required) to create the reference files needed to associate the respective pids/cids. - return putObject(object, "HashStoreNoPid", null, null, null, -1); - } - - - /** - * Overload method for storeObject with size and a checksum & checksumAlgorithm. 
/**
 * Overload method for storeObject with just an InputStream. The object is stored under the
 * placeholder pid "HashStoreNoPid" with no additional algorithm, checksum, or size check
 * (objSize sentinel -1), and the stream is closed on exit.
 */
@Override
public ObjectMetadata storeObject(InputStream object)
    throws NoSuchAlgorithmException, IOException, RuntimeException,
    InterruptedException {
    // 'putObject' is called directly to bypass the pid synchronization implemented to
    // efficiently handle object store requests without a pid. This scenario occurs when
    // metadata about the object (ex. form data including the pid, checksum, checksum
    // algorithm, etc.) is unavailable.
    //
    // Note: This method does not tag the object to make it discoverable, so the client can
    // call 'deleteIfInvalidObject' (optional) to check that the object is valid, and then
    // 'tagObject' (required) to create the reference files needed to associate the
    // respective pids/cids.
    //
    // NOTE(review): unlike the pid-based overload there is no ensureNotNull(object) check
    // here - presumably putObject validates its stream argument; confirm.
    try (object) {
        return putObject(object, "HashStoreNoPid", null, null, null, -1);
    }
    // Close stream
}
/**
 * Creates the reference files that associate the given pid with the given cid, making a
 * stored object discoverable by its pid. Delegates the synchronized work to
 * {@code storeHashStoreRefsFiles} and translates failures: an already-tagged pair and an
 * already-referencing pid are rethrown with clearer messages, while any other exception
 * triggers a revert via {@code unTagObject} before being rethrown.
 *
 * @param pid Persistent identifier to tag
 * @param cid Content identifier of the stored object
 * @throws IOException              If refs files cannot be written/moved
 * @throws NoSuchAlgorithmException If an address cannot be calculated for pid/cid
 * @throws InterruptedException     If interrupted while synchronizing on pid/cid
 */
@Override
public void tagObject(String pid, String cid)
    throws IOException, NoSuchAlgorithmException,
    InterruptedException {
    logFileHashStore.debug("Tagging cid (" + cid + ") with pid: " + pid);
    // Validate input parameters
    FileHashStoreUtility.ensureNotNull(pid, "pid");
    FileHashStoreUtility.ensureNotNull(cid, "cid");
    FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid");
    FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid");

    try {
        // This method synchronizes the pid and cid
        storeHashStoreRefsFiles(pid, cid);

    } catch (HashStoreRefsAlreadyExistException hsrfae) {
        // cid and pid have been released by storeHashStoreRefsFiles.
        // This exception is thrown when the pid and cid are already tagged appropriately.
        // NOTE(review): the rethrown exception drops the original cause/stack trace -
        // consider passing hsrfae as the cause if the constructor supports it.
        String errMsg =
            "HashStore refs files already exist for pid " + pid + " and cid: " + cid;
        throw new HashStoreRefsAlreadyExistException(errMsg);

    } catch (PidRefsFileExistsException prfe) {
        // cid and pid have been released by storeHashStoreRefsFiles.
        // NOTE(review): original cause is dropped here as well - see note above.
        String errMsg = "pid: " + pid + " already references another cid."
            + " A pid can only reference one cid.";
        throw new PidRefsFileExistsException(errMsg);

    } catch (Exception e) {
        // cid and pid have been released by storeHashStoreRefsFiles.
        // Revert the process for all other exceptions, then rethrow (precise rethrow keeps
        // the declared checked exception types valid).
        unTagObject(pid, cid);
        throw e;
    }
}
are already tagged appropriately + String errMsg = + "HashStore refs files already exist for pid " + pid + " and cid: " + cid; + throw new HashStoreRefsAlreadyExistException(errMsg); - } else { - String errMsg = "FileHashStore.deleteObject - Pid refs file exists, but pid (" + pid - + ") not found in cid refs file for cid: " + cid + " with address: " - + absCidRefsPath; - logFileHashStore.error(errMsg); - throw new PidNotFoundInCidRefsFileException(errMsg); - } + } catch (PidRefsFileExistsException prfe) { + // cid and pid has been released + String errMsg = "pid: " + pid + " already references another cid." + + " A pid can only reference one cid."; + throw new PidRefsFileExistsException(errMsg); - } else { - String errMsg = "FileHashStore.findObject - Unable to find cid for pid: " + pid - + ". Pid refs file does not exist at: " + absPidRefsPath; - logFileHashStore.error(errMsg); - // Create custom exception class - throw new FileNotFoundException(errMsg); + } catch (Exception e) { + // cid and pid has been released + // Revert the process for all other exceptions + unTagObject(pid, cid); + throw e; } } @Override public String storeMetadata(InputStream metadata, String pid, String formatId) - throws IOException, FileNotFoundException, IllegalArgumentException, InterruptedException, + throws IOException, IllegalArgumentException, InterruptedException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.storeMetadata - Called to store metadata for pid: " + pid - + ", with formatId: " + formatId - ); + logFileHashStore.debug("Storing metadata for pid: " + pid + ", with formatId: " + formatId); // Validate input parameters - FileHashStoreUtility.ensureNotNull(metadata, "metadata", "storeMetadata"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "storeMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "storeMetadata"); + FileHashStoreUtility.ensureNotNull(metadata, "metadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + 
FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); - // Determine metadata namespace // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "storeMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); checkedFormatId = formatId; } - return syncPutMetadata(metadata, pid, checkedFormatId); + try (metadata) { + return syncPutMetadata(metadata, pid, checkedFormatId); + } + // Close stream } /** @@ -817,68 +590,35 @@ public String storeMetadata(InputStream metadata, String pid, String formatId) */ private String syncPutMetadata(InputStream metadata, String pid, String checkedFormatId) throws InterruptedException, IOException, NoSuchAlgorithmException { - // Lock pid for thread safety, transaction control and atomic writing - // Metadata storage requests for the same pid must be written serially - // However, the same pid could be used with different formatIds, so - // synchronize ids with pid + formatId; + // Get the metadata document id, which is the synchronization value String pidFormatId = pid + checkedFormatId; - synchronized (metadataLockedIds) { - while (metadataLockedIds.contains(pidFormatId)) { - try { - metadataLockedIds.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.storeMetadata - Metadata lock was interrupted while" - + " storing metadata for: " + pid + " and formatId: " + checkedFormatId - + ". 
InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); - } - } - logFileHashStore.debug( - "FileHashStore.storeMetadata - Synchronizing metadataLockedIds for pid: " + pid - ); - metadataLockedIds.add(pidFormatId); - } - + String metadataDocId = + FileHashStoreUtility.getPidHexDigest(pidFormatId, OBJECT_STORE_ALGORITHM); + logFileHashStore.debug( + "putMetadata() called to store metadata for pid: " + pid + ", with formatId: " + + checkedFormatId + " for metadata document: " + metadataDocId); try { - logFileHashStore.debug( - "FileHashStore.storeMetadata - .putMetadata() request for pid: " + pid - + ". formatId: " + checkedFormatId - ); + synchronizeMetadataLockedDocIds(metadataDocId); // Store metadata - String metadataCid = putMetadata(metadata, pid, checkedFormatId); + String pathToStoredMetadata = putMetadata(metadata, pid, checkedFormatId); logFileHashStore.info( - "FileHashStore.storeMetadata - Metadata stored for pid: " + pid - + ". 
Metadata Content Identifier (metadataCid): " + metadataCid - ); - return metadataCid; + "Metadata stored for pid: " + pid + " at: " + pathToStoredMetadata); + return pathToStoredMetadata; } catch (IOException ioe) { - // Covers FileNotFoundException - String errMsg = "FileHashStore.storeMetadata - Unable to store metadata, IOException" - + " encountered: " + ioe.getMessage(); + String errMsg = + "Unable to store metadata, IOException encountered: " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; } catch (NoSuchAlgorithmException nsae) { - String errMsg = - "FileHashStore.storeMetadata - Unable to store metadata, algorithm to calculate" - + " permanent address is not supported: " + nsae.getMessage(); + String errMsg = "Unable to store metadata, algorithm to calculate" + + " permanent address is not supported: " + nsae.getMessage(); logFileHashStore.error(errMsg); throw nsae; } finally { - // Release lock - synchronized (metadataLockedIds) { - logFileHashStore.debug( - "FileHashStore.storeMetadata - Releasing metadataLockedIds for pid: " + pid - + " and formatId " + checkedFormatId - ); - metadataLockedIds.remove(pidFormatId); - metadataLockedIds.notifyAll(); - } + releaseMetadataLockedDocIds(metadataDocId); } } @@ -886,44 +626,40 @@ private String syncPutMetadata(InputStream metadata, String pid, String checkedF * Overload method for storeMetadata with default metadata namespace */ @Override - public String storeMetadata(InputStream metadata, String pid) throws IOException, - IllegalArgumentException, InterruptedException, NoSuchAlgorithmException { + public String storeMetadata(InputStream metadata, String pid) + throws IOException, IllegalArgumentException, InterruptedException, + NoSuchAlgorithmException { return storeMetadata(metadata, pid, DEFAULT_METADATA_NAMESPACE); } @Override - public InputStream retrieveObject(String pid) throws IllegalArgumentException, - NoSuchAlgorithmException, FileNotFoundException, IOException { - 
logFileHashStore.debug( - "FileHashStore.retrieveObject - Called to retrieve object for pid: " + pid - ); + public InputStream retrieveObject(String pid) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { + logFileHashStore.debug("Retrieving InputStream to data object for pid: " + pid); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveObject"); - - // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getRealPath(pid, "object", null); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); // Check to see if object exists + Path objRealPath = getHashStoreDataObjectPath(pid); if (!Files.exists(objRealPath)) { - String errMsg = "FileHashStore.retrieveObject - File does not exist for pid: " + pid - + " with object address: " + objRealPath; + String errMsg = + "File does not exist for pid: " + pid + " with object address: " + objRealPath; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } - // If so, return an input stream for the object + // Return an InputStream to read from the data object try { InputStream objectCidInputStream = Files.newInputStream(objRealPath); - logFileHashStore.info( - "FileHashStore.retrieveObject - Retrieved object for pid: " + pid - ); + logFileHashStore.info("Retrieved object for pid: " + pid); return objectCidInputStream; } catch (IOException ioe) { String errMsg = - "FileHashStore.retrieveObject - Unexpected error when creating InputStream" - + " for pid: " + pid + ", IOException: " + ioe.getMessage(); + "Unexpected error when creating InputStream for pid: " + pid + ", IOException: " + + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } @@ -932,325 +668,459 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, @Override 
public InputStream retrieveMetadata(String pid, String formatId) - throws IllegalArgumentException, FileNotFoundException, IOException, + throws IllegalArgumentException, IOException, NoSuchAlgorithmException { logFileHashStore.debug( - "FileHashStore.retrieveMetadata - Called to retrieve metadata for pid: " + pid - + " with formatId: " + formatId - ); + "Retrieving metadata document for pid: " + pid + " with formatId: " + formatId); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.ensureNotNull(formatId, "formatId", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "retrieveMetadata"); - - // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", formatId); - - // Check to see if metadata exists - if (!Files.exists(metadataCidPath)) { - String errMsg = "FileHashStore.retrieveMetadata - Metadata does not exist for pid: " - + pid + " with formatId: " + formatId + ". 
Metadata address: " + metadataCidPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } - - // If so, return an input stream for the metadata - try { - InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); - logFileHashStore.info( - "FileHashStore.retrieveMetadata - Retrieved metadata for pid: " + pid - + " with formatId: " + formatId - ); - return metadataCidInputStream; + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + FileHashStoreUtility.ensureNotNull(formatId, "formatId"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); - } catch (IOException ioe) { - String errMsg = - "FileHashStore.retrieveMetadata - Unexpected error when creating InputStream" - + " for pid: " + pid + " with formatId: " + formatId + ". IOException: " + ioe - .getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } + return getHashStoreMetadataInputStream(pid, formatId); } /** * Overload method for retrieveMetadata with default metadata namespace */ @Override - public InputStream retrieveMetadata(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { + public InputStream retrieveMetadata(String pid) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { logFileHashStore.debug( - "FileHashStore.retrieveMetadata - Called to retrieve metadata for pid: " + pid - + " with default metadata namespace: " + DEFAULT_METADATA_NAMESPACE - ); + "Retrieving metadata for pid: " + pid + " with default metadata namespace: "); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "retrieveMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "retrieveMetadata"); - - // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", DEFAULT_METADATA_NAMESPACE); - 
- // Check to see if metadata exists - if (!Files.exists(metadataCidPath)) { - String errMsg = "FileHashStore.retrieveMetadata - Metadata does not exist for pid: " - + pid + " with formatId: " + DEFAULT_METADATA_NAMESPACE + ". Metadata address: " - + metadataCidPath; - logFileHashStore.warn(errMsg); - throw new FileNotFoundException(errMsg); - } - - // If so, return an input stream for the metadata - InputStream metadataCidInputStream; - try { - metadataCidInputStream = Files.newInputStream(metadataCidPath); - logFileHashStore.info( - "FileHashStore.retrieveMetadata - Retrieved metadata for pid: " + pid - + " with formatId: " + DEFAULT_METADATA_NAMESPACE - ); - } catch (IOException ioe) { - String errMsg = - "FileHashStore.retrieveMetadata - Unexpected error when creating InputStream" - + " for pid: " + pid + " with formatId: " + DEFAULT_METADATA_NAMESPACE - + ". IOException: " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - - return metadataCidInputStream; - } - - @Override - public void deleteObject(String cid, boolean deleteCid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.deleteObject - Called to delete object with content identifeir: " + cid - ); - if (deleteCid) { - // Validate input parameters - FileHashStoreUtility.ensureNotNull(cid, "cid", "deleteObject"); - FileHashStoreUtility.checkForEmptyString(cid, "cid", "deleteObject"); - - // Confirm that the object called to delete does not have a cid reference file - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - if (Files.exists(absCidRefsPath)) { - // The cid is referenced by pids, do not delete. 
- return; - - } else { - // Get permanent address of the actual cid - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid - ); - Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); - // If file exists, delete it. - if (Files.exists(expectedRealPath)) { - Files.delete(expectedRealPath); - } - } - } + return getHashStoreMetadataInputStream(pid, DEFAULT_METADATA_NAMESPACE); } @Override - public void deleteObject(String pid) throws IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException, InterruptedException, - PidNotFoundInCidRefsFileException { - logFileHashStore.debug( - "FileHashStore.deleteObject - Called to delete object for pid: " + pid - ); + public void deleteObject(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException { + logFileHashStore.debug("Deleting object for pid: " + pid); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteObject"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteObject"); + FileHashStoreUtility.ensureNotNull(pid, "id"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "id"); + Collection deleteList = new ArrayList<>(); - // First, find the object and evaluate its state - String cid; try { - cid = findObject(pid); + // Storing, deleting and untagging objects are synchronized together + // Duplicate store object requests for a pid are rejected, but deleting an object + // will wait for a pid to be released if it's found to be in use before proceeding. + synchronizeObjectLockedPids(pid); - } catch (OrphanPidRefsFileException oprfe) { - // Delete the pid refs file and return, nothing else to delete. 
- Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - Files.delete(absPidRefsPath); + // Before we begin deletion process, we look for the `cid` by calling + // `findObject` which will throw custom exceptions if there is an issue with + // the reference files, which help us determine the path to proceed with. + try { + ObjectInfo objInfoMap = findObject(pid); + String cid = objInfoMap.cid(); - String warnMsg = "FileHashStore.deleteObject - Cid refs file does not exist for pid: " - + pid + ". Deleted orphan pid refs file."; - logFileHashStore.warn(warnMsg); - return; - - } catch (PidNotFoundInCidRefsFileException pnficrfe) { - // Delete pid refs file and return, nothing else to delete - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - Files.delete(absPidRefsPath); - - String warnMsg = - "FileHashStore.deleteObject - Pid not found in expected cid refs file for pid: " - + pid + ". Deleted orphan pid refs file."; - logFileHashStore.warn(warnMsg); - return; - } + // If no exceptions are thrown, we proceed to synchronization based on the `cid` + synchronizeObjectLockedCids(cid); - // If cid has been retrieved without any errors, proceed with second stage of deletion. - synchronized (referenceLockedCids) { - while (referenceLockedCids.contains(cid)) { try { - referenceLockedCids.wait(TIME_OUT_MILLISEC); - - } catch (InterruptedException ie) { - String errMsg = - "FileHashStore.deleteObject - referenceLockedCids lock was interrupted while" - + " waiting to delete object with cid: " + cid - + ". 
InterruptedException: " + ie.getMessage(); - logFileHashStore.error(errMsg); - throw new InterruptedException(errMsg); + // Proceed with comprehensive deletion - cid exists, nothing out of place + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + + // Begin deletion process + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + Path objRealPath = getHashStoreDataObjectPath(pid); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(objRealPath)); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } else { + String warnMsg = "cid referenced by pid: " + pid + + " is not empty (refs exist for cid). Skipping object deletion."; + logFileHashStore.warn(warnMsg); + } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + deleteMetadata(pid); + logFileHashStore.info("Data file and references deleted for: " + pid); + + } finally { + // Release lock + releaseObjectLockedCids(cid); } - } - logFileHashStore.debug( - "FileHashStore.deleteObject - Synchronizing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.add(cid); - } - - try { - // Get permanent address of the pid by calculating its sha-256 hex digest - Path objRealPath = getRealPath(pid, "object", null); - // Get the path to the cid refs file to work with - Path absCidRefsPath = getRealPath(cid, "refs", "cid"); - if (!Files.exists(objRealPath)) { - // Throw exception if object doesn't exist - String errMsg = "FileHashStore.deleteObject - File does not exist for pid: " + pid - + " with object address: " + objRealPath; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); + } catch (OrphanPidRefsFileException oprfe) { + // `findObject` throws this 
exception when the cid refs file doesn't exist, + // so we only need to delete the pid refs file and related metadata documents + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + deleteMetadata(pid); + String warnMsg = "Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file and metadata."; + logFileHashStore.warn(warnMsg); + + } catch (OrphanRefsFilesException orfe) { + // `findObject` throws this exception when the pid and cid refs file exists, + // but the actual object being referenced by the pid does not exist + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); - } else { - // Proceed to delete the reference files and object - // Delete pid reference file - deletePidRefsFile(pid); - // Remove pid from cid refs file - deleteCidRefsPid(pid, absCidRefsPath); - // Delete obj and cid refs file only if the cid refs file is empty - if (Files.size(absCidRefsPath) == 0) { - // Delete empty cid refs file - Files.delete(absCidRefsPath); - // Delete actual object - Files.delete(objRealPath); - } else { - String warnMsg = "FileHashStore.deleteObject - cid referenced by pid: " + pid - + " is not empty (references exist for the cid). 
Skipping object deletion."; + try { + // Since we must access the cid reference file, the `cid` must be synchronized + synchronizeObjectLockedCids(cidRead); + + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + deleteMetadata(pid); + String warnMsg = "Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". Deleted pid and cid ref files and metadata."; logFileHashStore.warn(warnMsg); + + } finally { + // Release lock + releaseObjectLockedCids(cidRead); } - logFileHashStore.info( - "FileHashStore.deleteObject - File and references deleted for: " + pid - + " with object address: " + objRealPath - ); - // TODO: Discuss where deleteObject should also remove all default system metadata + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // `findObject` throws this exception when both the pid and cid refs file exists + // but the pid is not found in the cid refs file. + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + FileHashStoreUtility.deleteListItems(deleteList); + deleteMetadata(pid); + String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + + ". 
Deleted orphan pid refs file and metadata."; + logFileHashStore.warn(warnMsg); } } finally { // Release lock - synchronized (referenceLockedCids) { - logFileHashStore.debug( - "FileHashStore.deleteObject - Releasing referenceLockedCids for cid: " + cid - ); - referenceLockedCids.remove(cid); - referenceLockedCids.notifyAll(); - } + releaseObjectLockedPids(pid); } - } + @Override - public void deleteMetadata(String pid, String formatId) throws IllegalArgumentException, - IOException, NoSuchAlgorithmException { - logFileHashStore.debug( - "FileHashStore.deleteMetadata - Called to delete metadata for pid: " + pid - ); + public void deleteIfInvalidObject( + ObjectMetadata objectInfo, String checksum, String checksumAlgorithm, long objSize) + throws NonMatchingObjSizeException, NonMatchingChecksumException, + UnsupportedHashAlgorithmException, InterruptedException, NoSuchAlgorithmException, + IOException { + logFileHashStore.debug("Verifying data object for cid: " + objectInfo.cid()); // Validate input parameters - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deleteMetadata"); - FileHashStoreUtility.ensureNotNull(formatId, "formatId", "deleteMetadata"); - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "deleteMetadata"); - - // Get permanent address of the pid by calculating its sha-256 hex digest - Path metadataCidPath = getRealPath(pid, "metadata", formatId); + FileHashStoreUtility.ensureNotNull(objectInfo, "objectInfo"); + FileHashStoreUtility.ensureNotNull(objectInfo.hexDigests(), "objectInfo.getHexDigests()"); + if (objectInfo.hexDigests().isEmpty()) { + throw new MissingHexDigestsException("Missing hexDigests in supplied ObjectMetadata"); + } + FileHashStoreUtility.ensureNotNull(checksum, "checksum"); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm"); + FileHashStoreUtility.checkPositive(objSize); + + String objCid = objectInfo.cid(); + long 
objInfoRetrievedSize = objectInfo.size(); + Map hexDigests = objectInfo.hexDigests(); + String digestFromHexDigests = hexDigests.get(checksumAlgorithm); - if (!Files.exists(metadataCidPath)) { - String errMsg = "FileHashStore.deleteMetadata - File does not exist for pid: " + pid - + " with metadata address: " + metadataCidPath; - logFileHashStore.warn(errMsg); - return; - - } else { - // Proceed to delete - Files.delete(metadataCidPath); - logFileHashStore.info( - "FileHashStore.deleteMetadata - File deleted for: " + pid - + " with metadata address: " + metadataCidPath - ); + // Confirm that requested checksum to verify against is available + if (digestFromHexDigests == null) { + try { + validateAlgorithm(checksumAlgorithm); + // If no exceptions thrown, calculate the checksum with the given algo + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + objCid); + Path pathToCidObject = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + try (InputStream inputStream = Files.newInputStream(pathToCidObject)) { + digestFromHexDigests = + FileHashStoreUtility.calculateHexDigest(inputStream, checksumAlgorithm); + } catch (IOException ioe) { + String errMsg = + "Unexpected error when calculating a checksum for cid: " + objCid + + " with algorithm (" + checksumAlgorithm + + ") that is not part of the default list. " + ioe.getMessage(); + throw new IOException(errMsg); + } + } catch (NoSuchAlgorithmException nsae) { + String errMsg = "checksumAlgorithm given: " + checksumAlgorithm + + " is not supported. Supported algorithms: " + Arrays.toString( + SUPPORTED_HASH_ALGORITHMS); + logFileHashStore.error(errMsg); + throw new UnsupportedHashAlgorithmException(errMsg); + } } + // Validate checksum + if (!digestFromHexDigests.equals(checksum)) { + deleteObjectByCid(objCid); + String errMsg = + "Object content invalid for cid: " + objCid + ". Expected checksum: " + checksum + + ". 
Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + + checksumAlgorithm + ")"; + logFileHashStore.error(errMsg); + throw new NonMatchingChecksumException(errMsg); + } + // Validate size + if (objInfoRetrievedSize != objSize) { + deleteObjectByCid(objCid); + String errMsg = "Object size invalid for cid: " + objCid + ". Expected size: " + objSize + + ". Actual size: " + objInfoRetrievedSize; + logFileHashStore.error(errMsg); + throw new NonMatchingObjSizeException(errMsg); + } + + String infoMsg = + "Object has been validated for cid: " + objCid + ". Expected checksum: " + checksum + + ". Actual checksum calculated: " + digestFromHexDigests + " (algorithm: " + + checksumAlgorithm + ")"; + logFileHashStore.info(infoMsg); + } + + @Override + public void deleteMetadata(String pid, String formatId) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException { + logFileHashStore.debug( + "Deleting metadata document for pid: " + pid + " with formatId: " + formatId); + // Validate input parameters + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + FileHashStoreUtility.ensureNotNull(formatId, "formatId"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); + + // Get the path to the metadata document and add it to a list + Path metadataDocPath = getHashStoreMetadataPath(pid, formatId); + Collection metadataDocPaths = new ArrayList<>(); + metadataDocPaths.add(metadataDocPath); + + if (!metadataDocPaths.isEmpty()) { + Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); + // Delete all items in the list + FileHashStoreUtility.deleteListItems(deleteList); + } + logFileHashStore.info( + "Metadata document deleted for: " + pid + " with metadata address: " + metadataDocPath); } /** * Overload method for deleteMetadata with default metadata namespace */ @Override - public void deleteMetadata(String pid) throws 
IllegalArgumentException, FileNotFoundException, - IOException, NoSuchAlgorithmException { - deleteMetadata(pid, DEFAULT_METADATA_NAMESPACE); + public void deleteMetadata(String pid) + throws IllegalArgumentException, IOException, NoSuchAlgorithmException, + InterruptedException { + logFileHashStore.debug("Deleting all metadata documents for pid: " + pid); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + + // Get the path to the pid metadata document directory + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, OBJECT_STORE_ALGORITHM); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + pidHexDigest); + Path expectedPidMetadataDirectory = METADATA_STORE_DIRECTORY.resolve(pidRelativePath); + // Add all metadata docs found in the metadata doc directory to a list to iterate over + List metadataDocPaths = + FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); + + if (!metadataDocPaths.isEmpty()) { + Collection deleteList = syncRenameMetadataDocForDeletion(metadataDocPaths); + // Delete all items in the list + FileHashStoreUtility.deleteListItems(deleteList); + } + logFileHashStore.info("All metadata documents deleted for: " + pid); } - @Override - public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, - FileNotFoundException, IOException { - logFileHashStore.debug( - "FileHashStore.getHexDigest - Called to calculate hex digest for pid: " + pid - ); + /** + * Synchronize renaming metadata documents for deletion + * + * @param metadataDocPaths List of metadata document paths + * @throws IOException If there is an issue renaming paths + * @throws InterruptedException If there is an issue with synchronization metadata calls + */ + protected Collection syncRenameMetadataDocForDeletion( + Collection metadataDocPaths) throws IOException, InterruptedException { + 
FileHashStoreUtility.ensureNotNull(metadataDocPaths, "metadataDocPaths"); + if (metadataDocPaths.isEmpty()) { + String errMsg = "metadataDocPaths supplied cannot be empty."; + logFileHashStore.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + // Rename paths and add to a List + Collection metadataDocsToDelete = new ArrayList<>(); + try { + for (Path metadataDocToDelete : metadataDocPaths) { + String metadataDocId = metadataDocToDelete.getFileName().toString(); + try { + synchronizeMetadataLockedDocIds(metadataDocId); + if (Files.exists(metadataDocToDelete)) { + metadataDocsToDelete.add( + FileHashStoreUtility.renamePathForDeletion(metadataDocToDelete)); + } + } finally { + releaseMetadataLockedDocIds(metadataDocId); + } + } + } catch (Exception ge) { + // If there is any exception, attempt to revert the process and throw an exception + if (!metadataDocsToDelete.isEmpty()) { + for (Path metadataDocToPlaceBack : metadataDocsToDelete) { + Path fileNameWithDeleted = metadataDocToPlaceBack.getFileName(); + String metadataDocId = fileNameWithDeleted.toString().replace("_delete", ""); + try { + synchronizeMetadataLockedDocIds(metadataDocId); + if (Files.exists(metadataDocToPlaceBack)) { + FileHashStoreUtility.renamePathForRestoration(metadataDocToPlaceBack); + } + } finally { + releaseMetadataLockedDocIds(metadataDocId); + } + } + } + String errMsg = "An unexpected exception has occurred when deleting metadata " + + "documents. Attempts to restore all affected metadata documents have " + + "been made. 
Additional details: " + ge.getMessage(); + logFileHashStore.error(errMsg); + throw ge; + } + + return metadataDocsToDelete; + } - FileHashStoreUtility.ensureNotNull(pid, "pid", "getHexDigest"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "getHexDigest"); + @Override + public String getHexDigest(String pid, String algorithm) + throws IllegalArgumentException, IOException, + NoSuchAlgorithmException { + logFileHashStore.debug("Calculating hex digest for pid: " + pid); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); validateAlgorithm(algorithm); // Find the content identifier if (algorithm.equals(OBJECT_STORE_ALGORITHM)) { - String cid = findObject(pid); - return cid; + ObjectInfo objInfo = findObject(pid); + return objInfo.cid(); } else { - // Get permanent address of the pid - Path objRealPath = getRealPath(pid, "object", null); - - // Check to see if object exists + // Get permanent address of the pid object + Path objRealPath = getHashStoreDataObjectPath(pid); if (!Files.exists(objRealPath)) { - String errMsg = "FileHashStore.getHexDigest - File does not exist for pid: " + pid - + " with object address: " + objRealPath; + String errMsg = + "File does not exist for pid: " + pid + " with object address: " + objRealPath; logFileHashStore.warn(errMsg); throw new FileNotFoundException(errMsg); } InputStream dataStream = Files.newInputStream(objRealPath); - String mdObjectHexDigest = FileHashStoreUtility.calculateHexDigest( - dataStream, algorithm - ); + String mdObjectHexDigest = + FileHashStoreUtility.calculateHexDigest(dataStream, algorithm); logFileHashStore.info( - "FileHashStore.getHexDigest - Hex digest calculated for pid: " + pid - + ", with hex digest value: " + mdObjectHexDigest - ); + "Hex digest calculated for pid: " + pid + ", with hex digest value: " + + mdObjectHexDigest); return mdObjectHexDigest; } } // FileHashStore Core & Supporting Methods + /** + * Checks whether an 
object referenced by a pid exists and returns a map containing the absolute + * path to the object, pid refs file, cid refs file and sysmeta document. + * + * @param pid Authority-based identifier + * @return Map containing the following keys: cid, cid_object_path, cid_refs_path, + * pid_refs_path, sysmeta_path + * @throws NoSuchAlgorithmException When algorithm used to calculate pid refs file's + * absolute address is not valid + * @throws IOException Unable to read from a pid refs file or pid refs + * file does not exist + * @throws OrphanRefsFilesException pid and cid refs file found, but object does not + * exist + * @throws OrphanPidRefsFileException When pid refs file exists and the cid found inside + * does not exist. + * @throws PidNotFoundInCidRefsFileException When pid and cid ref files exists but the expected + * pid is not found in the cid refs file. + */ + protected ObjectInfo findObject(String pid) + throws NoSuchAlgorithmException, IOException, OrphanPidRefsFileException, + PidNotFoundInCidRefsFileException, OrphanRefsFilesException { + logFileHashStore.debug("Finding object for pid: " + pid); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + + // Get path of the pid references file + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + + if (Files.exists(absPidRefsPath)) { + String cid = new String(Files.readAllBytes(absPidRefsPath)); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + + // Throw exception if the cid refs file doesn't exist + if (!Files.exists(absCidRefsPath)) { + String errMsg = "Cid refs file does not exist for cid: " + cid + " with address: " + + absCidRefsPath + ", but pid refs file exists."; + logFileHashStore.error(errMsg); + throw new OrphanPidRefsFileException(errMsg); + } + // If the pid is found in the expected cid refs file, and the object exists, return it + if (isStringInRefsFile(pid, absCidRefsPath)) { + 
logFileHashStore.info("cid (" + cid + ") found for pid: " + pid); + + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + cid); + Path realPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + if (Files.exists(realPath)) { + // If the default system metadata exists, include it + Path metadataPidExpectedPath = + getHashStoreMetadataPath(pid, DEFAULT_METADATA_NAMESPACE); + if (Files.exists(metadataPidExpectedPath)) { + return new ObjectInfo(cid, realPath.toString(), absCidRefsPath.toString(), + absPidRefsPath.toString(), + metadataPidExpectedPath.toString()); + } else { + return new ObjectInfo(cid, realPath.toString(), absCidRefsPath.toString(), + absPidRefsPath.toString(), "Does not exist"); + } + + } else { + String errMsg = "Object with cid: " + cid + + " does not exist, but pid and cid reference file found for pid: " + pid; + logFileHashStore.error(errMsg); + throw new OrphanRefsFilesException(errMsg); + } + + } else { + String errMsg = "Pid refs file exists, but pid (" + pid + + ") not found in cid refs file for cid: " + cid + " with address: " + + absCidRefsPath; + logFileHashStore.error(errMsg); + throw new PidNotFoundInCidRefsFileException(errMsg); + } + + } else { + String errMsg = + "Unable to find cid for pid: " + pid + ". Pid refs file does not exist at: " + + absPidRefsPath; + logFileHashStore.error(errMsg); + throw new PidRefsFileNotFoundException(errMsg); + } + } + /** * Takes a given InputStream and writes it to its permanent address on disk based on the SHA-256 * hex digest value of an authority based identifier, usually provided as a persistent - * identifier (pid). - * - * If an additional algorithm is provided and supported, its respective hex digest value will be - * included in hexDigests map. If a checksum and checksumAlgorithm is provided, FileHashStore - * will validate the given checksum against the hex digest produced of the supplied - * checksumAlgorithm. + * identifier (pid). 
If an additional algorithm is provided and supported, its respective hex + * digest value will be included in hexDigests map. If a checksum and checksumAlgorithm is + * provided, FileHashStore will validate the given checksum against the hex digest produced of + * the supplied checksumAlgorithm. * * @param object InputStream for file * @param pid Authority-based identifier @@ -1259,7 +1129,7 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE * @param checksumAlgorithm Algorithm of checksum submitted * @param objSize Expected size of object to validate after storing * @return 'ObjectMetadata' object that contains the file id, size, and a checksum map based on - * the default algorithm list. + * the default algorithm list. * @throws IOException I/O Error when writing file, generating checksums, * moving file or deleting tmpFile upon duplicate found * @throws NoSuchAlgorithmException When additionalAlgorithm or checksumAlgorithm is @@ -1272,184 +1142,162 @@ public String getHexDigest(String pid, String algorithm) throws NoSuchAlgorithmE * etc.) 
* @throws NullPointerException Arguments are null for pid or object * @throws AtomicMoveNotSupportedException When attempting to move files across file systems + * @throws InterruptedException An issue synchronizing the cid when moving object */ protected ObjectMetadata putObject( InputStream object, String pid, String additionalAlgorithm, String checksum, - String checksumAlgorithm, long objSize - ) throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, + String checksumAlgorithm, long objSize) + throws IOException, NoSuchAlgorithmException, SecurityException, FileNotFoundException, PidRefsFileExistsException, IllegalArgumentException, NullPointerException, - AtomicMoveNotSupportedException { - logFileHashStore.debug("FileHashStore.putObject - Called to put object for pid: " + pid); - - // Validate algorithms if not null or empty, throws exception if not supported + AtomicMoveNotSupportedException, InterruptedException { + logFileHashStore.debug("Begin writing data object for pid: " + pid); + // If validation is desired, checksumAlgorithm and checksum must both be present + boolean compareChecksum = verifyChecksumParameters(checksum, checksumAlgorithm); + // Validate additional algorithm if not null or empty, throws exception if not supported if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( - additionalAlgorithm, "additionalAlgorithm", "putObject" - ); + FileHashStoreUtility.checkForNotEmptyAndValidString( + additionalAlgorithm, "additionalAlgorithm"); validateAlgorithm(additionalAlgorithm); } - if (checksumAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( - checksumAlgorithm, "checksumAlgorithm", "putObject" - ); - validateAlgorithm(checksumAlgorithm); - } - if (checksum != null) { - FileHashStoreUtility.checkForEmptyString(checksum, "checksum", "putObject"); - } if (objSize != -1) { - FileHashStoreUtility.checkNotNegativeOrZero(objSize, "putObject"); + 
FileHashStoreUtility.checkPositive(objSize); } - // If validation is desired, checksumAlgorithm and checksum must both be present - boolean requestValidation = verifyChecksumParameters(checksum, checksumAlgorithm); - // Generate tmp file and write to it - logFileHashStore.debug("FileHashStore.putObject - Generating tmpFile"); File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", OBJECT_TMP_FILE_DIRECTORY); - Path tmpFilePath = tmpFile.toPath(); Map hexDigests; try { - hexDigests = writeToTmpFileAndGenerateChecksums( - tmpFile, object, additionalAlgorithm, checksumAlgorithm - ); + hexDigests = writeToTmpFileAndGenerateChecksums(tmpFile, object, additionalAlgorithm, + checksumAlgorithm); } catch (Exception ge) { // If the process to write to the tmpFile is interrupted for any reason, - // we will delete the tmpFile. - boolean deleteStatus = tmpFile.delete(); - String errMsg = - "FileHashStore.putObject - Unexpected Exception while storing object for: " + pid; - if (deleteStatus) { - errMsg = errMsg + ". Deleting temp file: " + tmpFile + ". Aborting request."; - } else { - errMsg = errMsg + ". Failed to delete temp file: " + tmpFile - + ". Aborting request."; - } + // we will delete the tmpFile. + Files.delete(tmpFile.toPath()); + String errMsg = "Unexpected Exception while storing object for pid: " + pid + ". 
" + + ge.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } - long storedObjFileSize = Files.size(Paths.get(tmpFile.toString())); // Validate object if checksum and checksum algorithm is passed validateTmpObject( - requestValidation, checksum, checksumAlgorithm, tmpFilePath, hexDigests, objSize, - storedObjFileSize - ); + compareChecksum, checksum, checksumAlgorithm, tmpFile, hexDigests, objSize); // Gather the elements to form the permanent address String objectCid = hexDigests.get(OBJECT_STORE_ALGORITHM); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); - Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); - - // Confirm that the object does not yet exist, delete tmpFile if so - if (Files.exists(objRealPath)) { - String errMsg = "FileHashStore.putObject - File already exists for pid: " + pid - + ". Object address: " + objRealPath + ". Deleting temporary file."; - logFileHashStore.warn(errMsg); - tmpFile.delete(); - } else { - // Move object - File permFile = objRealPath.toFile(); - move(tmpFile, permFile, "object"); - logFileHashStore.debug( - "FileHashStore.putObject - Move object success, permanent address: " + objRealPath - ); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + objectCid); + Path objRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + + try { + synchronizeObjectLockedCids(objectCid); + // Confirm that the object does not yet exist, delete tmpFile if so + if (!Files.exists(objRealPath)) { + logFileHashStore.info("Storing tmpFile: " + tmpFile); + // Move object + File permFile = objRealPath.toFile(); + move(tmpFile, permFile, "object"); + logFileHashStore.debug("Successfully moved data object: " + objRealPath); + } else { + Files.delete(tmpFile.toPath()); + String errMsg = + "File already exists for pid: " + pid + ". Object address: " + objRealPath + + ". 
Deleting temporary file: " + tmpFile; + logFileHashStore.warn(errMsg); + } + } catch (Exception e) { + String errMsg = + "Unexpected exception when moving object with cid: " + objectCid + " for pid:" + pid + + ". Additional Details: " + e.getMessage(); + logFileHashStore.error(errMsg); + throw e; + } finally { + releaseObjectLockedCids(objectCid); } - // Create ObjectMetadata to return with pertinent data - return new ObjectMetadata(objectCid, storedObjFileSize, hexDigests); + return new ObjectMetadata(pid, objectCid, Files.size(objRealPath), hexDigests); } /** - * If requestValidation is true, determines the integrity of an object with a given checksum & + * If compareChecksum is true, determines the integrity of an object with a given checksum & * algorithm against a list of hex digests. If there is a mismatch, the tmpFile will be deleted * and exceptions will be thrown. * - * @param requestValidation Boolean to decide whether to proceed with validation + * @param compareChecksum Decide whether to proceed with comparing checksums * @param checksum Expected checksum value of object * @param checksumAlgorithm Hash algorithm of checksum value - * @param tmpFile tmpFile that has been written - * @param hexDigests Map of the hex digests available to check with * @param tmpFile Path to the file that is being evaluated * @param hexDigests Map of the hex digests to parse data from - * @param objSize Expected size of object - * @param storedObjFileSize Actual size of object stored - * @return - * @throws NoSuchAlgorithmException - * @throws IOException + * @param expectedSize Expected size of object + * @throws NoSuchAlgorithmException If algorithm requested to validate against is absent */ - private boolean validateTmpObject( - boolean requestValidation, String checksum, String checksumAlgorithm, Path tmpFile, - Map hexDigests, long objSize, long storedObjFileSize - ) throws NoSuchAlgorithmException, IOException { - if (objSize > 0) { - if (objSize != storedObjFileSize) 
{ + protected void validateTmpObject( + boolean compareChecksum, String checksum, String checksumAlgorithm, File tmpFile, + Map hexDigests, long expectedSize) + throws NoSuchAlgorithmException, NonMatchingChecksumException, NonMatchingObjSizeException, + IOException { + if (expectedSize > 0) { + long storedObjFileSize = Files.size(Paths.get(tmpFile.toString())); + if (expectedSize != storedObjFileSize) { // Delete tmp File try { - Files.delete(tmpFile); + Files.delete(tmpFile.toPath()); } catch (Exception ge) { String errMsg = - "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " - + storedObjFileSize + ". Failed to delete tmpFile: " + tmpFile + ". " - + ge.getMessage(); + "objSize given is not equal to the stored object size. ObjSize: " + + expectedSize + ". storedObjFileSize: " + storedObjFileSize + + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new NonMatchingObjSizeException(errMsg); } String errMsg = - "FileHashStore.validateTmpObject - objSize given is not equal to the" - + " stored object size. ObjSize: " + objSize + ". storedObjFileSize: " - + storedObjFileSize + ". Deleting tmpFile: " + tmpFile; + "objSize given is not equal to the stored object size. ObjSize: " + expectedSize + + ". storedObjFileSize: " + storedObjFileSize + ". Deleting tmpFile: " + + tmpFile; logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); + throw new NonMatchingObjSizeException(errMsg); } } - if (requestValidation) { - logFileHashStore.info( - "FileHashStore.validateTmpObject - Validating object, checksum arguments" - + " supplied and valid." 
- ); + if (compareChecksum) { + logFileHashStore.info("Validating object, checksum arguments supplied and valid."); String digestFromHexDigests = hexDigests.get(checksumAlgorithm); if (digestFromHexDigests == null) { - String errMsg = - "FileHashStore.validateTmpObject - checksum not found in hex digest map" - + " when validating object." + " checksumAlgorithm checked: " - + checksumAlgorithm; + String baseErrMsg = "Object cannot be validated. Algorithm not found in given " + + "hexDigests map. Algorithm requested: " + checksumAlgorithm; + try { + Files.delete(tmpFile.toPath()); + } catch (Exception ge) { + String errMsg = baseErrMsg + ". Failed to delete tmpFile: " + tmpFile + ". " + + ge.getMessage(); + logFileHashStore.error(errMsg); + throw new NonMatchingChecksumException(errMsg); + } + String errMsg = baseErrMsg + ". tmpFile has been deleted: " + tmpFile; logFileHashStore.error(errMsg); throw new NoSuchAlgorithmException(errMsg); } if (!checksum.equalsIgnoreCase(digestFromHexDigests)) { - // Delete tmp File + String baseErrMsg = "Checksum given is not equal to the calculated hex digest: " + + digestFromHexDigests + ". Checksum" + " provided: " + checksum; try { - Files.delete(tmpFile); - + Files.delete(tmpFile.toPath()); } catch (Exception ge) { - String errMsg = - "FileHashStore.validateTmpObject - Object cannot be validated. Checksum given" - + " is not equal to the calculated hex digest: " + digestFromHexDigests - + ". Checksum" + " provided: " + checksum - + ". Failed to delete tmpFile: " + tmpFile + ". " + ge.getMessage(); - ; + String errMsg = baseErrMsg + ". Failed to delete tmpFile: " + tmpFile + ". " + + ge.getMessage(); logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new NonMatchingChecksumException(errMsg); } - String errMsg = - "FileHashStore.validateTmpObject - Checksum given is not equal to the" - + " calculated hex digest: " + digestFromHexDigests + ". Checksum" - + " provided: " + checksum + ". 
tmpFile has been deleted: " + tmpFile; + String errMsg = baseErrMsg + ". tmpFile has been deleted: " + tmpFile; logFileHashStore.error(errMsg); - throw new IllegalArgumentException(errMsg); + throw new NonMatchingChecksumException(errMsg); } } - - return true; } /** @@ -1462,17 +1310,15 @@ private boolean validateTmpObject( * @throws IllegalArgumentException Algorithm cannot be empty * @throws NoSuchAlgorithmException Algorithm not supported */ - protected boolean validateAlgorithm(String algorithm) throws NullPointerException, - IllegalArgumentException, NoSuchAlgorithmException { - FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "putObject"); - FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "validateAlgorithm"); + protected boolean validateAlgorithm(String algorithm) + throws NullPointerException, IllegalArgumentException, NoSuchAlgorithmException { + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); boolean algorithmSupported = Arrays.asList(SUPPORTED_HASH_ALGORITHMS).contains(algorithm); if (!algorithmSupported) { - String errMsg = "FileHashStore - validateAlgorithm: Algorithm not supported: " - + algorithm + ". Supported algorithms: " + Arrays.toString( - SUPPORTED_HASH_ALGORITHMS - ); + String errMsg = "Algorithm not supported: " + algorithm + ". 
Supported algorithms: " + + Arrays.toString(SUPPORTED_HASH_ALGORITHMS); logFileHashStore.error(errMsg); throw new NoSuchAlgorithmException(errMsg); } @@ -1481,20 +1327,23 @@ protected boolean validateAlgorithm(String algorithm) throws NullPointerExceptio } /** - * Checks whether the algorithm supplied is included in the DefaultHashAlgorithms - * + * Determines if an algorithm should be generated by checking whether the algorithm supplied is + * included in the DefaultHashAlgorithms + * * @param algorithm Algorithm to check - * @return True if it's included + * @return Boolean */ - private boolean isDefaultAlgorithm(String algorithm) { - boolean isDefaultAlgorithm = false; + protected boolean shouldCalculateAlgorithm(String algorithm) { + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); + boolean shouldCalculateAlgorithm = true; for (DefaultHashAlgorithms defAlgo : DefaultHashAlgorithms.values()) { if (algorithm.equals(defAlgo.getName())) { - isDefaultAlgorithm = true; + shouldCalculateAlgorithm = false; break; } } - return isDefaultAlgorithm; + return shouldCalculateAlgorithm; } /** @@ -1507,27 +1356,28 @@ private boolean isDefaultAlgorithm(String algorithm) { */ protected boolean verifyChecksumParameters(String checksum, String checksumAlgorithm) throws NoSuchAlgorithmException { + // First ensure algorithm is compatible and values are valid if they aren't null + if (checksumAlgorithm != null) { + FileHashStoreUtility.checkForNotEmptyAndValidString( + checksumAlgorithm, "checksumAlgorithm"); + validateAlgorithm(checksumAlgorithm); + } + if (checksum != null) { + FileHashStoreUtility.checkForNotEmptyAndValidString(checksum, "checksum"); + } // If checksum is supplied, checksumAlgorithm cannot be empty if (checksum != null && !checksum.trim().isEmpty()) { - FileHashStoreUtility.ensureNotNull( - checksumAlgorithm, "checksumAlgorithm", "verifyChecksumParameters" - ); - 
FileHashStoreUtility.checkForEmptyString( - checksumAlgorithm, "algorithm", "verifyChecksumParameters" - ); + FileHashStoreUtility.ensureNotNull(checksumAlgorithm, "checksumAlgorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(checksumAlgorithm, "algorithm"); } // Ensure algorithm is supported, not null and not empty boolean requestValidation = false; if (checksumAlgorithm != null && !checksumAlgorithm.trim().isEmpty()) { requestValidation = validateAlgorithm(checksumAlgorithm); - // Ensure checksum is not null or empty if checksumAlgorithm is supplied in + // Ensure checksum is not null or empty if checksumAlgorithm is supplied if (requestValidation) { - FileHashStoreUtility.ensureNotNull( - checksum, "checksum", "verifyChecksumParameters" - ); - FileHashStoreUtility.checkForEmptyString( - checksum, "checksum", "verifyChecksumParameters" - ); + FileHashStoreUtility.ensureNotNull(checksum, "checksum"); + FileHashStoreUtility.checkForNotEmptyAndValidString(checksum, "checksum"); } } return requestValidation; @@ -1536,9 +1386,8 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor /** * Write the input stream into a given file (tmpFile) and return a HashMap consisting of * algorithms and their respective hex digests. If an additional algorithm is supplied and - * supported, it and its checksum value will be included in the hex digests map. - * - * Default algorithms: MD5, SHA-1, SHA-256, SHA-384, SHA-512 + * supported, it and its checksum value will be included in the hex digests map. 
Default + * algorithms: MD5, SHA-1, SHA-256, SHA-384, SHA-512 * * @param tmpFile file to write input stream data into * @param dataStream input stream of data to store @@ -1549,27 +1398,25 @@ protected boolean verifyChecksumParameters(String checksum, String checksumAlgor * @throws NoSuchAlgorithmException Unable to generate new instance of supplied algorithm * @throws IOException Issue with writing file from InputStream * @throws SecurityException Unable to write to tmpFile - * @throws FileNotFoundException tmnpFile cannot be found + * @throws FileNotFoundException tmpFile cannot be found */ protected Map writeToTmpFileAndGenerateChecksums( - File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm - ) throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { + File tmpFile, InputStream dataStream, String additionalAlgorithm, String checksumAlgorithm) + throws NoSuchAlgorithmException, IOException, FileNotFoundException, SecurityException { // Determine whether to calculate additional or checksum algorithms boolean generateAddAlgo = false; if (additionalAlgorithm != null) { - FileHashStoreUtility.checkForEmptyString( - additionalAlgorithm, "additionalAlgorithm", "writeToTmpFileAndGenerateChecksums" - ); + FileHashStoreUtility.checkForNotEmptyAndValidString( + additionalAlgorithm, "additionalAlgorithm"); validateAlgorithm(additionalAlgorithm); - generateAddAlgo = !isDefaultAlgorithm(additionalAlgorithm); + generateAddAlgo = shouldCalculateAlgorithm(additionalAlgorithm); } boolean generateCsAlgo = false; if (checksumAlgorithm != null && !checksumAlgorithm.equals(additionalAlgorithm)) { - FileHashStoreUtility.checkForEmptyString( - checksumAlgorithm, "checksumAlgorithm", "writeToTmpFileAndGenerateChecksums" - ); + FileHashStoreUtility.checkForNotEmptyAndValidString( + checksumAlgorithm, "checksumAlgorithm"); validateAlgorithm(checksumAlgorithm); - generateCsAlgo = !isDefaultAlgorithm(checksumAlgorithm); 
+ generateCsAlgo = shouldCalculateAlgorithm(checksumAlgorithm); } FileOutputStream os = new FileOutputStream(tmpFile); @@ -1582,16 +1429,12 @@ protected Map writeToTmpFileAndGenerateChecksums( MessageDigest checksumAlgo = null; if (generateAddAlgo) { logFileHashStore.debug( - "FileHashStore.writeToTmpFileAndGenerateChecksums - Adding additional algorithm" - + " to hex digest map, algorithm: " + additionalAlgorithm - ); + "Adding additional algorithm to hex digest map, algorithm: " + additionalAlgorithm); additionalAlgo = MessageDigest.getInstance(additionalAlgorithm); } if (generateCsAlgo) { logFileHashStore.debug( - "FileHashStore.writeToTmpFileAndGenerateChecksums - Adding checksum algorithm" - + " to hex digest map, algorithm: " + checksumAlgorithm - ); + "Adding checksum algorithm to hex digest map, algorithm: " + checksumAlgorithm); checksumAlgo = MessageDigest.getInstance(checksumAlgorithm); } @@ -1615,9 +1458,7 @@ protected Map writeToTmpFileAndGenerateChecksums( } } catch (IOException ioe) { - String errMsg = - "FileHashStore.writeToTmpFileAndGenerateChecksums - Unexpected Exception: " + ioe - .fillInStackTrace(); + String errMsg = "Unexpected Exception ~ " + ioe.getMessage(); logFileHashStore.error(errMsg); throw ioe; @@ -1640,19 +1481,18 @@ protected Map writeToTmpFileAndGenerateChecksums( hexDigests.put(DefaultHashAlgorithms.SHA_384.getName(), sha384Digest); hexDigests.put(DefaultHashAlgorithms.SHA_512.getName(), sha512Digest); if (generateAddAlgo) { - String extraAlgoDigest = DatatypeConverter.printHexBinary(additionalAlgo.digest()) - .toLowerCase(); + String extraAlgoDigest = + DatatypeConverter.printHexBinary(additionalAlgo.digest()).toLowerCase(); hexDigests.put(additionalAlgorithm, extraAlgoDigest); } if (generateCsAlgo) { - String extraChecksumDigest = DatatypeConverter.printHexBinary(checksumAlgo.digest()) - .toLowerCase(); + String extraChecksumDigest = + DatatypeConverter.printHexBinary(checksumAlgo.digest()).toLowerCase(); 
hexDigests.put(checksumAlgorithm, extraChecksumDigest); } logFileHashStore.debug( - "FileHashStore.writeToTmpFileAndGenerateChecksums - Object has been written to" - + " tmpFile: " + tmpFile.getName() + ". To be moved to: " + sha256Digest - ); + "Object has been written to tmpFile: " + tmpFile.getName() + ". To be moved to: " + + sha256Digest); return hexDigests; } @@ -1671,18 +1511,16 @@ protected Map writeToTmpFileAndGenerateChecksums( * @throws AtomicMoveNotSupportedException When ATOMIC_MOVE is not supported (usually * encountered when moving across file systems) */ - protected void move(File source, File target, String entity) throws IOException, - SecurityException, AtomicMoveNotSupportedException, FileAlreadyExistsException { + protected void move(File source, File target, String entity) + throws IOException, SecurityException, AtomicMoveNotSupportedException, + FileAlreadyExistsException { logFileHashStore.debug( - "FileHashStore.move - called to move entity type: " + entity + ", from source: " - + source + ", to target: " + target - ); + "Moving " + entity + ", from source: " + source + ", to target: " + target); // Validate input parameters - FileHashStoreUtility.ensureNotNull(entity, "entity", "move"); - FileHashStoreUtility.checkForEmptyString(entity, "entity", "move"); - // Entity is only used when checking for an existence of an object + FileHashStoreUtility.ensureNotNull(entity, "entity"); + FileHashStoreUtility.checkForNotEmptyAndValidString(entity, "entity"); if (entity.equals("object") && target.exists()) { - String errMsg = "FileHashStore.move - File already exists for target: " + target; + String errMsg = "File already exists for target: " + target; logFileHashStore.warn(errMsg); return; } @@ -1691,7 +1529,13 @@ protected void move(File source, File target, String entity) throws IOException, // Create parent directory if it doesn't exist if (!destinationDirectory.exists()) { Path destinationDirectoryPath = destinationDirectory.toPath(); - 
Files.createDirectories(destinationDirectoryPath); + try { + Files.createDirectories(destinationDirectoryPath); + + } catch (FileAlreadyExistsException faee) { + logFileHashStore.warn("Directory already exists at: " + destinationDirectoryPath + + " - Skipping directory creation"); + } } // Move file @@ -1700,268 +1544,425 @@ protected void move(File source, File target, String entity) throws IOException, try { Files.move(sourceFilePath, targetFilePath, StandardCopyOption.ATOMIC_MOVE); logFileHashStore.debug( - "FileHashStore.move - file moved from: " + sourceFilePath + ", to: " - + targetFilePath - ); + "File moved from: " + sourceFilePath + ", to: " + targetFilePath); + + } catch (FileAlreadyExistsException faee) { + logFileHashStore.warn( + "File already exists, skipping request to move object. Source: " + source + + ". Target: " + target); } catch (AtomicMoveNotSupportedException amnse) { - logFileHashStore.error( - "FileHashStore.move - StandardCopyOption.ATOMIC_MOVE failed. AtomicMove is" - + " not supported across file systems. Source: " + source + ". Target: " - + target - ); + logFileHashStore.error("StandardCopyOption.ATOMIC_MOVE failed. AtomicMove is" + + " not supported across file systems. Source: " + source + + ". Target: " + target); throw amnse; } catch (IOException ioe) { logFileHashStore.error( - "FileHashStore.move - Unable to move file. Source: " + source + ". Target: " - + target - ); + "Unable to move file. Source: " + source + ". Target: " + target); throw ioe; } } /** - * Verifies that the reference files for the given pid and cid exist and contain - * the expected values. - * + * Attempt to delete an object based on the given content identifier (cid). If the object has + * pids that references it and/or a cid refs file exists, the object will not be deleted. 
+ * + * @param cid Content identifier + * @throws IOException If an issue arises during deletion of object + * @throws NoSuchAlgorithmException Incompatible algorithm used to find relative path to cid + * @throws InterruptedException Issue with synchronization of cid deletion + */ + protected void deleteObjectByCid(String cid) + throws IOException, NoSuchAlgorithmException, InterruptedException { + logFileHashStore.debug("Called to delete data object with cid: " + cid); + // Get expected path of the cid refs file & permanent address of the actual cid + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, cid); + Path expectedRealPath = OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + + try { + synchronizeObjectLockedCids(cid); + if (Files.exists(absCidRefsPath)) { + // The cid refs file exists, so the cid object cannot be deleted. + String warnMsg = "cid refs file still contains references, skipping deletion."; + logFileHashStore.warn(warnMsg); + } else { + // If file exists, delete it. 
+ if (Files.exists(expectedRealPath)) { + Files.delete(expectedRealPath); + } + String debugMsg = "Object deleted at" + expectedRealPath; + logFileHashStore.debug(debugMsg); + } + } finally { + // Release lock + releaseObjectLockedCids(cid); + } + } + + /** + * Create the pid refs file and create/update cid refs files in HashStore to establish the + * relationship between a 'pid' and a 'cid' + * + * @param pid Persistent or authority-based identifier + * @param cid Content identifier + * @throws NoSuchAlgorithmException If there is an issue related to calculating hashes + * @throws IOException If there is an issue reading/writing a refs file + * @throws InterruptedException If there is an issue when synchronizing pid or cid values + */ + protected void storeHashStoreRefsFiles(String pid, String cid) + throws NoSuchAlgorithmException, IOException, InterruptedException { + try { + // Immediately synchronize cid and pid + synchronizeObjectLockedCids(cid); + synchronizeReferenceLockedPids(pid); + + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + + if (Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Confirm that reference files are where they are expected to be + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + // We throw an exception so the client is aware that everything is in place + String errMsg = + "Object with cid: " + cid + " already exists and is tagged with pid: " + pid; + logFileHashStore.error(errMsg); + throw new HashStoreRefsAlreadyExistException(errMsg); + + } else if (Files.exists(absPidRefsPath) && !Files.exists(absCidRefsPath)) { + // If pid refs exists, it can only contain and reference one cid + // First, compare the cid retrieved from the pid refs file from the supplied cid + String retrievedCid = new String(Files.readAllBytes(absPidRefsPath)); + if (retrievedCid.equalsIgnoreCase(cid)) { + // The pid correctly 
references the cid, but the cid refs file is missing + // Create the file and verify tagging process + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "Pid refs file exists for pid: " + pid + ", but cid refs file for: " + cid + + " is missing. Missing cid refs file created and tagging completed."); + return; + } else { + // Check if the retrieved cid refs file exists and pid is referenced + Path retrievedAbsCidRefsPath = + getHashStoreRefsPath(retrievedCid, HashStoreIdTypes.cid); + if (Files.exists(retrievedAbsCidRefsPath) && isStringInRefsFile( + pid, retrievedAbsCidRefsPath)) { + // This pid is accounted for and tagged as expected. + String errMsg = "Pid refs file already exists for pid: " + pid + + ", and the associated cid refs file contains the " + + "pid. A pid can only reference one cid."; + logFileHashStore.error(errMsg); + throw new PidRefsFileExistsException(errMsg); + } + // Orphaned pid refs file found, the retrieved cid refs file exists + // but doesn't contain the pid. Proceed to overwrite the pid refs file. 
+ } + } else if (!Files.exists(absPidRefsPath) && Files.exists(absCidRefsPath)) { + // Only update cid refs file if pid is not in the file + if (!isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.add); + } + // Get the pid refs file and verify tagging process + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); + File absPathPidRefsFile = absPidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info("Object with cid: " + cid + + " has been updated and tagged successfully with pid: " + + pid); + return; + } + + // Get pid and cid refs files + File pidRefsTmpFile = writeRefsFile(cid, HashStoreIdTypes.pid.name()); + File cidRefsTmpFile = writeRefsFile(pid, HashStoreIdTypes.cid.name()); + // Move refs files to permanent location + File absPathPidRefsFile = absPidRefsPath.toFile(); + File absPathCidRefsFile = absCidRefsPath.toFile(); + move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + move(cidRefsTmpFile, absPathCidRefsFile, "refs"); + // Verify tagging process, this throws an exception if there's an issue + verifyHashStoreRefsFiles(pid, cid, absPidRefsPath, absCidRefsPath); + logFileHashStore.info( + "Object with cid: " + cid + " has been tagged successfully with pid: " + pid); + } finally { + releaseObjectLockedCids(cid); + releaseReferenceLockedPids(pid); + } + } + + /** + * Untags a data object in HashStore by deleting the 'pid reference file' and removing the 'pid' + * from the 'cid reference file'. This method will never delete a data object. 
+ * + * @param pid Persistent or authority-based identifier + * @param cid Content identifier of data object + * @throws InterruptedException When there is a synchronization issue + * @throws NoSuchAlgorithmException When there is an algorithm used that is not supported + * @throws IOException When there is an issue deleting refs files + */ + protected void unTagObject(String pid, String cid) + throws InterruptedException, NoSuchAlgorithmException, IOException { + // Validate input parameters + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + FileHashStoreUtility.ensureNotNull(cid, "cid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(cid, "cid"); + + Collection deleteList = new ArrayList<>(); + + try { + synchronizeObjectLockedPids(pid); + // Before we begin untagging process, we look for the `cid` by calling + // `findObject` which will throw custom exceptions if there is an issue with + // the reference files, which help us determine the path to proceed with. + try { + ObjectInfo objInfo = findObject(pid); + cid = objInfo.cid(); + try { + // If no exceptions are thrown, we proceed to synchronization based on the `cid` + synchronizeObjectLockedCids(cid); + // Get paths to reference files to work on + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + + // Begin deletion process + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } else { + String warnMsg = "Cid referenced by pid: " + pid + + " is not empty (refs exist for cid). 
Skipping object " + "deletion."; + logFileHashStore.warn(warnMsg); + } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete all related/relevant items with the least amount of delay + FileHashStoreUtility.deleteListItems(deleteList); + logFileHashStore.info("Untagged pid: " + pid + " with cid: " + cid); + + } finally { + releaseObjectLockedCids(cid); + } + + } catch (OrphanPidRefsFileException oprfe) { + // `findObject` throws this exception when the cid refs file doesn't exist, + // so we only need to delete the pid refs file + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "Cid refs file does not exist for pid: " + pid + + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + + } catch (OrphanRefsFilesException orfe) { + // `findObject` throws this exception when the pid and cid refs file exists, + // but the actual object being referenced by the pid does not exist + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + String cidRead = new String(Files.readAllBytes(absPidRefsPath)); + + try { + // Since we must access the cid reference file, the `cid` must be synchronized + synchronizeObjectLockedCids(cidRead); + + Path absCidRefsPath = getHashStoreRefsPath(cidRead, HashStoreIdTypes.cid); + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + if (Files.size(absCidRefsPath) == 0) { + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absCidRefsPath)); + } + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "Object with cid: " + cidRead + + " does not exist, but pid and cid reference file found for pid: " + pid + + ". 
Deleted pid and cid ref files."; + logFileHashStore.warn(warnMsg); + + } finally { + releaseObjectLockedCids(cidRead); + } + } catch (PidNotFoundInCidRefsFileException pnficrfe) { + // `findObject` throws this exception when both the pid and cid refs file exists + // but the pid is not found in the cid refs file. + + // Rename pid refs file for deletion + Path absPidRefsPath = getHashStoreRefsPath(pid, HashStoreIdTypes.pid); + deleteList.add(FileHashStoreUtility.renamePathForDeletion(absPidRefsPath)); + // Delete items + FileHashStoreUtility.deleteListItems(deleteList); + String warnMsg = "Pid not found in expected cid refs file for pid: " + pid + + ". Deleted orphan pid refs file."; + logFileHashStore.warn(warnMsg); + } catch (PidRefsFileNotFoundException prfnfe) { + // `findObject` throws this exception if the pid refs file is not found + // Check to see if pid is in the `cid refs file`and attempt to remove it + Path absCidRefsPath = getHashStoreRefsPath(cid, HashStoreIdTypes.cid); + if (Files.exists(absCidRefsPath) && isStringInRefsFile(pid, absCidRefsPath)) { + updateRefsFile(pid, absCidRefsPath, HashStoreRefUpdateTypes.remove); + String errMsg = "Pid refs file not found, removed pid found in cid refs file: " + + absCidRefsPath; + logFileHashStore.warn(errMsg); + } + } + } finally { + releaseObjectLockedPids(pid); + } + } + + /** + * Verifies that the reference files for the given pid and cid exist and contain the expected + * values. 
+ * * @param pid Authority-based or persistent identifier * @param cid Content identifier * @param absPidRefsPath Path to where the pid refs file exists * @param absCidRefsPath Path to where the cid refs file exists - * @throws FileNotFoundException Any refs files are missing - * @throws IOException Unable to read any of the refs files or if the refs content - * is not what is expected + * @throws FileNotFoundException Any refs files are missing + * @throws CidNotFoundInPidRefsFileException When the expected cid is not found in the pid refs + * @throws PidNotFoundInCidRefsFileException When a pid is not found in the cid refs file + * @throws IOException Unable to read any of the refs files */ protected void verifyHashStoreRefsFiles( - String pid, String cid, Path absPidRefsPath, Path absCidRefsPath - ) throws FileNotFoundException, IOException { - // First confirm that the files were created + String pid, String cid, Path absPidRefsPath, Path absCidRefsPath) + throws FileNotFoundException, CidNotFoundInPidRefsFileException, + PidNotFoundInCidRefsFileException, IOException { + // First confirm that the refs files have been created/moved to where they need to be if (!Files.exists(absCidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - cid refs file is missing: " - + absCidRefsPath + " for pid: " + pid; + String errMsg = "Cid refs file is missing: " + absCidRefsPath + " for pid: " + pid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } if (!Files.exists(absPidRefsPath)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - pid refs file is missing: " - + absPidRefsPath + " for cid: " + cid; + String errMsg = "Pid refs file is missing: " + absPidRefsPath + " for cid: " + cid; logFileHashStore.error(errMsg); throw new FileNotFoundException(errMsg); } - // Now verify the content + // Now confirm that the content is what is expected try { String cidRead = new String(Files.readAllBytes(absPidRefsPath)); if 
(!cidRead.equals(cid)) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Unexpected cid: " - + cidRead + " found in pid refs file: " + absPidRefsPath + ". Expected cid: " - + cid; + String errMsg = + "Unexpected cid: " + cidRead + " found in pid refs file: " + absPidRefsPath + + ". Expected cid: " + cid; logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new CidNotFoundInPidRefsFileException(errMsg); } - boolean pidFoundInCidRefFiles = isPidInCidRefsFile(pid, absCidRefsPath); - if (!pidFoundInCidRefFiles) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - Missing expected pid: " - + pid + " in cid refs file: " + absCidRefsPath; + if (!isStringInRefsFile(pid, absCidRefsPath)) { + String errMsg = + "Missing expected pid: " + pid + " in cid refs file: " + absCidRefsPath; logFileHashStore.error(errMsg); - throw new IOException(errMsg); + throw new PidNotFoundInCidRefsFileException(errMsg); } } catch (IOException ioe) { - String errMsg = "FileHashStore.verifyHashStoreRefsFiles - " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); + logFileHashStore.error(ioe.getMessage()); + throw ioe; } } + /** - * Writes the given 'pid' into a file in the 'cid' refs file format, which consists of - * multiple pids that references a 'cid' each on its own line (delimited by "\n"). + * Writes the given ref into a temporary file. The client must explicitly move this file to + * where it belongs otherwise it will be removed during garbage collection. 
* - * @param pid Authority-based or persistent identifier to write - * @throws IOException Failure to write pid refs file + * @param ref Authority-based or persistent identifier to write + * @param refType Type of reference 'pid' or 'cid' to include in the log + * @return File object with single reference + * @throws IOException Failure to write refs file */ - protected File writeCidRefsFile(String pid) throws IOException { + protected File writeRefsFile(String ref, String refType) throws IOException { File cidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(cidRefsTmpFile.toPath()), StandardCharsets.UTF_8 - ) - )) { - writer.write(pid); + new OutputStreamWriter(Files.newOutputStream(cidRefsTmpFile.toPath()), + StandardCharsets.UTF_8))) { + writer.write(ref); writer.close(); - logFileHashStore.debug( - "FileHashStore.writeCidRefsFile - cid refs file written for: " + pid - ); + logFileHashStore.debug(refType + " refs file written for: " + ref); return cidRefsTmpFile; } catch (IOException ioe) { - logFileHashStore.error( - "FileHashStore.writeCidRefsFile - Unable to write cid refs file for pid: " + pid - + " IOException: " + ioe.getMessage() - ); - throw ioe; - } - } - - /** - * Writes the given 'cid' into a file in the 'pid' refs file format. A pid refs file - * contains a single 'cid'. Note, a 'pid' can only ever reference one 'cid'. 
- * - * @param cid Content identifier to write - * @throws IOException Failure to write pid refs file - */ - protected File writePidRefsFile(String cid) throws IOException { - File pidRefsTmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - try (BufferedWriter writer = new BufferedWriter( - new OutputStreamWriter( - Files.newOutputStream(pidRefsTmpFile.toPath()), StandardCharsets.UTF_8 - ) - )) { - writer.write(cid); - writer.close(); - - logFileHashStore.debug( - "FileHashStore.writePidRefsFile - pid refs file written for: " + cid - ); - return pidRefsTmpFile; - - } catch (IOException ioe) { - String errMsg = - "FileHashStore.writePidRefsFile - Unable to write pid refs file for cid: " + cid - + " IOException: " + ioe.getMessage(); + String errMsg = "Unable to write refs file for ref: " + refType + " IOException: " + + ioe.getMessage(); logFileHashStore.error(errMsg); throw new IOException(errMsg); } } /** - * Checks a given cid refs file for a pid. This is case-sensitive. - * - * @param pid Authority-based or persistent identifier to search - * @param absCidRefsPath Path to the cid refs file to check + * Checks a given refs file for a ref. This is case-sensitive. + * + * @param ref Authority-based or persistent identifier to search + * @param absRefsPath Path to the refs file to check * @return True if cid is found, false otherwise * @throws IOException If unable to read the cid refs file. 
*/ - protected boolean isPidInCidRefsFile(String pid, Path absCidRefsPath) throws IOException { - List lines = Files.readAllLines(absCidRefsPath); - boolean pidFoundInCidRefFiles = false; + protected boolean isStringInRefsFile(String ref, Path absRefsPath) throws IOException { + List lines = Files.readAllLines(absRefsPath); + boolean refFoundInCidRefFiles = false; for (String line : lines) { - if (line.equals(pid)) { - pidFoundInCidRefFiles = true; + if (line.equals(ref)) { + refFoundInCidRefFiles = true; break; } } - return pidFoundInCidRefFiles; + return refFoundInCidRefFiles; } /** - * Updates a cid refs file with a pid that references the cid - * - * @param pid Authority-based or persistent identifier - * @param absCidRefsPath Path to the cid refs file to update - * @throws IOException Issue with updating a cid refs file + * Adds or removes a ref value from a refs file given an 'updateType' + * + * @param ref Authority-based or persistent identifier + * @param absRefsPath Path to the refs file to update + * @param updateType {@link HashStoreRefUpdateTypes} + * @throws IOException Issue with updating or accessing a refs file */ - protected void updateCidRefsFiles(String pid, Path absCidRefsPath) throws IOException { + protected void updateRefsFile(String ref, Path absRefsPath, HashStoreRefUpdateTypes updateType) + throws IOException { // This update process is atomic, so we first write the updated content // into a temporary file before overwriting it. 
File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); Path tmpFilePath = tmpFile.toPath(); + try { // Obtain a lock on the file before updating it - try (FileChannel channel = FileChannel.open( - absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE - ); FileLock ignored = channel.lock()) { - List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); - lines.add(pid); - - Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); - move(tmpFile, absCidRefsPath.toFile(), "refs"); - logFileHashStore.debug( - "FileHashStore.updateCidRefsFiles - Pid: " + pid - + " has been added to cid refs file: " + absCidRefsPath - ); - } - // The lock is automatically released when the try block exits - } catch (IOException ioe) { - String errMsg = "FileHashStore.updateCidRefsFiles - " + ioe.getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); - } - } - - /** - * Deletes a pid references file - * - * @param pid Authority-based or persistent identifier - * @throws NoSuchAlgorithmException Incompatible algorithm used to find pid refs file - * @throws IOException Unable to delete object or open pid refs file - */ - protected void deletePidRefsFile(String pid) throws NoSuchAlgorithmException, IOException { - FileHashStoreUtility.ensureNotNull(pid, "pid", "deletePidRefsFile"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "deletePidRefsFile"); - - Path absPidRefsPath = getRealPath(pid, "refs", "pid"); - // Check to see if pid refs file exists - if (!Files.exists(absPidRefsPath)) { - String errMsg = - "FileHashStore.deletePidRefsFile - File refs file does not exist for pid: " + pid - + " with address: " + absPidRefsPath; - logFileHashStore.error(errMsg); - throw new FileNotFoundException(errMsg); - - } else { - // Proceed to delete - Files.delete(absPidRefsPath); - logFileHashStore.debug( - "FileHashStore.deletePidRefsFile - Pid refs file deleted for: " + pid - + " with address: " + absPidRefsPath - ); 
- } - } - + try (FileChannel channel = FileChannel.open(absRefsPath, StandardOpenOption.READ, + StandardOpenOption.WRITE); + FileLock ignored = channel.lock()) { + Collection lines = new ArrayList<>(Files.readAllLines(absRefsPath)); + + if (updateType.equals(HashStoreRefUpdateTypes.add)) { + lines.add(ref); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absRefsPath.toFile(), "refs"); + logFileHashStore.debug( + "Ref: " + ref + " has been added to refs file: " + absRefsPath); + } - /** - * Removes a pid from a cid refs file. - * - * @param pid Authority-based or persistent identifier. - * @param absCidRefsPath Path to the cid refs file to remove the pid from - * @throws IOException Unable to access cid refs file - */ - protected void deleteCidRefsPid(String pid, Path absCidRefsPath) throws IOException { - FileHashStoreUtility.ensureNotNull(pid, "pid", "deleteCidRefsPid"); - FileHashStoreUtility.ensureNotNull(absCidRefsPath, "absCidRefsPath", "deleteCidRefsPid"); - // This deletes process is atomic, so we first write the updated content - // into a temporary file before overwriting it. 
- File tmpFile = FileHashStoreUtility.generateTmpFile("tmp", REFS_TMP_FILE_DIRECTORY); - Path tmpFilePath = tmpFile.toPath(); - try (FileChannel channel = FileChannel.open( - absCidRefsPath, StandardOpenOption.READ, StandardOpenOption.WRITE - ); FileLock ignored = channel.lock()) { - // Read all lines into a List - List lines = new ArrayList<>(Files.readAllLines(absCidRefsPath)); - lines.remove(pid); - Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); - move(tmpFile, absCidRefsPath.toFile(), "refs"); - logFileHashStore.debug( - "FileHashStore.deleteCidRefsPid - Pid: " + pid + " removed from cid refs file: " - + absCidRefsPath - ); + if (updateType.equals(HashStoreRefUpdateTypes.remove)) { + lines.remove(ref); + Files.write(tmpFilePath, lines, StandardOpenOption.WRITE); + move(tmpFile, absRefsPath.toFile(), "refs"); + logFileHashStore.debug( + "Ref: " + ref + " has been removed from refs file: " + absRefsPath); + } + } // The lock is automatically released when the try block exits } catch (IOException ioe) { - String errMsg = "FileHashStore.deleteCidRefsPid - Unable to remove pid: " + pid - + " from cid refs file: " + absCidRefsPath + ". Additional Info: " + ioe - .getMessage(); - logFileHashStore.error(errMsg); - throw new IOException(errMsg); + logFileHashStore.error(ioe.getMessage()); + throw ioe; } } /** * Takes a given input stream and writes it to its permanent address on disk based on the * SHA-256 hex digest of the given pid + formatId. If no formatId is supplied, it will use the - * default store namespace as defined by `hashstore.yaml` + * default store namespace as defined by {@code hashstore.yaml}. 
* * @param metadata InputStream to metadata * @param pid Authority-based identifier @@ -1974,49 +1975,39 @@ protected void deleteCidRefsPid(String pid, Path absCidRefsPath) throws IOExcept protected String putMetadata(InputStream metadata, String pid, String formatId) throws NoSuchAlgorithmException, IOException { logFileHashStore.debug( - "FileHashStore.putMetadata - Called to put metadata for pid:" + pid - + " , with metadata namespace: " + formatId - ); - + "Writing metadata for pid: " + pid + " , with metadata namespace: " + formatId); // Validate input parameters - FileHashStoreUtility.ensureNotNull(metadata, "metadata", "putMetadata"); - FileHashStoreUtility.ensureNotNull(pid, "pid", "putMetadata"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "putMetadata"); + FileHashStoreUtility.ensureNotNull(metadata, "metadata"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); - // Determine metadata namespace // If no formatId is supplied, use the default namespace to store metadata String checkedFormatId; if (formatId == null) { checkedFormatId = DEFAULT_METADATA_NAMESPACE; } else { - FileHashStoreUtility.checkForEmptyString(formatId, "formatId", "putMetadata"); + FileHashStoreUtility.checkForNotEmptyAndValidString(formatId, "formatId"); checkedFormatId = formatId; } // Get permanent address for the given metadata document - String metadataCid = FileHashStoreUtility.getPidHexDigest( - pid + checkedFormatId, OBJECT_STORE_ALGORITHM - ); - Path metadataCidPath = getRealPath(pid, "metadata", checkedFormatId); - - // Store metadata to tmpMetadataFile - File tmpMetadataFile = FileHashStoreUtility.generateTmpFile( - "tmp", METADATA_TMP_FILE_DIRECTORY - ); + // All metadata documents for a pid are stored in a directory that is formed + // by using the hash of the 'pid', with the file name being the hash of the 'pid+formatId' + Path pathToStoredMetadata = getHashStoreMetadataPath(pid, 
checkedFormatId); + + File tmpMetadataFile = + FileHashStoreUtility.generateTmpFile("tmp", METADATA_TMP_FILE_DIRECTORY); boolean tmpMetadataWritten = writeToTmpMetadataFile(tmpMetadataFile, metadata); if (tmpMetadataWritten) { logFileHashStore.debug( - "FileHashStore.putMetadata - tmp metadata file has been written, moving to" - + " permanent location: " + metadataCidPath - ); - File permMetadataFile = metadataCidPath.toFile(); + "Tmp metadata file has been written, moving to" + " permanent location: " + + pathToStoredMetadata); + File permMetadataFile = pathToStoredMetadata.toFile(); move(tmpMetadataFile, permMetadataFile, "metadata"); } logFileHashStore.debug( - "FileHashStore.putMetadata - Move metadata success, permanent address: " - + metadataCidPath - ); - return metadataCid; + "Metadata moved successfully, permanent address: " + pathToStoredMetadata); + return pathToStoredMetadata.toString(); } /** @@ -2042,10 +2033,7 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea return true; } catch (IOException ioe) { - String errMsg = - "FileHashStore.writeToTmpMetadataFile - Unexpected IOException encountered: " + ioe - .getMessage(); - logFileHashStore.error(errMsg); + logFileHashStore.error(ioe.getMessage()); throw ioe; } finally { @@ -2055,62 +2043,288 @@ protected boolean writeToTmpMetadataFile(File tmpFile, InputStream metadataStrea } /** - * Get the absolute path of a HashStore object or metadata file + * Get the absolute path to a HashStore data object * - * @param abId Authority-based, persistent or content identifier - * @param entity "object" or "metadata" - * @param formatId Metadata namespace or reference type (pid/cid) - * @return Actual path to object - * @throws IllegalArgumentException If entity is not object or metadata - * @throws NoSuchAlgorithmException If store algorithm is not supported - * @throws IOException If unable to retrieve cid + * @param abpId Authority-based or persistent identifier + * @return Path to 
the HasHStore data object + * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported + * @throws IOException Issue when reading a pid refs file to retrieve a 'cid' */ - protected Path getRealPath(String abId, String entity, String formatId) - throws IllegalArgumentException, NoSuchAlgorithmException, IOException { + protected Path getHashStoreDataObjectPath(String abpId) + throws NoSuchAlgorithmException, IOException { + // Retrieve the 'cid' from the pid refs file + String objectCid; + String hashedId = FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); + String pidRefsFileRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + hashedId); + Path pathToPidRefsFile = REFS_PID_FILE_DIRECTORY.resolve(pidRefsFileRelativePath); + if (!Files.exists(pathToPidRefsFile)) { + String errMsg = + "Pid Refs file does not exist for pid: " + abpId + " with object address: " + + pathToPidRefsFile + ". Cannot retrieve " + "cid."; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } else { + objectCid = new String(Files.readAllBytes(pathToPidRefsFile)); + } + // If cid is found, return the expected real path to object + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + objectCid); + // Real path to the data object + return OBJECT_STORE_DIRECTORY.resolve(objRelativePath); + } + + /** + * Get the absolute path to a HashStore metadata document + * + * @param abpId Authority-based or persistent identifier + * @param formatId Metadata formatId or namespace + * @return Path to the requested metadata document + * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported + */ + protected Path getHashStoreMetadataPath(String abpId, String formatId) + throws NoSuchAlgorithmException { + // Get the pid metadata directory + String hashedId = 
FileHashStoreUtility.getPidHexDigest(abpId, OBJECT_STORE_ALGORITHM); + String pidMetadataDirRelPath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + hashedId); + // The file name for the metadata document is the hash of the supplied 'pid + 'formatId' + String metadataDocHash = + FileHashStoreUtility.getPidHexDigest(abpId + formatId, OBJECT_STORE_ALGORITHM); + // Real path to metadata doc + return METADATA_STORE_DIRECTORY.resolve(pidMetadataDirRelPath).resolve(metadataDocHash); + } + + /** + * Get an InputStream to a metadata document if it exists in FileHashStore + * + * @param pid Persistent or authority-based identifier + * @param formatId Metadata namespace + * @return InputStream to metadata doc + * @throws NoSuchAlgorithmException An algorithm used in the calculation is not supported + * @throws FileNotFoundException If the metadata document is not found + * @throws IOException If there is an issue returning an input stream + */ + protected InputStream getHashStoreMetadataInputStream(String pid, String formatId) + throws NoSuchAlgorithmException, IOException, FileNotFoundException { + Path metadataCidPath = getHashStoreMetadataPath(pid, formatId); + + // Check to see if metadata exists + if (!Files.exists(metadataCidPath)) { + String errMsg = + "Metadata does not exist for pid: " + pid + " with formatId: " + formatId + + ". Metadata address: " + metadataCidPath; + logFileHashStore.warn(errMsg); + throw new FileNotFoundException(errMsg); + } + + // Return an InputStream to read from the metadata document + try { + InputStream metadataCidInputStream = Files.newInputStream(metadataCidPath); + logFileHashStore.info( + "Retrieved metadata for pid: " + pid + " with formatId: " + formatId); + return metadataCidInputStream; + + } catch (IOException ioe) { + String errMsg = + "Unexpected error when creating InputStream for pid: " + pid + " with formatId: " + + formatId + ". 
IOException: " + ioe.getMessage(); + logFileHashStore.error(errMsg); + throw new IOException(errMsg); + } + } + + /** + * Get the absolute path to a HashStore pid or cid ref file + * + * @param abpcId Authority-based identifier, persistent identifier or content identifier + * @param refType {@link HashStoreIdTypes} + * @return Path to the requested refs file + * @throws NoSuchAlgorithmException When an algorithm used to calculate a hash is not supported + */ + protected Path getHashStoreRefsPath(String abpcId, HashStoreIdTypes refType) + throws NoSuchAlgorithmException { Path realPath; - if (entity.equalsIgnoreCase("object")) { - // 'abId' is expected to be a pid - String objectCid = findObject(abId); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); - realPath = OBJECT_STORE_DIRECTORY.resolve(objShardString); - - } else if (entity.equalsIgnoreCase("metadata")) { - String objectCid = FileHashStoreUtility.getPidHexDigest( - abId + formatId, OBJECT_STORE_ALGORITHM - ); - String objShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, objectCid - ); - realPath = METADATA_STORE_DIRECTORY.resolve(objShardString); - - } else if (entity.equalsIgnoreCase("refs")) { - if (formatId.equalsIgnoreCase("pid")) { - String pidRefId = FileHashStoreUtility.getPidHexDigest( - abId, OBJECT_STORE_ALGORITHM - ); - String pidShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, pidRefId - ); - realPath = REFS_PID_FILE_DIRECTORY.resolve(pidShardString); - } else if (formatId.equalsIgnoreCase("cid")) { - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - DIRECTORY_DEPTH, DIRECTORY_WIDTH, abId - ); - realPath = REFS_CID_FILE_DIRECTORY.resolve(cidShardString); - } else { - String errMsg = - "FileHashStore.getRealPath - formatId must be 'pid' or 'cid' when entity is 'refs'"; - logFileHashStore.error(errMsg); - 
throw new IllegalArgumentException(errMsg); - } - } else { - throw new IllegalArgumentException( - "FileHashStore.getRealPath - entity must be 'object' or 'metadata'" - ); + switch (refType) { + case pid -> { + String hashedId = + FileHashStoreUtility.getPidHexDigest(abpcId, OBJECT_STORE_ALGORITHM); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + hashedId); + realPath = REFS_PID_FILE_DIRECTORY.resolve(pidRelativePath); + } + case cid -> { + String cidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(DIRECTORY_DEPTH, DIRECTORY_WIDTH, + abpcId); + realPath = REFS_CID_FILE_DIRECTORY.resolve(cidRelativePath); + } + default -> throw new IllegalArgumentException( + "Ref type must be a type of HashStoreIdTypes " + "(pid or cid)"); } return realPath; } + + /** + * Storing, deleting and untagging objects are synchronized together. Duplicate store object + * requests for a pid are rejected, but deleting an object will wait for a pid to be released if + * it's found to be in use before proceeding. 
+ * + * @param pid Persistent or authority-based identifier + * @throws InterruptedException When an issue occurs when attempting to sync the pid + */ + private static void synchronizeObjectLockedPids(String pid) throws InterruptedException { + synchronized (objectLockedPids) { + while (objectLockedPids.contains(pid)) { + try { + objectLockedPids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync pid: " + pid; + logFileHashStore.warn(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug("Synchronizing objectLockedPids for pid: " + pid); + objectLockedPids.add(pid); + } + } + + /** + * Remove the given pid from 'objectLockedPids' and notify other threads + * + * @param pid Content identifier + */ + private static void releaseObjectLockedPids(String pid) { + synchronized (objectLockedPids) { + logFileHashStore.debug("Releasing objectLockedPids for pid: " + pid); + objectLockedPids.remove(pid); + objectLockedPids.notify(); + } + } + + /** + * All requests to store/delete metadata will be accepted but must be executed serially + * + * @param metadataDocId Metadata document id hash(pid+formatId) + * @throws InterruptedException When an issue occurs when attempting to sync the metadata doc + */ + private static void synchronizeMetadataLockedDocIds(String metadataDocId) + throws InterruptedException { + synchronized (metadataLockedDocIds) { + while (metadataLockedDocIds.contains(metadataDocId)) { + try { + metadataLockedDocIds.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync metadata doc: " + + metadataDocId; + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug( + "Synchronizing metadataLockedDocIds for metadata doc: " + metadataDocId); + metadataLockedDocIds.add(metadataDocId); + } + } + + /** + * 
Remove the given metadata doc from 'metadataLockedDocIds' and notify other threads + * + * @param metadataDocId Metadata document id hash(pid+formatId) + */ + private static void releaseMetadataLockedDocIds(String metadataDocId) { + synchronized (metadataLockedDocIds) { + logFileHashStore.debug( + "Releasing metadataLockedDocIds for metadata doc: " + metadataDocId); + metadataLockedDocIds.remove(metadataDocId); + metadataLockedDocIds.notify(); + } + } + + /** + * Multiple threads may access a data object via its 'cid' or the respective 'cid reference + * file' (which contains a list of 'pid's that reference a 'cid') and this needs to be + * coordinated. + * + * @param cid Content identifier + * @throws InterruptedException When an issue occurs when attempting to sync the pid + */ + private static void synchronizeObjectLockedCids(String cid) throws InterruptedException { + synchronized (objectLockedCids) { + while (objectLockedCids.contains(cid)) { + try { + objectLockedCids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync cid: " + cid; + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug("Synchronizing objectLockedCids for cid: " + cid); + objectLockedCids.add(cid); + } + } + + /** + * Remove the given cid from 'objectLockedCids' and notify other threads + * + * @param cid Content identifier + */ + private static void releaseObjectLockedCids(String cid) { + synchronized (objectLockedCids) { + logFileHashStore.debug("Releasing objectLockedCids for cid: " + cid); + objectLockedCids.remove(cid); + objectLockedCids.notify(); + } + } + + /** + * Synchronize the pid tagging process since {@code tagObject} is a Public API method that can + * be called directly. This is used in the scenario when the client is missing metadata but must + * store the data object first. 
+ * + * @param pid Persistent or authority-based identifier + * @throws InterruptedException When an issue occurs when attempting to sync the pid + */ + private static void synchronizeReferenceLockedPids(String pid) throws InterruptedException { + synchronized (referenceLockedPids) { + while (referenceLockedPids.contains(pid)) { + try { + referenceLockedPids.wait(TIME_OUT_MILLISEC); + + } catch (InterruptedException ie) { + String errMsg = + "Synchronization has been interrupted while trying to sync pid: " + pid; + logFileHashStore.error(errMsg); + throw new InterruptedException(errMsg); + } + } + logFileHashStore.debug("Synchronizing referenceLockedPids for pid: " + pid); + referenceLockedPids.add(pid); + } + } + + /** + * Remove the given pid from 'referenceLockedPids' and notify other threads + * + * @param pid Persistent or authority-based identifier + */ + private static void releaseReferenceLockedPids(String pid) { + synchronized (referenceLockedPids) { + logFileHashStore.debug("Releasing referenceLockedPids for pid: " + pid); + referenceLockedPids.remove(pid); + referenceLockedPids.notify(); + } + } } diff --git a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java index 6b7ae759..105ac569 100644 --- a/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java +++ b/src/main/java/org/dataone/hashstore/filehashstore/FileHashStoreUtility.java @@ -1,40 +1,50 @@ package org.dataone.hashstore.filehashstore; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import 
java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Objects; import java.util.Random; import java.util.stream.Stream; import javax.xml.bind.DatatypeConverter; /** - * FileHashStoreUtility is a utility class that encapsulates generic or shared functionality - * in FileHashStore and/or related classes. + * FileHashStoreUtility is a utility class that encapsulates generic or shared functionality in + * FileHashStore and/or related classes. */ public class FileHashStoreUtility { + private static final Log log = LogFactory.getLog(FileHashStoreUtility.class); + /** * Checks whether a given object is null and throws an exception if so * * @param object Object to check * @param argument Value that is being checked - * @param method Calling method or class * @throws IllegalArgumentException If the object is null */ - public static void ensureNotNull(Object object, String argument, String method) + public static void ensureNotNull(Object object, String argument) throws IllegalArgumentException { if (object == null) { - String errMsg = "FileHashStoreUtility.ensureNotNull - Calling Method: " + method - + "(): " + argument + " cannot be null."; - throw new IllegalArgumentException(errMsg); + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = + "Calling Method: " + stackTraceElements[2].getMethodName() + "()'s argument: " + + argument + " cannot be null."; + throw new IllegalArgumentException(msg); } } @@ -50,7 +60,7 @@ public static void ensureNotNull(Object object, String argument, String method) public static String calculateHexDigest(InputStream dataStream, String algorithm) throws IOException, NoSuchAlgorithmException { MessageDigest mdObject = MessageDigest.getInstance(algorithm); - try { + try (dataStream) { byte[] buffer = new byte[8192]; int bytesRead; while ((bytesRead = dataStream.read(buffer)) != -1) { @@ -59,14 +69,9 @@ public static String calculateHexDigest(InputStream dataStream, 
String algorithm } } catch (IOException ioe) { - String errMsg = - "FileHashStoreUtility.calculateHexDigest - Unexpected IOException encountered: " - + ioe.getMessage(); + String errMsg = "Unexpected IOException encountered: " + ioe.getMessage(); throw new IOException(errMsg); - } finally { - // Close dataStream - dataStream.close(); } // mdObjectHexDigest return DatatypeConverter.printHexBinary(mdObject.digest()).toLowerCase(); @@ -83,10 +88,10 @@ public static String calculateHexDigest(InputStream dataStream, String algorithm */ public static String getPidHexDigest(String pid, String algorithm) throws NoSuchAlgorithmException, IllegalArgumentException { - FileHashStoreUtility.ensureNotNull(pid, "pid", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyString(pid, "pid", "getPidHexDigest"); - FileHashStoreUtility.ensureNotNull(algorithm, "algorithm", "getPidHexDigest"); - FileHashStoreUtility.checkForEmptyString(algorithm, "algorithm", "getPidHexDigest"); + FileHashStoreUtility.ensureNotNull(pid, "pid"); + FileHashStoreUtility.checkForNotEmptyAndValidString(pid, "pid"); + FileHashStoreUtility.ensureNotNull(algorithm, "algorithm"); + FileHashStoreUtility.checkForNotEmptyAndValidString(algorithm, "algorithm"); MessageDigest stringMessageDigest = MessageDigest.getInstance(algorithm); byte[] bytes = pid.getBytes(StandardCharsets.UTF_8); @@ -102,7 +107,7 @@ public static String getPidHexDigest(String pid, String algorithm) * @return True if a file is found or the directory is empty, False otherwise * @throws IOException If I/O occurs when accessing directory */ - public static boolean isDirectoryEmpty(Path directory) throws IOException { + public static boolean dirContainsFiles(Path directory) throws IOException { try (Stream stream = Files.list(directory)) { // The findFirst() method is called on the stream created from the given // directory to retrieve the first element. 
If the stream is empty (i.e., the @@ -112,28 +117,139 @@ public static boolean isDirectoryEmpty(Path directory) throws IOException { // findFirst(). If the Optional contains a value (i.e., an element was found), // isPresent() returns true. If the Optional is empty (i.e., the stream is // empty), isPresent() returns false. - return !stream.findFirst().isPresent(); + return stream.findFirst().isPresent(); + } + } + + /** + * Checks a directory for files and returns a list of paths + * + * @param directory Directory to check + * @return List of files + * @throws IOException If I/O occurs when accessing directory + */ + public static List getFilesFromDir(Path directory) throws IOException { + List filePaths = new ArrayList<>(); + if (Files.isDirectory(directory) && dirContainsFiles(directory)) { + try (Stream stream = Files.walk(directory)) { + stream.filter(Files::isRegularFile).forEach(filePaths::add); + } + } + return filePaths; + } + + /** + * Rename the given path to the 'file name' + '_delete' + * + * @param pathToRename The path to the file to be renamed with '_delete' + * @return Path to the file with '_delete' appended + * @throws IOException Issue with renaming the given file path + */ + public static Path renamePathForDeletion(Path pathToRename) throws IOException { + ensureNotNull(pathToRename, "pathToRename"); + if (!Files.exists(pathToRename)) { + String errMsg = "Given path to file: " + pathToRename + " does not exist."; + throw new FileNotFoundException(errMsg); + } + Path parentPath = pathToRename.getParent(); + Path fileName = pathToRename.getFileName(); + String newFileName = fileName.toString() + "_delete"; + + Path deletePath = parentPath.resolve(newFileName); + Files.move(pathToRename, deletePath, StandardCopyOption.ATOMIC_MOVE); + return deletePath; + } + + /** + * Rename the given path slated for deletion by replacing '_delete' with "" + * + * @param pathToRename The path to the file to revert deletion + * @throws IOException Issue with 
renaming the given file path + */ + public static void renamePathForRestoration(Path pathToRename) throws IOException { + ensureNotNull(pathToRename, "pathToRename"); + if (!Files.exists(pathToRename)) { + String errMsg = "Given path to file: " + pathToRename + " does not exist."; + throw new FileNotFoundException(errMsg); + } + Path parentPath = pathToRename.getParent(); + Path fileName = pathToRename.getFileName(); + String newFileName = fileName.toString().replace("_delete", ""); + + Path restorePath = parentPath.resolve(newFileName); + Files.move(pathToRename, restorePath, StandardCopyOption.ATOMIC_MOVE); + } + + /** + * Delete all paths found in the given List object. + * + * @param deleteList Directory to check + */ + public static void deleteListItems(Collection deleteList) { + ensureNotNull(deleteList, "deleteList"); + if (!deleteList.isEmpty()) { + for (Path deleteItem : deleteList) { + if (Files.exists(deleteItem)) { + try { + Files.delete(deleteItem); + } catch (Exception ge) { + String warnMsg = + "Attempted to delete metadata document: " + deleteItem + " but failed." 
+ + " Additional Details: " + ge.getMessage(); + log.warn(warnMsg); + } + + } + } } } /** - * Checks whether a given string is empty and throws an exception if so + * Checks whether a given string is empty or contains illegal characters, and throws an + * exception if so * * @param string String to check * @param argument Value that is being checked - * @param method Calling method - * @throws IllegalArgumentException If the string is empty or null + * @throws IllegalArgumentException If the string is empty or contains illegal characters */ - public static void checkForEmptyString(String string, String argument, String method) + public static void checkForNotEmptyAndValidString(String string, String argument) throws IllegalArgumentException { - ensureNotNull(string, "string", "checkForEmptyString"); - if (string.trim().isEmpty()) { - String errMsg = "FileHashStoreUtility.checkForEmptyString - Calling Method: " + method - + "(): " + argument + " cannot be empty."; - throw new IllegalArgumentException(errMsg); + ensureNotNull(string, "string"); + if (string.isBlank()) { + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = + "Calling Method: " + stackTraceElements[2].getMethodName() + "()'s argument: " + + argument + " cannot be empty, contain empty white spaces, tabs or newlines."; + throw new IllegalArgumentException(msg); + } + if (!isValidString(string)) { + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = + "Calling Method: " + stackTraceElements[2].getMethodName() + "()'s argument: " + + argument + " contains empty white spaces, tabs or newlines."; + throw new IllegalArgumentException(msg); } } + /** + * Iterates over a given string and checks each character to make sure that there are no + * whitespaces, tabs, new lines or other illegal characters. + * + * @param string String to check + * @return True if valid, False if illegal characters found. 
+ */ + public static boolean isValidString(String string) { + boolean valid = true; + for (int i = 0; i < string.length(); i++) { + char ch = string.charAt(i); + if (Character.isWhitespace(ch)) { + valid = false; + break; + } + } + return valid; + } + /** * Checks whether a given long integer is negative or zero * @@ -141,12 +257,12 @@ public static void checkForEmptyString(String string, String argument, String me * @param method Calling method * @throws IllegalArgumentException If longInt is less than or equal */ - public static void checkNotNegativeOrZero(long longInt, String method) - throws IllegalArgumentException { - if (longInt < 0 || longInt == 0) { - String errMsg = "FileHashStoreUtility.checkNotNegative - Calling Method: " + method - + "(): objSize cannot be less than or equal to 0."; - throw new IllegalArgumentException(errMsg); + public static void checkPositive(long longInt) throws IllegalArgumentException { + if (longInt <= 0) { + StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); + String msg = "Calling Method: " + stackTraceElements[2].getMethodName() + + "(): given objSize/long/runnableMethod/etc. 
object cannot be <= 0 "; + throw new IllegalArgumentException(msg); } } @@ -160,7 +276,7 @@ public static void checkNotNegativeOrZero(long longInt, String method) * @return String */ public static String getHierarchicalPathString(int depth, int width, String digest) { - List tokens = new ArrayList<>(); + Collection tokens = new ArrayList<>(); int digestLength = digest.length(); for (int i = 0; i < depth; i++) { int start = i * width; @@ -172,7 +288,7 @@ public static String getHierarchicalPathString(int depth, int width, String dige tokens.add(digest.substring(depth * width)); } - List stringArray = new ArrayList<>(); + Collection stringArray = new ArrayList<>(); for (String str : tokens) { if (!str.trim().isEmpty()) { stringArray.add(str); @@ -183,8 +299,8 @@ public static String getHierarchicalPathString(int depth, int width, String dige } /** - * Creates an empty/temporary file in a given location. If this file is not moved, it will - * be deleted upon JVM gracefully exiting or shutting down. + * Creates an empty/temporary file in a given location. If this file is not moved, it will be + * deleted upon JVM gracefully exiting or shutting down. 
* * @param prefix string to prepend before tmp file * @param directory location to create tmp file @@ -192,8 +308,8 @@ public static String getHierarchicalPathString(int depth, int width, String dige * @throws IOException Issues with generating tmpFile * @throws SecurityException Insufficient permissions to create tmpFile */ - public static File generateTmpFile(String prefix, Path directory) throws IOException, - SecurityException { + public static File generateTmpFile(String prefix, Path directory) + throws IOException, SecurityException { Random rand = new Random(); int randomNumber = rand.nextInt(1000000); String newPrefix = prefix + "-" + System.currentTimeMillis() + randomNumber; @@ -203,4 +319,23 @@ public static File generateTmpFile(String prefix, Path directory) throws IOExcep newFile.deleteOnExit(); return newFile; } + + /** + * Ensures that two objects are equal. If not, throws an IllegalArgumentException. + * + * @param nameValue The name of the object being checked + * @param suppliedValue The value supplied to compare + * @param existingValue The existing value to compare with + * @throws IllegalArgumentException If the supplied value is not equal to the existing value + */ + public static void checkObjectEquality( + String nameValue, Object suppliedValue, Object existingValue) { + if (!Objects.equals(suppliedValue, existingValue)) { + String errMsg = + "FileHashStore.checkConfigurationEquality() - Mismatch in " + nameValue + ": " + + suppliedValue + " does not match the existing configuration value: " + + existingValue; + throw new IllegalArgumentException(errMsg); + } + } } diff --git a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java index 40bd443a..4383e38b 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreClientTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreClientTest.java @@ -12,9 +12,10 @@ import java.nio.file.Path; import java.nio.file.Paths; import 
java.util.ArrayList; -import java.util.List; +import java.util.Collection; import java.util.Properties; +import org.dataone.hashstore.filehashstore.FileHashStoreUtility; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -34,7 +35,7 @@ public class HashStoreClientTest { @BeforeEach public void getHashStore() { String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - Path rootDirectory = tempFolder.resolve("metacat"); + Path rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -42,8 +43,7 @@ public void getHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { hsProperties = storeProperties; @@ -66,7 +66,7 @@ public void getHashStore() { * @return String */ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String digest) { - List tokens = new ArrayList<>(); + Collection tokens = new ArrayList<>(); int digestLength = digest.length(); for (int i = 0; i < dirDepth; i++) { int start = i * dirWidth; @@ -78,7 +78,7 @@ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String di tokens.add(digest.substring(dirDepth * dirWidth)); } - List stringArray = new ArrayList<>(); + Collection stringArray = new ArrayList<>(); for (String str : tokens) { if (!str.trim().isEmpty()) { stringArray.add(str); @@ -89,22 +89,42 @@ protected String getHierarchicalPathString(int dirDepth, int dirWidth, String di } /** - * Utility method to get absolute path of a given object and objType - * ("objects" or "metadata"). 
+ * Utility method to get absolute path of a given object and objType ("objects", "metadata", + * "cid", or "pid"). */ - public Path getObjectAbsPath(String id, String objType) { + public Path getObjectAbsPath(String id, String objType) throws Exception { + String storeAlgo = hsProperties.getProperty("storeAlgorithm"); int shardDepth = Integer.parseInt(hsProperties.getProperty("storeDepth")); int shardWidth = Integer.parseInt(hsProperties.getProperty("storeWidth")); - // Get relative path - String objCidShardString = this.getHierarchicalPathString(shardDepth, shardWidth, id); // Get absolute path Path storePath = Paths.get(hsProperties.getProperty("storePath")); Path absPath = null; if (objType.equals("object")) { + // Get relative path + String objCidShardString = getHierarchicalPathString(shardDepth, shardWidth, id); absPath = storePath.resolve("objects/" + objCidShardString); } if (objType.equals("metadata")) { - absPath = storePath.resolve("metadata/" + objCidShardString); + // Get pid metadata directory hash(pid) + String pidHash = FileHashStoreUtility.getPidHexDigest(id, storeAlgo); + String pidMetadataDirectory = + getHierarchicalPathString(shardDepth, shardWidth, pidHash); + // Get sysmeta name hash(pid+default_formatId) + String metadataDocHash = FileHashStoreUtility.getPidHexDigest( + id + hsProperties.getProperty("storeMetadataNamespace"), storeAlgo); + absPath = storePath.resolve("metadata").resolve(pidMetadataDirectory) + .resolve(metadataDocHash); + } + if (objType.equals("cid")) { + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(shardDepth, shardWidth, id); + absPath = storePath.resolve("refs/cids").resolve(pidRelativePath); + } + if (objType.equals("pid")) { + String hashId = FileHashStoreUtility.getPidHexDigest(id, storeAlgo); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(shardDepth, shardWidth, hashId); + absPath = storePath.resolve("refs/pids").resolve(pidRelativePath); } return absPath; } 
@@ -116,7 +136,7 @@ public Path getObjectAbsPath(String id, String objType) { public void client_createHashStore() throws Exception { String optCreateHashstore = "-chs"; String optStore = "-store"; - String optStorePath = tempFolder + "/metacat"; + String optStorePath = tempFolder + "/hashstore"; String optStoreDepth = "-dp"; String optStoreDepthValue = "3"; String optStoreWidth = "-wp"; @@ -124,10 +144,11 @@ public void client_createHashStore() throws Exception { String optAlgo = "-ap"; String optAlgoValue = "SHA-256"; String optFormatId = "-nsp"; - String optFormatIdValue = "http://ns.dataone.org/service/types/v2.0"; - String[] args = {optCreateHashstore, optStore, optStorePath, optStoreDepth, - optStoreDepthValue, optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, - optFormatId, optFormatIdValue}; + String optFormatIdValue = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; + String[] args = + {optCreateHashstore, optStore, optStorePath, optStoreDepth, optStoreDepthValue, + optStoreWidth, optStoreWidthValue, optAlgo, optAlgoValue, optFormatId, + optFormatIdValue}; HashStoreClient.main(args); Path storePath = Paths.get(optStorePath); @@ -162,9 +183,8 @@ public void client_storeObjects() throws Exception { String optPath = "-path"; String optObjectPath = testDataFile.toString(); String optPid = "-pid"; - String optPidValue = pid; - String[] args = {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, - optPidValue}; + String[] args = + {optStoreObject, optStore, optStorePath, optPath, optObjectPath, optPid, pid}; HashStoreClient.main(args); // Confirm object was stored @@ -203,18 +223,30 @@ public void client_storeMetadata() throws Exception { String optPath = "-path"; String optObjectPath = testDataFile.toString(); String optPid = "-pid"; - String optPidValue = pid; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optStoreMetadata, optStore, 
optStorePath, optPath, optObjectPath, - optPid, optPidValue, optFormatId, optFormatIdValue}; + String[] args = + {optStoreMetadata, optStore, optStorePath, optPath, optObjectPath, optPid, pid, + optFormatId, optFormatIdValue}; HashStoreClient.main(args); // Confirm metadata was stored - Path absPath = getObjectAbsPath( - testData.pidData.get(pid).get("metadata_cid"), "metadata" - ); - assertTrue(Files.exists(absPath)); + // Calculate absolute path + String storeAlgorithm = hsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(hsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(hsProperties.getProperty("storeWidth")); + Path metadataDirectory = + Paths.get(hsProperties.getProperty("storePath")).resolve("metadata"); + String metadataCidPartOne = FileHashStoreUtility.getPidHexDigest(pid, storeAlgorithm); + String pidMetadataDirectory = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataCidPartOne); + // The file name for the metadata document is the hash of the supplied 'formatId' + String metadataCidPartTwo = + FileHashStoreUtility.getPidHexDigest(pid + optFormatIdValue, storeAlgorithm); + Path expectedMetadataPath = + metadataDirectory.resolve(pidMetadataDirectory).resolve(metadataCidPartTwo); + assertTrue(Files.exists(expectedMetadataPath)); // Put things back System.out.flush(); @@ -240,16 +272,17 @@ public void client_retrieveObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + hashStore.storeObject(dataStream, pid, null, null, null, -1); + } // Call client String optRetrieveObject = "-retrieveobject"; String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = 
"-pid"; - String optPidValue = pid; - String[] args = {optRetrieveObject, optStore, optStorePath, optPid, optPidValue}; + String[] args = {optRetrieveObject, optStore, optStorePath, optPid, pid}; HashStoreClient.main(args); // Put things back @@ -276,19 +309,20 @@ public void client_retrieveMetadata() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - hashStore.storeMetadata(metadataStream, pid); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + hashStore.storeMetadata(metadataStream, pid); + } // Call client String optRetrieveMetadata = "-retrievemetadata"; String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, optPidValue, - optFormatId, optFormatIdValue}; + String[] args = {optRetrieveMetadata, optStore, optStorePath, optPid, pid, optFormatId, + optFormatIdValue}; HashStoreClient.main(args); // Put things back @@ -315,20 +349,21 @@ public void client_deleteObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + hashStore.storeObject(dataStream, pid, null, null, null, -1); + } // Call client String optDeleteObject = "-deleteobject"; String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; - String[] args = {optDeleteObject, optStore, optStorePath, optPid, 
optPidValue}; + String[] args = {optDeleteObject, optStore, optStorePath, optPid, pid}; HashStoreClient.main(args); // Confirm object was deleted - Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("object_cid"), "object"); + Path absPath = getObjectAbsPath(testData.pidData.get(pid).get("sha256"), "object"); assertFalse(Files.exists(absPath)); // Put things back @@ -355,26 +390,25 @@ public void client_deleteMetadata() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - hashStore.storeMetadata(metadataStream, pid); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + hashStore.storeMetadata(metadataStream, pid); + } // Call client String optDeleteMetadata = "-deletemetadata"; String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; String optFormatId = "-format_id"; String optFormatIdValue = hsProperties.getProperty("storeMetadataNamespace"); - String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, optPidValue, - optFormatId, optFormatIdValue}; + String[] args = {optDeleteMetadata, optStore, optStorePath, optPid, pid, optFormatId, + optFormatIdValue}; HashStoreClient.main(args); // Confirm metadata was deleted - Path absPath = getObjectAbsPath( - testData.pidData.get(pid).get("metadata_cid"), "metadata" - ); - assertFalse(Files.exists(absPath)); + Path sysmetaPath = getObjectAbsPath(pid, "metadata"); + assertFalse(Files.exists(sysmetaPath)); // Put things back System.out.flush(); @@ -400,19 +434,20 @@ public void client_getHexDigest() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); + 
+ try (InputStream dataStream = Files.newInputStream(testDataFile)) { + hashStore.storeObject(dataStream, pid, null, null, null, -1); + } // Call client String optGetChecksum = "-getchecksum"; String optStore = "-store"; String optStorePath = hsProperties.getProperty("storePath"); String optPid = "-pid"; - String optPidValue = pid; String optAlgo = "-algo"; String optAlgoValue = "SHA-256"; - String[] args = {optGetChecksum, optStore, optStorePath, optPid, optPidValue, optAlgo, - optAlgoValue}; + String[] args = + {optGetChecksum, optStore, optStorePath, optPid, pid, optAlgo, optAlgoValue}; HashStoreClient.main(args); @@ -427,42 +462,4 @@ public void client_getHexDigest() throws Exception { assertEquals(testDataChecksum, pidStdOut.trim()); } } - - /** - * Test hashStore client returns the content identifier (cid) of an object - */ - @Test - public void client_findObject() throws Exception { - for (String pid : testData.pidList) { - // Redirect stdout to capture output - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(outputStream); - PrintStream old = System.out; - System.setOut(ps); - - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - hashStore.storeObject(dataStream, pid, null, null, null, -1); - - // Call client - String optFindObject = "-findobject"; - String optStore = "-store"; - String optStorePath = hsProperties.getProperty("storePath"); - String optPid = "-pid"; - String optPidValue = pid; - String[] args = {optFindObject, optStore, optStorePath, optPid, optPidValue}; - HashStoreClient.main(args); - - String contentIdentifier = testData.pidData.get(pid).get("sha256"); - - // Put things back - System.out.flush(); - System.setOut(old); - - // Confirm correct content identifier has been saved - String pidStdOut = outputStream.toString(); - assertEquals(contentIdentifier, pidStdOut.trim()); - 
} - } } diff --git a/src/test/java/org/dataone/hashstore/HashStoreRunnable.java b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java new file mode 100644 index 00000000..738d62af --- /dev/null +++ b/src/test/java/org/dataone/hashstore/HashStoreRunnable.java @@ -0,0 +1,94 @@ +package org.dataone.hashstore; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.hashstore.exceptions.HashStoreServiceException; +import org.dataone.hashstore.filehashstore.FileHashStoreUtility; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A HashStoreRunnable represents the data needed for a single request to HashStore packaged as a + * Runnable task that can be executed within a thread pool, typically provided by the Executor + * service. + */ +public class HashStoreRunnable implements Runnable { + private static final Log log = LogFactory.getLog(HashStoreRunnable.class); + public static final int storeObject = 1; + public static final int deleteObject = 2; + private final HashStore hashstore; + private final int publicAPIMethod; + private final String pid; + private InputStream objStream; + + /** + * Constructor for HashStoreRunnable to store a data object with a given pid + * + * @param hashstore HashStore object to interact with + * @param publicAPIMethod Integer representing action/Public API method (ex. 1 for storeObject) + * @param objStream Stream to data object + * @param pid Persistent or authority-based identifier + */ + public HashStoreRunnable( + HashStore hashstore, int publicAPIMethod, InputStream objStream, String pid) { + FileHashStoreUtility.ensureNotNull(hashstore, "hashstore"); + FileHashStoreUtility.checkPositive(publicAPIMethod); + this.hashstore = hashstore; + this.publicAPIMethod = publicAPIMethod; + this.objStream = objStream; + this.pid = pid; + } + + /** + * Constructor for HashStoreRunnable where only a pid is necessary (ex. to delete an object). 
+ * + * @param hashstore HashStore object to interact with + * @param publicAPIMethod Integer representing action/Public API method (ex. 2 for + * deleteObject) + * @param pid Persistent or authority-based identifier + */ + public HashStoreRunnable(HashStore hashstore, int publicAPIMethod, String pid) { + FileHashStoreUtility.ensureNotNull(hashstore, "hashstore"); + FileHashStoreUtility.checkPositive(publicAPIMethod); + this.hashstore = hashstore; + this.publicAPIMethod = publicAPIMethod; + this.pid = pid; + } + + /** + * Executes a HashStore action (ex. storeObject, deleteObject) + */ + public void run() { + log.debug("HashStoreRunnable - Called to: " + publicAPIMethod); + try { + switch (publicAPIMethod) { + case storeObject -> { + try { + hashstore.storeObject(objStream, pid, null, null, null, -1); + } catch (Exception e) { + String errMsg = + "HashStoreRunnable ~ UnexpectedError - storeObject: " + e.getCause(); + System.out.println(errMsg); + log.error(errMsg); + throw new HashStoreServiceException(errMsg); + } + objStream.close(); + } + case deleteObject -> { + try { + hashstore.deleteObject(pid); + } catch (Exception e) { + String errMsg = + "HashStoreRunnable ~ UnexpectedError - deleteObject: " + e.getCause(); + System.out.println(errMsg); + log.error(errMsg); + throw new HashStoreServiceException(errMsg); + } + } + } + } catch (HashStoreServiceException | IOException hse) { + log.error("HashStoreRunnable ~ Unexpected Error: " + hse.getMessage()); + } + } +} diff --git a/src/test/java/org/dataone/hashstore/HashStoreTest.java b/src/test/java/org/dataone/hashstore/HashStoreTest.java index 1f34c70a..990b922b 100644 --- a/src/test/java/org/dataone/hashstore/HashStoreTest.java +++ b/src/test/java/org/dataone/hashstore/HashStoreTest.java @@ -28,7 +28,7 @@ public class HashStoreTest { @BeforeEach public void getHashStore() { String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; - Path rootDirectory = tempFolder.resolve("metacat"); + Path 
rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -36,8 +36,7 @@ public void getHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); @@ -72,13 +71,13 @@ public void isHashStore() { public void hashStore_classPackageNull() { assertThrows(HashStoreFactoryException.class, () -> { Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", "/test"); + storeProperties.setProperty("storePath", "/hashstore"); storeProperties.setProperty("storeDepth", "3"); storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); hashStore = HashStoreFactory.getHashStore(null, storeProperties); }); @@ -98,8 +97,8 @@ public void hashStore_classPackageNotFound() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); hashStore = HashStoreFactory.getHashStore(classPackage, storeProperties); @@ -126,12 +125,89 @@ public void hashStore_storeObjects() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - 
ObjectMetadata objInfo = hashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + hashStore.storeObject(dataStream, pid, null, null, null, -1); - // Check id (sha-256 hex digest of the ab_id, aka object_cid) - String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, objInfo.getCid()); + // Check id (sha-256 hex digest of the ab_id, aka object_cid) + String objContentId = testData.pidData.get(pid).get("sha256"); + assertEquals(objContentId, objInfo.cid()); + } } } + + /** + * Confirm factory throws exception when a given folder is empty but an objects folder exists + */ + @Test + public void getHashStore_objFolderExists() throws Exception { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + Path rootDirectory = tempFolder.resolve("doutest/hashstore"); + + Path conflictingObjDirectory = rootDirectory.resolve("objects"); + Files.createDirectories(rootDirectory.resolve("objects")); + assertTrue(Files.exists(conflictingObjDirectory)); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); + + assertThrows( + HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); + } + + /** + * Confirm factory throws exception when a given folder is empty but a metadata folder exists + */ + @Test + public void getHashStore_metadataFolderExists() throws Exception { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + Path rootDirectory = tempFolder.resolve("doutest/hashstore"); + + Path
conflictingObjDirectory = rootDirectory.resolve("metadata"); + Files.createDirectories(rootDirectory.resolve("metadata")); + assertTrue(Files.exists(conflictingObjDirectory)); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); + + assertThrows( + HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); + } + + /** + * Confirm factory throws exception when a given folder is empty but a refs folder exists + */ + @Test + public void getHashStore_refsFolderExists() throws Exception { + String classPackage = "org.dataone.hashstore.filehashstore.FileHashStore"; + Path rootDirectory = tempFolder.resolve("doutest/hashstore"); + + Path conflictingObjDirectory = rootDirectory.resolve("refs"); + Files.createDirectories(rootDirectory.resolve("refs")); + assertTrue(Files.exists(conflictingObjDirectory)); + + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootDirectory.toString()); + storeProperties.setProperty("storeDepth", "3"); + storeProperties.setProperty("storeWidth", "2"); + storeProperties.setProperty("storeAlgorithm", "SHA-256"); + storeProperties.setProperty( + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); + + assertThrows( + HashStoreFactoryException.class, () -> hashStore = + HashStoreFactory.getHashStore(classPackage, storeProperties)); + } } diff --git a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java index 8d97a0f3..e0a340d3 100644 --- a/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java +++ 
b/src/test/java/org/dataone/hashstore/ObjectMetadataTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import java.util.HashMap; import java.util.Map; @@ -28,16 +29,13 @@ public void initializeInstanceVariables() { hexDigests.put("md5", "f4ea2d07db950873462a064937197b0f"); hexDigests.put("sha1", "3d25436c4490b08a2646e283dada5c60e5c0539d"); hexDigests.put( - "sha256", "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a" - ); + "sha256", "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"); hexDigests.put( "sha384", - "a204678330fcdc04980c9327d4e5daf01ab7541e8a351d49a7e9c5005439dce749ada39c4c35f573dd7d307cca11bea8" - ); + "a204678330fcdc04980c9327d4e5daf01ab7541e8a351d49a7e9c5005439dce749ada39c4c35f573dd7d307cca11bea8"); hexDigests.put( "sha512", - "bf9e7f4d4e66bd082817d87659d1d57c2220c376cd032ed97cadd481cf40d78dd479cbed14d34d98bae8cebc603b40c633d088751f07155a94468aa59e2ad109" - ); + "bf9e7f4d4e66bd082817d87659d1d57c2220c376cd032ed97cadd481cf40d78dd479cbed14d34d98bae8cebc603b40c633d088751f07155a94468aa59e2ad109"); } /** @@ -45,17 +43,39 @@ public void initializeInstanceVariables() { */ @Test public void testObjectMetadata() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); assertNotNull(objInfo); } /** - * Check ObjectMetadata get id + * Check ObjectMetadata pid is null by default + */ + @Test + public void testObjectMetadataGetPid() { + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); + String pid = objInfo.pid(); + assertNull(pid); + } + + /** + * Check ObjectMetadata pid can be set through its constructor + */ + @Test + public void testObjectMetadataSetPid() { + String pidToSet = "dou.test.1"; + ObjectMetadata objInfo = new ObjectMetadata(pidToSet, id, size, hexDigests); + + 
String pidFromObjectMetadata = objInfo.pid(); + assertEquals(pidFromObjectMetadata, pidToSet); + } + + /** + * Check ObjectMetadata get cid */ @Test public void testObjectMetadataGetId() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); - String objId = objInfo.getCid(); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); + String objId = objInfo.cid(); assertEquals(objId, id); } @@ -64,8 +84,8 @@ public void testObjectMetadataGetId() { */ @Test public void testHashAddressGetSize() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); - long objSize = objInfo.getSize(); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); + long objSize = objInfo.size(); assertEquals(objSize, size); } @@ -74,8 +94,8 @@ public void testHashAddressGetSize() { */ @Test public void testObjectMetadataGetHexDigests() { - ObjectMetadata objInfo = new ObjectMetadata(id, size, hexDigests); - Map objInfoMap = objInfo.getHexDigests(); + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); + Map objInfoMap = objInfo.hexDigests(); assertEquals(objInfoMap, hexDigests); } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java similarity index 87% rename from src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java rename to src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java index 05b0fa42..f46dcb3e 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStorePublicTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInitTest.java @@ -14,6 +14,7 @@ import java.util.HashMap; import java.util.Properties; +import org.dataone.hashstore.HashStore; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -22,7 +23,7 @@ /** 
* Test class for FileHashStore constructor */ -public class FileHashStorePublicTest { +public class FileHashStoreInitTest { private static Path rootDirectory; private static Path objStringFull; private static Path objTmpStringFull; @@ -37,7 +38,7 @@ public class FileHashStorePublicTest { @BeforeEach public void initializeFileHashStore() { Path root = tempFolder; - rootDirectory = root.resolve("metacat"); + rootDirectory = root.resolve("hashstore"); objStringFull = rootDirectory.resolve("objects"); objTmpStringFull = rootDirectory.resolve("objects/tmp"); metadataStringFull = rootDirectory.resolve("metadata"); @@ -49,8 +50,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { fileHashStore = new FileHashStore(storeProperties); @@ -91,8 +91,8 @@ public void constructor_nullStorePath() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -110,8 +110,8 @@ public void constructor_illegalDepthArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -129,8 +129,8 @@ public void constructor_illegalWidthArg() { storeProperties.setProperty("storeWidth", "0"); storeProperties.setProperty("storeAlgorithm", 
"SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -148,8 +148,8 @@ public void constructor_illegalAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "MD5"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -167,8 +167,8 @@ public void constructor_emptyAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", ""); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -186,8 +186,8 @@ public void constructor_emptySpacesAlgorithmArg() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", " "); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -239,8 +239,8 @@ public void initDefaultStore_directoryNull() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -277,9 +277,9 @@ public void initRefsDirectories() { assertTrue(Files.isDirectory(refsPath)); Path refsTmpPath = 
rootDirectory.resolve("refs/tmp"); assertTrue(Files.isDirectory(refsTmpPath)); - Path refsPidPath = rootDirectory.resolve("refs/pid"); + Path refsPidPath = rootDirectory.resolve("refs/pids"); assertTrue(Files.isDirectory(refsPidPath)); - Path refsCidPath = rootDirectory.resolve("refs/cid"); + Path refsCidPath = rootDirectory.resolve("refs/cids"); assertTrue(Files.isDirectory(refsCidPath)); } @@ -302,8 +302,8 @@ public void testGetHashStoreYaml() throws IOException { assertEquals(hsProperties.get("storeWidth"), 2); assertEquals(hsProperties.get("storeAlgorithm"), "SHA-256"); assertEquals( - hsProperties.get("storeMetadataNamespace"), "http://ns.dataone.org/service/types/v2.0" - ); + hsProperties.get("storeMetadataNamespace"), + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); } /** @@ -317,8 +317,7 @@ public void testExistingHashStoreConfiguration_sameConfig() throws Exception { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); } @@ -336,8 +335,8 @@ public void testExistingHashStoreConfiguration_diffAlgorithm() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "MD5"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -356,8 +355,8 @@ public void testExistingHashStoreConfiguration_diffDepth() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + 
"https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -376,8 +375,8 @@ public void testExistingHashStoreConfiguration_diffWidth() { storeProperties.setProperty("storeWidth", "1"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); new FileHashStore(storeProperties); }); @@ -396,8 +395,7 @@ public void testExistingHashStoreConfiguration_diffMetadataNamespace() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.test.org/service/types/v2.0" - ); + "storeMetadataNamespace", "http://ns.test.org/service/types/v2.0"); new FileHashStore(storeProperties); }); @@ -418,10 +416,10 @@ public void testExistingHashStoreConfiguration_missingYaml() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); - FileHashStore secondHashStore = new FileHashStore(storeProperties); + HashStore secondHashStore = new FileHashStore(storeProperties); // Confirm config present Path newStoreHashStoreYaml = newStoreDirectory.resolve("hashstore.yaml"); @@ -432,8 +430,9 @@ public void testExistingHashStoreConfiguration_missingYaml() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - secondHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + secondHashStore.storeObject(dataStream, pid, null, 
null, null, -1); + } } // Delete configuration diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java index bdd6d2d2..33eee518 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreInterfaceTest.java @@ -19,6 +19,11 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.concurrent.ExecutorService; @@ -29,9 +34,14 @@ import javax.xml.bind.DatatypeConverter; +import org.dataone.hashstore.HashStoreRunnable; import org.dataone.hashstore.ObjectMetadata; -import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; -import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.MissingHexDigestsException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.UnsupportedHashAlgorithmException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -40,8 +50,6 @@ /** * Test class for FileHashStore HashStore Interface methods. 
- * - * Note: `tagObject` & `verifyObject` tests can be found in the `FileHashStoreReferences` class */ public class FileHashStoreInterfaceTest { private FileHashStore fileHashStore; @@ -54,7 +62,7 @@ public class FileHashStoreInterfaceTest { */ @BeforeEach public void initializeFileHashStore() { - rootDirectory = tempFolder.resolve("metacat"); + rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -62,8 +70,7 @@ public void initializeFileHashStore() { storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { fhsProperties = storeProperties; @@ -93,14 +100,15 @@ public void storeObject() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - // Check id (content identifier based on the store algorithm) - String objectCid = testData.pidData.get(pid).get("sha256"); - assertEquals(objectCid, objInfo.getCid()); + // Check id (content identifier based on the store algorithm) + String objectCid = testData.pidData.get(pid).get("sha256"); + assertEquals(objectCid, objInfo.cid()); + assertEquals(pid, objInfo.pid()); + } } } @@ -113,14 +121,15 @@ public void storeObject_objSize() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = 
Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + // Check the object size + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.size()); + } - // Check the object size - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); } } @@ -133,24 +142,24 @@ public void storeObject_hexDigests() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - - Map hexDigests = objInfo.getHexDigests(); - - // Validate checksum values - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 = testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); - assertEquals(md5, hexDigests.get("MD5")); - assertEquals(sha1, hexDigests.get("SHA-1")); - assertEquals(sha256, hexDigests.get("SHA-256")); - assertEquals(sha384, hexDigests.get("SHA-384")); - assertEquals(sha512, hexDigests.get("SHA-512")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + Map hexDigests = objInfo.hexDigests(); + + // Validate checksum values + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = 
testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + } } } @@ -175,8 +184,9 @@ public void storeObject_nullPid() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, null, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, null, null, null, null, -1); + } }); } } @@ -191,92 +201,67 @@ public void storeObject_emptyPid() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, "", null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, "", null, null, null, -1); + } }); } } /** - * Check that store object throws exception when object size is 0 + * Check that store object throws exception when pid contains new line character */ @Test - public void storeObject_zeroObjSize() { + public void storeObject_pidWithNewLine() { for (String pid : testData.pidList) { assertThrows(IllegalArgumentException.class, () -> { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, "dou.test.1\n", null, null, null, -1); + } }); } } /** - * Verify that storeObject 
stores and validates a given checksum and its expected size - * with overloaded method - */ - @Test - public void storeObject_overloadChecksumCsAlgoAndSize() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - String md2 = testData.pidData.get(pid).get("md2"); - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, md2, "MD2", objectSize - ); - - Map hexDigests = objInfo.getHexDigests(); - - // Validate checksum values - assertEquals(md2, hexDigests.get("MD2")); - } - } - - /** - * Verify that storeObject stores and validates a given checksum with overloaded method + * Check that store object throws exception when pid contains tab character */ @Test - public void storeObject_overloadChecksumAndChecksumAlgo() throws Exception { + public void storeObject_pidWithTab() { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - String md2 = testData.pidData.get(pid).get("md2"); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, md2, "MD2"); - - Map hexDigests = objInfo.getHexDigests(); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - // Validate checksum values - assertEquals(md2, hexDigests.get("MD2")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, "dou.test.1\t", null, null, null, -1); + } + }); } } /** - * Check that store object returns the correct ObjectMetadata size with overloaded method + * Check that store object throws exception when object size is 0 
*/ @Test - public void storeObject_overloadObjSize() throws Exception { + public void storeObject_zeroObjSize() { for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, objectSize); + assertThrows(IllegalArgumentException.class, () -> { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - assertEquals(objectSize, objInfo.getSize()); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, 0); + } + }); } } /** - * Check that store object executes as expected with only an InputStream (does not create - * any reference files) + * Check that store object executes as expected with only an InputStream (does not create any + * reference files) */ @Test public void storeObject_overloadInputStreamOnly() throws Exception { @@ -284,41 +269,21 @@ public void storeObject_overloadInputStreamOnly() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - Map hexDigests = objInfo.getHexDigests(); - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - String cid = objInfo.getCid(); + Map hexDigests = objInfo.hexDigests(); + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + String cid = objInfo.cid(); - assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); + 
assertEquals(hexDigests.get(defaultStoreAlgorithm), cid); - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.findObject(pid); - }); - - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - assertFalse(Files.exists(cidRefsFilePath)); - } - } + assertThrows(FileNotFoundException.class, () -> fileHashStore.findObject(pid)); - /** - * Verify that storeObject generates an additional checksum with overloaded method - */ - @Test - public void storeObject_overloadAdditionalAlgo() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, "MD2"); - - Map hexDigests = objInfo.getHexDigests(); - - // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(md2, hexDigests.get("MD2")); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertFalse(Files.exists(cidRefsFilePath)); + } } } @@ -333,11 +298,12 @@ public void storeObject_validateChecksumValue() throws Exception { String checksumCorrect = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, checksumCorrect, "SHA-256", -1); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - assertTrue(Files.exists(objCidAbsPath)); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objCidAbsPath)); + } } /** @@ -351,11 +317,12 @@ public void storeObject_correctChecksumValue() throws Exception { String checksumCorrect = 
"9c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, "MD2", null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, "MD2", null, null, -1); - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(checksumCorrect, md2); + String md2 = testData.pidData.get(pid).get("md2"); + assertEquals(checksumCorrect, md2); + } } /** @@ -371,8 +338,9 @@ public void storeObject_incorrectChecksumValue() { String checksumIncorrect = "aaf9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, checksumIncorrect, "SHA-256", -1); + } }); } @@ -388,8 +356,9 @@ public void storeObject_emptyChecksumValue() { String checksumEmpty = ""; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, checksumEmpty, "MD2", -1); + } }); } @@ -403,8 +372,9 @@ public void storeObject_nullChecksumValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, "SHA-512/224", -1); + } }); } @@ -418,13 +388,13 @@ public void storeObject_objSizeCorrect() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); long objectSize = 
Long.parseLong(testData.pidData.get(pid).get("size")); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, objectSize - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, objectSize); - // Check id (sha-256 hex digest of the ab_id (pid)) - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + assertEquals(objectSize, objInfo.size()); + } } } @@ -438,14 +408,14 @@ public void storeObject_objSizeIncorrect() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, 1000 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, 1000); - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.size()); + } }); } } @@ -460,14 +430,15 @@ public void storeObject_invalidAlgorithm() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, "SM2", null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, "SM2", null, null, -1); + } }); } /** - * Check that store object tags cid refs file as expected when called - * to store a duplicate object (two 
pids that reference the same cid) + * Check that store object tags cid refs file as expected when called to store a duplicate + * object (two pids that reference the same cid) */ @Test public void storeObject_duplicate() throws Exception { @@ -475,37 +446,36 @@ public void storeObject_duplicate() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile); + InputStream dataStreamDup = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String pidTwo = pid + ".test"; - InputStream dataStreamDup = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStreamDup, pidTwo, null, null, null, -1 - ); + String pidTwo = pid + ".test"; + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); - String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - assertTrue(fileHashStore.isPidInCidRefsFile(pid, absCidRefsPath)); - assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); + String cid = objInfo.cid(); + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertTrue(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); + assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); + } } } /** - * Test that storeObject successfully stores a 1GB file - * - * Note 1: a 4GB successfully stored in approximately 1m30s - * Note 2: Successfully stores 250GB file confirmed from knbvm + * Test that storeObject successfully stores a 1GB file Note 1: a 4GB successfully stored in + * approximately 1m30s Note 2: Successfully stores 250GB file confirmed from knbvm */ @Test 
public void storeObject_largeSparseFile() throws Exception { - long fileSize = 1L * 1024L * 1024L * 1024L; // 1GB + long fileSize = 1024L * 1024L * 1024L; // 1GB // Get tmp directory to initially store test file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path testFilePath = storePath.resolve("random_file.bin"); + Path testDataFile = storePath.resolve("random_file.bin"); // Generate a random file with the specified size - try (FileOutputStream fileOutputStream = new FileOutputStream(testFilePath.toString())) { + try (FileOutputStream fileOutputStream = new FileOutputStream(testDataFile.toString())) { FileChannel fileChannel = fileOutputStream.getChannel(); FileLock lock = fileChannel.lock(); fileChannel.position(fileSize - 1); @@ -516,28 +486,29 @@ public void storeObject_largeSparseFile() throws Exception { throw ioe; } - InputStream dataStream = Files.newInputStream(testFilePath); - String pid = "dou.sparsefile.1"; - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + String pid = "dou.sparsefile.1"; + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - assertTrue(Files.exists(objCidAbsPath)); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objCidAbsPath)); + } } /** - * Tests that temporary objects that are being worked on while storeObject is in - * progress and gets interrupted are deleted. + * Tests that temporary objects that are being worked on while storeObject is in progress and + * gets interrupted are deleted. 
*/ @Test public void storeObject_interruptProcess() throws Exception { - long fileSize = 1L * 1024L * 1024L * 1024L; // 1GB + long fileSize = 1024L * 1024L * 1024L; // 1GB // Get tmp directory to initially store test file Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path testFilePath = storePath.resolve("random_file.bin"); + Path testDataFile = storePath.resolve("random_file.bin"); // Generate a random file with the specified size - try (FileOutputStream fileOutputStream = new FileOutputStream(testFilePath.toString())) { + try (FileOutputStream fileOutputStream = new FileOutputStream(testDataFile.toString())) { FileChannel fileChannel = fileOutputStream.getChannel(); FileLock lock = fileChannel.lock(); fileChannel.position(fileSize - 1); @@ -549,10 +520,10 @@ public void storeObject_interruptProcess() throws Exception { } Thread toInterrupt = new Thread(() -> { - try { - InputStream dataStream = Files.newInputStream(testFilePath); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { String pid = "dou.sparsefile.1"; fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } catch (IOException | NoSuchAlgorithmException | InterruptedException ioe) { ioe.printStackTrace(); } @@ -572,12 +543,15 @@ public void storeObject_interruptProcess() throws Exception { /** * Tests that the `storeObject` method can store an object successfully with multiple threads * (5). This test uses five futures (threads) that run concurrently, all except one of which - * will encounter an `ExecutionException`. The thread that does not encounter an exception will - * store the given object, and verifies that the object is stored successfully. - * - * The threads that run into exceptions will encounter a `RunTimeException` since the expected - * object to store is already in progress (thrown by `syncPutObject` which coordinates - * `store_object` requests with a pid). + * will encounter a `HashStoreRefsAlreadyExistException`. 
The thread that does not encounter an + * exception will store the given object, and verifies that the object is stored successfully. + * + * The threads are expected to encounter a `RunTimeException` since the expected object to store + * is already in progress (thrown by `syncPutObject` which coordinates `store_object` requests + * with a pid). If both threads execute simultaneously and bypasses the store object + * synchronization flow, we may also run into a `HashStoreRefsAlreadyExistException` - which is + * called during the `tagObject` process when reference files already exist with the expected + * values. */ @Test public void storeObject_objectLockedIds_FiveThreads() throws Exception { @@ -585,107 +559,118 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - // Create a thread pool with 3 threads + // Create a thread pool with 5 threads ExecutorService executorService = Executors.newFixedThreadPool(5); // Submit 5 futures to the thread pool, each calling storeObject Future future1 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { - String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + String cid = objInfo.cid(); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, 
FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - System.out.println("Start Thread 1 Exception:"); - System.out.println(e.getClass()); - e.printStackTrace(); - System.out.println("End Thread 1 Exception\n"); - assertTrue(e instanceof RuntimeException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future2 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { - String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + String cid = objInfo.cid(); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future3 = executorService.submit(() -> { - try { - 
InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { - String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + String cid = objInfo.cid(); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future4 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { - String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + String cid = objInfo.cid(); + Path objCidAbsPath = 
fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); Future future5 = executorService.submit(() -> { - try { - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); if (objInfo != null) { - String cid = objInfo.getCid(); - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); + String cid = objInfo.cid(); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path pidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); assertTrue(Files.exists(objCidAbsPath)); assertTrue(Files.exists(pidRefsPath)); assertTrue(Files.exists(cidRefsPath)); } } catch (Exception e) { - assertTrue(e instanceof RuntimeException); + System.out.println( + "storeObject_objectLockedIds_FiveThreads - Exception Cause: " + e.getCause()); + assertTrue(e instanceof RuntimeException + | e instanceof HashStoreRefsAlreadyExistException); } }); @@ -700,6 
+685,341 @@ public void storeObject_objectLockedIds_FiveThreads() throws Exception { executorService.awaitTermination(1, TimeUnit.MINUTES); } + /** + * Test storeObject synchronization using a Runnable class + */ + @Test + public void storeObject_50Pids_1Obj_viaRunnable() throws Exception { + // Get single test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + + List pidModifiedList = new ArrayList<>(); + for (int i = 1; i <= 50; i++) { + pidModifiedList.add(pid + ".dou.test." + i); + } + + Runtime runtime = Runtime.getRuntime(); + int numCores = runtime.availableProcessors(); + ExecutorService executorService = Executors.newFixedThreadPool(numCores); + + for (String pidAdjusted : pidModifiedList) { + InputStream dataStream = Files.newInputStream(testDataFile); + Runnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); + executorService.execute(request); + } + + executorService.shutdown(); + executorService.awaitTermination(1, TimeUnit.MINUTES); + + // Check cid refs file that every pid is found + String cidSha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(cidSha256DigestFromTestData, + FileHashStore.HashStoreIdTypes.cid); + Collection stringSet = new HashSet<>(pidModifiedList); + List lines = Files.readAllLines(cidRefsFilePath); + boolean allFoundPidsFound = true; + for (String line : lines) { + if (!stringSet.contains(line)) { + allFoundPidsFound = false; + break; + } + } + assertTrue(allFoundPidsFound); + + // Confirm that 50 pid refs file exists + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + assertEquals(50, pidRefFiles.size()); + } + + /** + * Check tagObject does not throw exception when creating a fresh set of reference files + */ + @Test + public void tagObject() throws Exception { + 
String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + } + + /** + * Check that tagObject successfully tags a cid refs file that already exists + */ + @Test + public void tagObject_cidRefsAlreadyExists() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + String pidTwo = "dou.test.2"; + fileHashStore.tagObject(pidTwo, cid); + + // Confirm number of ref files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(2, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that tagObject throws HashStoreRefsAlreadyExistException exception when pid and cid + * refs file already exists (duplicate tag request) + */ + @Test + public void tagObject_HashStoreRefsAlreadyExistException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // This exception only needs to be re-raised + assertThrows( + HashStoreRefsAlreadyExistException.class, () -> fileHashStore.tagObject(pid, cid)); + + // Confirm there are only 1 of each ref files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that tagObject throws PidRefsFileExistsException when called to tag a 'pid' that is + * already referencing another 'cid' + */ + @Test + public void tagObject_PidRefsFileExistsException() throws Exception { + String pid = "dou.test.1"; + String 
cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // This exception only needs to be re-raised + assertThrows( + PidRefsFileExistsException.class, () -> fileHashStore.tagObject(pid, "another.cid")); + + // Confirm there are only 1 of each ref files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that deleteIfInvalidObject does not throw exception with matching values + */ + @Test + public void deleteIfInvalidObject_correctValues() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get deleteIfInvalidObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + fileHashStore.deleteIfInvalidObject( + objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); + // Real path to the data object + assertTrue(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); + } + } + } + + /** + * Check that 
deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests is + * empty. + */ + @Test + public void deleteIfInvalidObject_objInfoEmptyHexDigests() { + String id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; + long size = 1999999; + Map hexDigests = new HashMap<>(); + + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, hexDigests); + + assertThrows(MissingHexDigestsException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); + } + + /** + * Check that deleteIfInvalidObject throws MissingHexDigestsException when objInfo hexDigests is + * null. + */ + @Test + public void deleteIfInvalidObject_objInfoNullHexDigests() { + String id = "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; + long size = 1999999; + ObjectMetadata objInfo = new ObjectMetadata(null, id, size, null); + + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, id, "MD2", size)); + } + + /** + * Check that deleteIfInvalidObject calculates and verifies a checksum with a supported + * algorithm that is not included in the default list + */ + @Test + public void deleteIfInvalidObject_supportedAlgoNotInDefaultList() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + + // Get deleteIfInvalidObject args + String expectedChecksum = testData.pidData.get(pid).get("md2"); + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, "MD2", expectedSize); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return 
the expected real path to object + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); + // Real path to the data object + assertTrue(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); + } + } + } + + /** + * Check that deleteIfInvalidObject calculates throws exception when given a checksumAlgorithm + * that is not supported + */ + @Test + public void deleteIfInvalidObject_unsupportedAlgo() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + + assertThrows(UnsupportedHashAlgorithmException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, "ValueNotRelevant", + "BLAKE2S", 1000)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); + // Real path to the data object + assertTrue(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); + } + } + } + + /** + * Check that deleteIfInvalidObject throws exception when non-matching size value provided + */ + @Test + public void deleteIfInvalidObject_mismatchedSize() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + + String defaultStoreAlgorithm = 
fhsProperties.getProperty("storeAlgorithm"); + + // Get deleteIfInvalidObject args + String expectedChecksum = testData.pidData.get(pid).get("sha256"); + long expectedSize = 123456789; + + assertThrows(NonMatchingObjSizeException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, + defaultStoreAlgorithm, + expectedSize)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); + // Real path to the data object + assertFalse(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); + } + } + } + + /** + * Check that deleteIfInvalidObject throws exception with non-matching checksum value + */ + @Test + public void deleteIfInvalidObject_mismatchedChecksum() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + + String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); + + // Get deleteIfInvalidObject args + String expectedChecksum = "intentionallyWrongValue"; + long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); + + assertThrows(NonMatchingChecksumException.class, + () -> fileHashStore.deleteIfInvalidObject(objInfo, expectedChecksum, + defaultStoreAlgorithm, + expectedSize)); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + // If cid is found, return the expected real path to object + String objRelativePath = + 
FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); + // Real path to the data object + assertFalse(Files.exists( + Paths.get(fhsProperties.getProperty("storePath")).resolve("objects") + .resolve(objRelativePath))); + } + } + } + /** * Test storeMetadata stores metadata as expected */ @@ -711,22 +1031,19 @@ public void storeMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPath = + fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + metadataStream.close(); - // Get relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid - ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); - - assertTrue(Files.exists(metadataCidAbsPath)); + // Calculate absolute path + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, testFormatId); - long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataCidAbsPath); - assertEquals(writtenMetadataFile, originalMetadataFie); + assertEquals(metadataPidExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataPidExpectedPath)); + } } } @@ -735,28 +1052,83 @@ public void storeMetadata() throws Exception { */ @Test public void storeMetadata_defaultFormatId_overload() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + try 
(InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid); + + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + + assertEquals(metadataPidExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataPidExpectedPath)); + } + } + } + + /** + * Test storeMetadata creates appropriate directory for metadata documents with the given pid + */ + @Test + public void storeMetadata_pidHashIsDirectory() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid); - // Get relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid - ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String metadataPidhash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String pidMetadataDirectory = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidhash); + Path expectedPidMetadataDirectory = + rootDirectory.resolve("metadata").resolve(pidMetadataDirectory); + + 
assertTrue(Files.isDirectory(expectedPidMetadataDirectory)); + } + } + } - assertTrue(Files.exists(metadataCidAbsPath)); + /** + * Test storeMetadata stores different metadata for a given pid in its expected directory + */ + @Test + public void storeMetadata_multipleFormatIds() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataCidAbsPath); - assertEquals(writtenMetadataFile, originalMetadataFie); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamDup = Files.newInputStream(testMetaDataFile)) { + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPath = + fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + String metadataDefaultPath = fileHashStore.storeMetadata(metadataStreamDup, pid); + + // Calculate absolute path + Path metadataTestFormatIdExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, testFormatId); + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataDefaultExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + + assertEquals(metadataTestFormatIdExpectedPath.toString(), metadataPath); + assertTrue(Files.exists(metadataTestFormatIdExpectedPath)); + assertEquals(metadataDefaultExpectedPath.toString(), metadataDefaultPath); + assertTrue(Files.exists(metadataDefaultExpectedPath)); + } } } @@ -771,20 +1143,13 @@ public void storeMetadata_fileSize() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, null); - - // Get 
relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid - ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, null); - long writtenMetadataFile = Files.size(testMetaDataFile); - long originalMetadataFie = Files.size(metadataCidAbsPath); - assertEquals(writtenMetadataFile, originalMetadataFie); + long writtenMetadataFile = Files.size(testMetaDataFile); + long originalMetadataFie = Files.size(Paths.get(metadataPath)); + assertEquals(writtenMetadataFile, originalMetadataFie); + } } } @@ -795,8 +1160,7 @@ public void storeMetadata_fileSize() throws Exception { public void storeMetadata_metadataNull() { for (String pid : testData.pidList) { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.storeMetadata(null, pid, null) - ); + IllegalArgumentException.class, () -> fileHashStore.storeMetadata(null, pid, null)); } } @@ -812,9 +1176,9 @@ public void storeMetadata_pidNull() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.storeMetadata(metadataStream, null, null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, null, null); + } }); } } @@ -831,9 +1195,9 @@ public void storeMetadata_pidEmpty() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.storeMetadata(metadataStream, "", null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + 
fileHashStore.storeMetadata(metadataStream, "", null); + } }); } } @@ -850,9 +1214,9 @@ public void storeMetadata_pidEmptySpaces() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.storeMetadata(metadataStream, " ", null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, " ", null); + } }); } } @@ -863,7 +1227,7 @@ public void storeMetadata_pidEmptySpaces() { * concurrently, each of which will have to wait for the given `pid` to be released from * metadataLockedIds before proceeding to store the given metadata content from its * `storeMetadata()` request. - * + * * All requests to store the same metadata will be executed, and the existing metadata file will * be overwritten by each thread. No exceptions should be encountered during these tests. */ @@ -874,39 +1238,46 @@ public void storeMetadata_metadataLockedIds() throws Exception { String pidFormatted = pid.replace("/", "_"); // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - String pidFormatHexDigest = - "ddf07952ef28efc099d10d8b682480f7d2da60015f5d8873b6e1ea75b4baf689"; // Create a thread pool with 3 threads ExecutorService executorService = Executors.newFixedThreadPool(3); // Submit 3 threads, each calling storeMetadata Future future1 = executorService.submit(() -> { - try { - String formatId = "http://ns.dataone.org/service/types/v2.0"; - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, formatId); - assertEquals(metadataCid, pidFormatHexDigest); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, 
pid, formatId); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (IOException | NoSuchAlgorithmException | InterruptedException e) { e.printStackTrace(); } }); Future future2 = executorService.submit(() -> { - try { - String formatId = "http://ns.dataone.org/service/types/v2.0"; - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, formatId); - assertEquals(metadataCid, pidFormatHexDigest); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (Exception e) { e.printStackTrace(); } }); Future future3 = executorService.submit(() -> { - try { - String formatId = "http://ns.dataone.org/service/types/v2.0"; - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.storeMetadata(metadataStream, pid, formatId); - assertEquals(metadataCid, pidFormatHexDigest); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String formatId = "https://ns.dataone.org/service/types/v2.0#SystemMetadata"; + String metadataPath = fileHashStore.storeMetadata(metadataStream, pid, formatId); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path 
metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); } catch (Exception e) { e.printStackTrace(); } @@ -923,11 +1294,12 @@ public void storeMetadata_metadataLockedIds() throws Exception { // Confirm metadata file is written Path storePath = Paths.get(fhsProperties.getProperty("storePath")); String formatId = fhsProperties.getProperty("storeMetadataNamespace"); - Path metadataCidAbsPath = fileHashStore.getRealPath(pid, "metadata", formatId); + Path metadataCidAbsPath = fileHashStore.getHashStoreMetadataPath(pid, formatId); assertTrue(Files.exists(metadataCidAbsPath)); - // Confirm there are only two files in HashStore - 'hashstore.yaml' and the - // metadata file written + // Confirm there are only three files in HashStore - 'hashstore.yaml', the metadata file + // written + // and the metadata refs file that contains namespaces used try (Stream walk = Files.walk(storePath)) { long fileCount = walk.filter(Files::isRegularFile).count(); assertEquals(fileCount, 2); @@ -943,25 +1315,25 @@ public void retrieveObject() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - // Retrieve object - InputStream objectCidInputStream = fileHashStore.retrieveObject(pid); - assertNotNull(objectCidInputStream); - objectCidInputStream.close(); + // Retrieve object + try (InputStream objectCidInputStream = fileHashStore.retrieveObject(pid)) { + assertNotNull(objectCidInputStream); + } + } } } /** - * Check that retrieveObject throws exception when there is no object - * associated with a given pid + * Check that retrieveObject throws exception 
when there is no object associated with a given + * pid */ @Test public void retrieveObject_pidDoesNotExist() { - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.retrieveObject("pid.whose.object.does.not.exist"); - }); + assertThrows(FileNotFoundException.class, + () -> fileHashStore.retrieveObject("pid.whose.object.does.not.exist")); } /** @@ -969,9 +1341,7 @@ public void retrieveObject_pidDoesNotExist() { */ @Test public void retrieveObject_pidNull() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.retrieveObject(null); - }); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.retrieveObject(null)); } /** @@ -979,9 +1349,7 @@ public void retrieveObject_pidNull() { */ @Test public void retrieveObject_pidEmpty() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.retrieveObject(""); - }); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.retrieveObject("")); } /** @@ -989,9 +1357,7 @@ public void retrieveObject_pidEmpty() { */ @Test public void retrieveObject_pidEmptySpaces() { - assertThrows(IllegalArgumentException.class, () -> { - fileHashStore.retrieveObject(" "); - }); + assertThrows(IllegalArgumentException.class, () -> fileHashStore.retrieveObject(" ")); } /** @@ -999,9 +1365,8 @@ public void retrieveObject_pidEmptySpaces() { */ @Test public void retrieveObject_pidNotFound() { - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.retrieveObject("dou.2023.hs.1"); - }); + assertThrows( + FileNotFoundException.class, () -> fileHashStore.retrieveObject("dou.2023.hs.1")); } /** @@ -1013,42 +1378,39 @@ public void retrieveObject_verifyContent() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) 
{ + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } + // Retrieve object - InputStream objectCidInputStream; - try { - objectCidInputStream = fileHashStore.retrieveObject(pid); + try (InputStream objectCidInputStream = fileHashStore.retrieveObject(pid)) { + // Read content and compare it to the SHA-256 checksum from TestDataHarness + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = objectCidInputStream.read(buffer)) != -1) { + sha256.update(buffer, 0, bytesRead); + } + + } catch (IOException ioe) { + ioe.printStackTrace(); + throw ioe; + + } + + // Get hex digest + String sha256Digest = + DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); + assertEquals(sha256Digest, sha256DigestFromTestData); } catch (Exception e) { e.printStackTrace(); throw e; } - - // Read content and compare it to the SHA-256 checksum from TestDataHarness - MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = objectCidInputStream.read(buffer)) != -1) { - sha256.update(buffer, 0, bytesRead); - } - - } catch (IOException ioe) { - ioe.printStackTrace(); - throw ioe; - - } finally { - // Close stream - objectCidInputStream.close(); - } - - // Get hex digest - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); - assertEquals(sha256Digest, sha256DigestFromTestData); } } @@ -1063,13 +1425,17 @@ public void retrieveMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); + try (InputStream metadataStream = 
Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + } String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, storeFormatId); - assertNotNull(metadataCidInputStream); + try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, + storeFormatId)) { + assertNotNull(metadataCidInputStream); + } + } } @@ -1084,11 +1450,13 @@ public void retrieveMetadata_overload() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + } - InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid); - assertNotNull(metadataCidInputStream); + try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid)) { + assertNotNull(metadataCidInputStream); + } } } @@ -1168,9 +1536,8 @@ public void retrieveMetadata_formatEmptySpaces() { public void retrieveMetadata_pidNotFound() { assertThrows(FileNotFoundException.class, () -> { String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - InputStream pidInputStream = fileHashStore.retrieveMetadata( - "dou.2023.hs.1", storeFormatId - ); + InputStream pidInputStream = + fileHashStore.retrieveMetadata("dou.2023.hs.1", storeFormatId); pidInputStream.close(); }); } @@ -1186,50 +1553,116 @@ public void retrieveMetadata_verifyContent() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); + try (InputStream metadataStream = 
Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + } String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); // Retrieve object - InputStream metadataCidInputStream; - try { - metadataCidInputStream = fileHashStore.retrieveMetadata(pid, storeFormatId); + try (InputStream metadataCidInputStream = fileHashStore.retrieveMetadata(pid, + storeFormatId)) { + // Read content and compare it to the SHA-256 checksum from TestDataHarness + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = metadataCidInputStream.read(buffer)) != -1) { + sha256.update(buffer, 0, bytesRead); + } + + } catch (IOException ioe) { + ioe.printStackTrace(); + throw ioe; + + } + + // Get hex digest + String sha256MetadataDigest = + DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha256MetadataDigestFromTestData = + testData.pidData.get(pid).get("metadata_cid_sha256"); + assertEquals(sha256MetadataDigest, sha256MetadataDigestFromTestData); } catch (Exception e) { e.printStackTrace(); throw e; } + } + } - // Read content and compare it to the SHA-256 checksum from TestDataHarness - MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = metadataCidInputStream.read(buffer)) != -1) { - sha256.update(buffer, 0, bytesRead); - } + /** + * Confirm that deleteObject deletes objects and all metadata documents. 
+ */ + @Test + public void deleteObject_dataObjAndMetadataDocs() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - } catch (IOException ioe) { - ioe.printStackTrace(); - throw ioe; + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } + + // Get metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile)) { + String testFormatId = "https://test.arcticdata.io/ns"; + String metadataPathString = + fileHashStore.storeMetadata(metadataStream, pid, testFormatId); + + String metadataDefaultPathString = + fileHashStore.storeMetadata(metadataStreamTwo, pid); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path metadataPath = Paths.get(metadataPathString); + Path metadataDefaultPath = Paths.get(metadataDefaultPathString); + + // Confirm expected documents exist + assertTrue(Files.exists(metadataPath)); + assertTrue(Files.exists(metadataDefaultPath)); + assertTrue(Files.exists(objCidAbsPath)); + + fileHashStore.deleteObject(pid); + + // Check documents have been deleted + assertFalse(Files.exists(metadataPath)); + assertFalse(Files.exists(metadataDefaultPath)); + assertFalse(Files.exists(objCidAbsPath)); + } + } + } + + + /** + * Confirm that deleteObject overload method with signature (String pid) deletes objects and + * does not throw exceptions if metadata documents do not exist. 
+ */ + @Test + public void deleteObject_stringPidNoMetadataDocs() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - } finally { - // Close stream - metadataCidInputStream.close(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); } - // Get hex digest - String sha256MetadataDigest = DatatypeConverter.printHexBinary(sha256.digest()) - .toLowerCase(); - String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( - "metadata_sha256" - ); - assertEquals(sha256MetadataDigest, sha256MetadataDigestFromTestData); + // Get metadata file + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + + // Confirm expected documents exist + assertTrue(Files.exists(objCidAbsPath)); + + fileHashStore.deleteObject(pid); + + // Check documents have been deleted + assertFalse(Files.exists(objCidAbsPath)); } } + /** * Confirm that deleteObject deletes object */ @@ -1239,10 +1672,11 @@ public void deleteObject_objectDeleted() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + } - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); fileHashStore.deleteObject(pid); // Check that file doesn't exist @@ -1267,18 +1701,20 @@ public void deleteObject_referencesDeleted() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); 
- ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - String cid = objInfo.getCid(); - - // Path objAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteObject(pid); - assertFalse(Files.exists(absPathPidRefsPath)); - assertFalse(Files.exists(absPathCidRefsPath)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.cid(); + + // Path objAbsPath = fileHashStore.getExpectedPath(pid, "object", null); + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.deleteObject(pid); + assertFalse(Files.exists(absPathPidRefsPath)); + assertFalse(Files.exists(absPathCidRefsPath)); + } } } @@ -1288,35 +1724,35 @@ public void deleteObject_referencesDeleted() throws Exception { * has references). 
*/ @Test - public void deleteObject_objectExistsIfCidRefencesFileNotEmpty() throws Exception { + public void deleteObject_cidRefsFileNotEmptyObjectExistsStill() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - String pidExtra = "dou.test" + pid; - String cid = objInfo.getCid(); - fileHashStore.tagObject(pidExtra, cid); - - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteObject(pid); - - assertFalse(Files.exists(absPathPidRefsPath)); - assertTrue(Files.exists(objCidAbsPath)); - assertTrue(Files.exists(absPathCidRefsPath)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String pidExtra = "dou.test" + pid; + String cid = objInfo.cid(); + fileHashStore.tagObject(pidExtra, cid); + + Path objCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.deleteObject(pid); + + assertFalse(Files.exists(absPathPidRefsPath)); + assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(absPathCidRefsPath)); + } } } /** * Confirm that deleteObject removes an orphan pid reference file when the associated cid refs * file does not contain the expected pid. 
- * - * @throws Exception */ @Test public void deleteObject_pidOrphan() throws Exception { @@ -1324,31 +1760,35 @@ public void deleteObject_pidOrphan() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - String cid = objInfo.getCid(); - String pidExtra = "dou.test" + pid; - Path objRealPath = fileHashStore.getRealPath(pid, "object", null); - - // Manually change the pid found in the cid refs file - Path absPathCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.updateCidRefsFiles(pidExtra, absPathCidRefsPath); - // Create an orphaned pid refs file - fileHashStore.deleteCidRefsPid(pid, absPathCidRefsPath); - - fileHashStore.deleteObject(pid); - - // Confirm cid refs file still exists - assertTrue(Files.exists(absPathCidRefsPath)); - // Confirm the original (and now orphaned) pid refs file is deleted - Path absPathPidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - assertFalse(Files.exists(absPathPidRefsPath)); - // Confirm the object has not been deleted - assertTrue(Files.exists(objRealPath)); - // Confirm the cid refs file still exists - assertTrue(Files.exists(absPathCidRefsPath)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.cid(); + String pidExtra = "dou.test" + pid; + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); + + // Manually change the pid found in the cid refs file + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile(pidExtra, absPathCidRefsPath, + FileHashStore.HashStoreRefUpdateTypes.add); + // Create an orphaned pid refs file + 
fileHashStore.updateRefsFile( + pid, absPathCidRefsPath, FileHashStore.HashStoreRefUpdateTypes.remove); + + fileHashStore.deleteObject(pid); + + // Confirm cid refs file still exists + assertTrue(Files.exists(absPathCidRefsPath)); + // Confirm the original (and now orphaned) pid refs file is deleted + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + assertFalse(Files.exists(absPathPidRefsPath)); + // Confirm the object has not been deleted + assertTrue(Files.exists(objRealPath)); + // Confirm the cid refs file still exists + assertTrue(Files.exists(absPathCidRefsPath)); + } } } @@ -1358,8 +1798,7 @@ public void deleteObject_pidOrphan() throws Exception { @Test public void deleteObject_pidNotFound() { assertThrows( - FileNotFoundException.class, () -> fileHashStore.deleteObject("dou.2023.hashstore.1") - ); + FileNotFoundException.class, () -> fileHashStore.deleteObject("dou.2023.hashstore.1")); } /** @@ -1387,86 +1826,132 @@ public void deleteObject_pidEmptySpaces() { } /** - * Confirm deleteObject overload method to delete a cid deletes cid with a true bool + * Confirm deleteObject removes pid and cid refs orphan files */ @Test - public void deleteObject_overloadCidDeleteTrue() throws Exception { + public void deleteObject_orphanRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path absPathCidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path absPathPidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + fileHashStore.deleteObject(pid); + assertFalse(Files.exists(absPathCidRefsPath)); + assertFalse(Files.exists(absPathPidRefsPath)); + } + + /** + * Confirm deleteObjectByCid deletes cid object + */ + @Test + public void deleteObjectByCid() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); 
Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String cid = objInfo.getCid(); - - // Set flag to true - fileHashStore.deleteObject(cid, true); - - // Get permanent address of the actual cid - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getCid(); - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, actualCid - ); - Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - assertFalse(Files.exists(objectStoreDirectory)); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + String cid = objInfo.cid(); + + fileHashStore.deleteObjectByCid(cid); + + // Get permanent address of the actual cid + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String actualCid = objInfo.cid(); + String cidShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + actualCid); + Path objectStoreDirectory = + rootDirectory.resolve("objects").resolve(cidShardString); + assertFalse(Files.exists(objectStoreDirectory)); + } } } /** - * Confirm deleteObject overload method does not delete an object with a true bool - * because a cid refs file exists + * Confirm deleteObjectByCid does not delete an object because a cid refs file exists (there are + * still pids referencing the object) */ @Test - public void deleteObject_overloadCidDeleteTrueButCidRefsExists() throws Exception { + public void deleteObject_cidType_AndCidRefsExists() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); 
Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - String cid = objInfo.getCid(); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.cid(); - // Set flag to true - fileHashStore.deleteObject(cid, true); + fileHashStore.deleteObjectByCid(cid); - // Get permanent address of the actual cid - Path objRealPath = fileHashStore.getRealPath(pid, "object", null); - assertTrue(Files.exists(objRealPath)); + // Get permanent address of the actual cid + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objRealPath)); + // Confirm cid refs file still exists + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertTrue(Files.exists(cidRefsPath)); + } } } /** - * Confirm deleteObject overload method does not delete an object with a false bool + * Test deleteObject synchronization using a Runnable class */ @Test - public void deleteObject_overloadCidDeleteFalse() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); + public void deleteObject_1000Pids_1Obj_viaRunnable() throws Exception { + // Get single test file to "upload" + String pid = "jtao.1700.1"; + Path testDataFile = testData.getTestFile(pid); + Collection pidModifiedList = new ArrayList<>(); + for (int i = 1; i <= 1000; i++) { + pidModifiedList.add(pid + ".dou.delobj1k." 
+ i); + } + + Runtime runtime = Runtime.getRuntime(); + int numCores = runtime.availableProcessors(); + ExecutorService executorService = Executors.newFixedThreadPool(numCores); + + // Store 1000 + for (String pidAdjusted : pidModifiedList) { InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - String cid = objInfo.getCid(); - - // Set flag to true - fileHashStore.deleteObject(cid, false); - - // Get permanent address of the actual cid - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getCid(); - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - storeDepth, storeWidth, actualCid - ); - Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - assertTrue(Files.exists(objectStoreDirectory)); + Runnable request = new HashStoreRunnable(fileHashStore, 1, dataStream, pidAdjusted); + executorService.execute(request); + } + // Delete 1000 + for (String pidAdjusted : pidModifiedList) { + Runnable request = new HashStoreRunnable(fileHashStore, 2, pidAdjusted); + executorService.execute(request); + } + + executorService.shutdown(); + executorService.awaitTermination(1, TimeUnit.MINUTES); + + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + // Check that no objects exist + List objectPaths = FileHashStoreUtility.getFilesFromDir(storePath.resolve("objects")); + // To assist with debugging + for (Path path : objectPaths) { + System.out.println("HashStoreRunnableTest ~ Path found in Objects Directory: " + path); } + assertEquals(0, objectPaths.size()); + // Check that no refs files exist + List pidRefFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + assertEquals(0, pidRefFiles.size()); + List cidRefFiles = + 
FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + assertEquals(0, cidRefFiles.size()); } + /** * Confirm that deleteMetadata deletes metadata and empty sub directories */ @@ -1478,28 +1963,29 @@ public void deleteMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); - String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - fileHashStore.deleteMetadata(pid, storeFormatId); + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + fileHashStore.deleteMetadata(pid, storeFormatId); - // Check that file doesn't exist - Path metadataCidPath = fileHashStore.getRealPath(pid, "metadata", storeFormatId); - assertFalse(Files.exists(metadataCidPath)); + // Check that file doesn't exist + Path metadataCidPath = fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); + assertFalse(Files.exists(metadataCidPath)); - // Check that parent directories are not deleted - assertTrue(Files.exists(metadataCidPath.getParent())); + // Check that parent directories are not deleted + assertTrue(Files.exists(metadataCidPath.getParent())); - // Check that metadata directory still exists - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path storeObjectPath = storePath.resolve("metadata"); - assertTrue(Files.exists(storeObjectPath)); + // Check that metadata directory still exists + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + Path storeObjectPath = storePath.resolve("metadata"); + assertTrue(Files.exists(storeObjectPath)); + } } } /** - * Confirm that deleteMetadata deletes object and empty subdirectories with overload method + * Confirm that 
deleteMetadata deletes all metadata stored for a given pid. */ @Test public void deleteMetadata_overload() throws Exception { @@ -1508,16 +1994,28 @@ public void deleteMetadata_overload() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.storeMetadata(metadataStream, pid, null); + String formatIdTwo = "ns.type.2"; + String formatIdThree = "ns.type.3"; + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamTwo = Files.newInputStream(testMetaDataFile); + InputStream metadataStreamThree = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + fileHashStore.storeMetadata(metadataStreamTwo, pid, formatIdTwo); + fileHashStore.storeMetadata(metadataStreamThree, pid, formatIdThree); + } fileHashStore.deleteMetadata(pid); // Check that file doesn't exist String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); - Path metadataCidPath = fileHashStore.getRealPath(pid, "metadata", storeFormatId); + Path metadataCidPath = fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); + Path metadataCidPathTwo = fileHashStore.getHashStoreMetadataPath(pid, formatIdTwo); + Path metadataCidPathThree = fileHashStore.getHashStoreMetadataPath(pid, formatIdThree); + assertFalse(Files.exists(metadataCidPath)); + assertFalse(Files.exists(metadataCidPathTwo)); + assertFalse(Files.exists(metadataCidPathThree)); // Check that parent directories are not deleted assertTrue(Files.exists(metadataCidPath.getParent())); @@ -1530,8 +2028,7 @@ public void deleteMetadata_overload() throws Exception { } /** - * Confirm that no exceptions are thrown when called to delete metadata - * that does not exist. + * Confirm that no exceptions are thrown when called to delete metadata that does not exist. 
*/ @Test public void deleteMetadata_pidNotFound() throws Exception { @@ -1554,7 +2051,7 @@ public void deleteMetadata_pidNull() { * Confirm that deleteMetadata throws exception when pid is empty */ @Test - public void deleteMetadata_pidEmpty() throws Exception { + public void deleteMetadata_pidEmpty() { assertThrows(IllegalArgumentException.class, () -> { String formatId = "http://hashstore.tests/types/v1.0"; fileHashStore.deleteMetadata("", formatId); @@ -1615,17 +2112,17 @@ public void getHexDigest() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); - - // Then get the checksum - String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); - String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); - String objSha256Checksum = objInfo.getHexDigests().get("SHA-256"); - assertEquals(pidHexDigest, sha256DigestFromTestData); - assertEquals(pidHexDigest, objSha256Checksum); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + // Then get the checksum + String pidHexDigest = fileHashStore.getHexDigest(pid, "SHA-256"); + String sha256DigestFromTestData = testData.pidData.get(pid).get("sha256"); + String objSha256Checksum = objInfo.hexDigests().get("SHA-256"); + assertEquals(pidHexDigest, sha256DigestFromTestData); + assertEquals(pidHexDigest, objSha256Checksum); + } } } @@ -1648,8 +2145,7 @@ public void getHexDigest_pidNotFound() { @Test public void getHexDigest_pidNull() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.getHexDigest(null, "SHA-256") - ); + IllegalArgumentException.class, () -> fileHashStore.getHexDigest(null, "SHA-256")); } /** @@ -1658,8 +2154,7 @@ public void 
getHexDigest_pidNull() { @Test public void getHexDigest_pidEmpty() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.getHexDigest("", "SHA-256") - ); + IllegalArgumentException.class, () -> fileHashStore.getHexDigest("", "SHA-256")); } /** @@ -1668,8 +2163,7 @@ public void getHexDigest_pidEmpty() { @Test public void getHexDigest_pidEmptySpaces() { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.getHexDigest(" ", "SHA-256") - ); + IllegalArgumentException.class, () -> fileHashStore.getHexDigest(" ", "SHA-256")); } /** @@ -1683,74 +2177,12 @@ public void getHexDigest_badAlgo() { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - fileHashStore.getHexDigest(pid, "BLAKE2S"); + fileHashStore.getHexDigest(pid, "BLAKE2S"); + } }); } } - - /** - * Confirm expected cid is returned - */ - @Test - public void findObject_cid() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - String cidRetrieved = fileHashStore.findObject(pid); - - assertEquals(cid, cidRetrieved); - } - - /** - * Confirm that findObject throws OrphanPidRefsFileException exception when - * pid refs file found but cid refs file is missing. 
- */ - @Test - public void findObject_cidRefsFileNotFound() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - Files.delete(cidRefsPath); - - assertThrows(OrphanPidRefsFileException.class, () -> { - fileHashStore.findObject(pid); - }); - } - - - /** - * Confirm that findObject throws PidNotFoundInCidRefsFileException exception when - * pid refs file found but cid refs file is missing. - */ - @Test - public void findObject_cidRefsFileMissingPid() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path cidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteCidRefsPid(pid, cidRefsPath); - - assertThrows(PidNotFoundInCidRefsFileException.class, () -> { - fileHashStore.findObject(pid); - }); - } - - /** - * Check that exception is thrown when pid refs file doesn't exist - */ - @Test - public void findObject_pidNotFound() { - String pid = "dou.test.1"; - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.findObject(pid); - }); - } - } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java index 09a26f15..0145fce0 100644 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java +++ b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreProtectedTest.java @@ -2,11 +2,14 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import java.io.File; +import 
java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -14,12 +17,26 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Properties; import javax.xml.bind.DatatypeConverter; import org.dataone.hashstore.ObjectMetadata; +import org.dataone.hashstore.exceptions.CidNotFoundInPidRefsFileException; +import org.dataone.hashstore.exceptions.HashStoreRefsAlreadyExistException; +import org.dataone.hashstore.exceptions.NonMatchingChecksumException; +import org.dataone.hashstore.exceptions.NonMatchingObjSizeException; +import org.dataone.hashstore.exceptions.OrphanPidRefsFileException; +import org.dataone.hashstore.exceptions.OrphanRefsFilesException; +import org.dataone.hashstore.exceptions.PidNotFoundInCidRefsFileException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; +import org.dataone.hashstore.exceptions.PidRefsFileNotFoundException; import org.dataone.hashstore.testdata.TestDataHarness; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -31,6 +48,7 @@ public class FileHashStoreProtectedTest { private FileHashStore fileHashStore; private Properties fhsProperties; + private Path rootDirectory; private static final TestDataHarness testData = new TestDataHarness(); /** @@ -38,7 +56,7 @@ public class FileHashStoreProtectedTest { */ @BeforeEach public void initializeFileHashStore() { - Path rootDirectory = tempFolder.resolve("metacat"); + rootDirectory = tempFolder.resolve("hashstore"); Properties storeProperties = new Properties(); storeProperties.setProperty("storePath", rootDirectory.toString()); @@ -46,8 +64,7 @@ public void initializeFileHashStore() { 
storeProperties.setProperty("storeWidth", "2"); storeProperties.setProperty("storeAlgorithm", "SHA-256"); storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); + "storeMetadataNamespace", "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); try { fhsProperties = storeProperties; @@ -66,7 +83,7 @@ public void initializeFileHashStore() { * Non-test method using to generate a temp file */ public File generateTemporaryFile() throws Exception { - Path directory = tempFolder.resolve("metacat"); + Path directory = tempFolder.resolve("hashstore"); // newFile return FileHashStoreUtility.generateTmpFile("testfile", directory); } @@ -78,98 +95,185 @@ public File generateTemporaryFile() throws Exception { public Path tempFolder; /** - * Check algorithm support for supported algorithm + * Check that findObject returns cid as expected. */ @Test - public void isValidAlgorithm_supported() { - try { - String md2 = "MD2"; - boolean supported = fileHashStore.validateAlgorithm(md2); - assertTrue(supported); + public void findObject_cid() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - } catch (NoSuchAlgorithmException nsae) { - fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objMeta = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); + assertEquals(findObjInfo.cid(), objMeta.cid()); + } } } /** - * Check algorithm support for unsupported algorithm + * Check that findObject returns the path to the object as expected. 
*/ @Test - public void isValidAlgorithm_notSupported() { - assertThrows(NoSuchAlgorithmException.class, () -> { - try { - String sm3 = "SM3"; - boolean not_supported = fileHashStore.validateAlgorithm(sm3); - assertFalse(not_supported); + public void findObject_cidPath() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); - } catch (NoSuchAlgorithmException nsae) { - throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage() - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); + String objectPath = findObjInfo.cidObjectPath(); + + String objRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + objInfo.cid()); + Path realPath = rootDirectory.resolve("objects").resolve(objRelativePath); + assertEquals(objectPath, realPath.toString()); } - }); + } } /** - * Check algorithm support for unsupported algorithm with lower cases + * Check that findObject returns the absolute path to the pid and cid refs file */ @Test - public void isValidAlgorithm_notSupportedLowerCase() { - assertThrows(NoSuchAlgorithmException.class, () -> { - try { - // Must match string to reduce complexity, no string formatting - String md2_lowercase = "md2"; - boolean lowercase_not_supported = fileHashStore.validateAlgorithm(md2_lowercase); - assertFalse(lowercase_not_supported); + public void findObject_refsPaths() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); 
- } catch (NoSuchAlgorithmException nsae) { - throw new NoSuchAlgorithmException( - "NoSuchAlgorithmException encountered: " + nsae.getMessage() - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); + String cidRefsPath = findObjInfo.cidRefsPath(); + String pidRefsPath = findObjInfo.pidRefsPath(); + + Path cidRefsFilePath = fileHashStore.getHashStoreRefsPath(objInfo.cid(), + FileHashStore.HashStoreIdTypes.cid); + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + assertEquals(cidRefsPath, cidRefsFilePath.toString()); + assertEquals(pidRefsPath, pidRefsFilePath.toString()); } - }); + } } /** - * Check algorithm support for null algorithm value throws exception + * Check that findObject returns the absolute path to sysmeta document if it exists */ @Test - public void isValidAlgorithm_algorithmNull() { - assertThrows(IllegalArgumentException.class, () -> { - try { - fileHashStore.validateAlgorithm(null); + public void findObject_sysmetaPath_exists() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - } catch (NoSuchAlgorithmException nsae) { - fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + try (InputStream dataStream = Files.newInputStream(testDataFile); + InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + // Store Metadata + fileHashStore.storeMetadata(metadataStream, pid); + + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); + String objInfoSysmetaPath = findObjInfo.sysmetaPath(); + + String storeMetadataNamespace 
= fhsProperties.getProperty("storeMetadataNamespace"); + Path sysmetaPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + + assertEquals(objInfoSysmetaPath, sysmetaPath.toString()); } - }); + } } /** - * Confirm that a temporary file has been generated. + * Check that findObject returns "Does not exist." when there is no sysmeta for the pid. */ @Test - public void generateTempFile() throws Exception { - File newTmpFile = generateTemporaryFile(); - assertTrue(newTmpFile.exists()); + public void findObject_sysmetaPath_doesNotExist() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + FileHashStore.ObjectInfo findObjInfo = fileHashStore.findObject(pid); + String objInfoSysmetaPath = findObjInfo.sysmetaPath(); + + assertEquals(objInfoSysmetaPath, "Does not exist"); + } + } + } + + /** + * Confirm findObject throws exception when cid object does not exist but reference files + * exist. + */ + @Test + public void findObject_refsFileExistButObjectDoesNot() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + assertThrows(OrphanRefsFilesException.class, () -> fileHashStore.findObject(pid)); + } + + /** + * Confirm that findObject throws OrphanPidRefsFileException exception when pid refs file found + * but cid refs file is missing. 
+ */ + @Test + public void findObject_cidRefsFileNotFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Files.delete(cidRefsPath); + + assertThrows(OrphanPidRefsFileException.class, () -> fileHashStore.findObject(pid)); + } + + + /** + * Confirm that findObject throws PidNotFoundInCidRefsFileException exception when pid refs file + * found but cid refs file is missing. + */ + @Test + public void findObject_cidRefsFileMissingPid() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + Path cidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + pid, cidRefsPath, FileHashStore.HashStoreRefUpdateTypes.remove); + + assertThrows(PidNotFoundInCidRefsFileException.class, () -> fileHashStore.findObject(pid)); } /** - * Confirm that a given digest is sharded appropriately + * Check that exception is thrown when pid refs file doesn't exist */ @Test - public void getHierarchicalPathString() { - String shardedPath = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, "94f9b6c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a" - ); - String shardedPathExpected = - "94/f9/b6/c88f1f458e410c30c351c6384ea42ac1b5ee1f8430d3e365e43b78a38a"; - assertEquals(shardedPath, shardedPathExpected); + public void findObject_pidNotFound() { + String pid = "dou.test.1"; + assertThrows(PidRefsFileNotFoundException.class, () -> fileHashStore.findObject(pid)); } /** @@ -181,12 +285,14 @@ public void putObject_testHarness_id() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, 
null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata address = + fileHashStore.putObject(dataStream, pid, null, null, null, -1); - // Check id (sha-256 hex digest of the ab_id, aka object_cid) - String objContentId = testData.pidData.get(pid).get("sha256"); - assertEquals(objContentId, address.getCid()); + // Check id (sha-256 hex digest of the ab_id, aka object_cid) + String objContentId = testData.pidData.get(pid).get("sha256"); + assertEquals(objContentId, address.cid()); + } } } @@ -199,12 +305,15 @@ public void putObject_objSize() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.putObject(dataStream, pid, null, null, null, -1); + + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.size()); + } - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); } } @@ -217,22 +326,24 @@ public void putObject_testHarness_hexDigests() throws Exception { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata address = fileHashStore.putObject(dataStream, pid, null, null, null, -1); - - Map hexDigests = address.getHexDigests(); - - // Validate checksum values - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 
= testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); - assertEquals(md5, hexDigests.get("MD5")); - assertEquals(sha1, hexDigests.get("SHA-1")); - assertEquals(sha256, hexDigests.get("SHA-256")); - assertEquals(sha384, hexDigests.get("SHA-384")); - assertEquals(sha512, hexDigests.get("SHA-512")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata address = + fileHashStore.putObject(dataStream, pid, null, null, null, -1); + + Map hexDigests = address.hexDigests(); + + // Validate checksum values + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + } } } @@ -247,19 +358,19 @@ public void putObject_validateChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata address = fileHashStore.putObject( - dataStream, pid, null, checksumCorrect, "MD2", -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata address = + fileHashStore.putObject(dataStream, pid, null, checksumCorrect, "MD2", -1); - String objCid = address.getCid(); - // Get relative path - String objCidShardString = FileHashStoreUtility.getHierarchicalPathString(3, 2, objCid); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path objCidAbsPath = storePath.resolve("objects/" + objCidShardString); + String objCid = address.cid(); + // Get relative path + String 
objCidShardString = FileHashStoreUtility.getHierarchicalPathString(3, 2, objCid); + // Get absolute path + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + Path objCidAbsPath = storePath.resolve("objects/" + objCidShardString); - assertTrue(Files.exists(objCidAbsPath)); + assertTrue(Files.exists(objCidAbsPath)); + } } /** @@ -273,11 +384,12 @@ public void putObject_additionalAlgo_correctChecksumValue() throws Exception { String checksumCorrect = "9c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, "MD2", null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, "MD2", null, null, -1); - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(checksumCorrect, md2); + String md2 = testData.pidData.get(pid).get("md2"); + assertEquals(checksumCorrect, md2); + } } /** @@ -292,8 +404,9 @@ public void putObject_incorrectChecksumValue() { String checksumIncorrect = "1c25df1c8ba1d2e57bb3fd4785878b85"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, checksumIncorrect, "MD2", -1); + } }); } @@ -307,8 +420,9 @@ public void putObject_emptyChecksumValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, " ", "MD2", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, " ", "MD2", -1); + } }); } @@ -322,8 +436,9 @@ public void putObject_nullChecksumValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = 
Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, "MD2", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, null, "MD2", -1); + } }); } @@ -337,8 +452,9 @@ public void putObject_emptyChecksumAlgorithmValue() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", " ", -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, "abc", " ", -1); + } }); } @@ -351,8 +467,10 @@ public void putObject_nullChecksumAlgorithmValue() { // Get test file to "upload" String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, "abc", null, -1); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, "abc", null, -1); + } }); } @@ -367,13 +485,13 @@ public void putObject_objSizeCorrect() throws Exception { Path testDataFile = testData.getTestFile(pidFormatted); long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject( - dataStream, pid, null, null, null, objectSize - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.putObject(dataStream, pid, null, null, null, objectSize); - // Check id (sha-256 hex digest of the ab_id (pid)) - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + assertEquals(objectSize, objInfo.size()); + } } } @@ -387,21 +505,21 @@ public void putObject_objSizeIncorrect() { String pidFormatted = pid.replace("/", "_"); 
Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.putObject( - dataStream, pid, null, null, null, 1000 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.putObject(dataStream, pid, null, null, null, 1000); - // Check id (sha-256 hex digest of the ab_id (pid)) - long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); - assertEquals(objectSize, objInfo.getSize()); + // Check id (sha-256 hex digest of the ab_id (pid)) + long objectSize = Long.parseLong(testData.pidData.get(pid).get("size")); + assertEquals(objectSize, objInfo.size()); + } }); } } /** - * Verify putObject deletes temporary file written if called to store an object - * that already exists (duplicate) + * Verify putObject deletes temporary file written if called to store an object that already + * exists (duplicate) */ @Test public void putObject_duplicateObject() throws Exception { @@ -409,18 +527,21 @@ public void putObject_duplicateObject() throws Exception { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, null, null, null, -1); + } + // Try duplicate upload String pidTwo = pid + ".test"; - InputStream dataStreamTwo = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStreamTwo, pidTwo, null, null, null, -1); + try (InputStream dataStreamTwo = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStreamTwo, pidTwo, null, null, null, -1); + } // Confirm there are no files in 'objects/tmp' directory Path storePath = Paths.get(fhsProperties.getProperty("storePath")); File[] files = 
storePath.resolve("objects/tmp").toFile().listFiles(); - assertEquals(0, files.length); + assertEquals(0, Objects.requireNonNull(files).length); } /** @@ -433,8 +554,9 @@ public void putObject_invalidAlgorithm() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, "SM2", null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, "SM2", null, null, -1); + } }); } @@ -448,11 +570,233 @@ public void putObject_emptyAlgorithm() { String pid = "jtao.1700.1"; Path testDataFile = testData.getTestFile(pid); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.putObject(dataStream, pid, " ", null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.putObject(dataStream, pid, " ", null, null, -1); + } + }); + } + + /** + * Confirm validateTmpObject does nothing when requestValidation is false and does not throw any + * exceptions + */ + @Test + public void validateTmpObject() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + File tmpFile = generateTemporaryFile(); + fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", tmpFile, hexDigests, + -1); + } + + /** + * Confirm validateTmpObject does not throw exception when expected size matches store size + */ + @Test + public void validateTmpObject_sizeMatches() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + File tmpFile = generateTemporaryFile(); + + // Write the byte to the file + try (FileOutputStream fos = new FileOutputStream(tmpFile)) { + fos.write(0x41); + } catch (IOException e) { + e.printStackTrace(); + } + + fileHashStore.validateTmpObject(false, 
"sha256Digest", "SHA-256", tmpFile, hexDigests, 1); + } + + /** + * Confirm validateTmpObject throws exception when expected size does not match store size + */ + @Test + public void validateTmpObject_sizeMismatch() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + File tmpFile = generateTemporaryFile(); + + assertThrows(NonMatchingObjSizeException.class, + () -> fileHashStore.validateTmpObject(false, "checksum.string", "SHA-256", + tmpFile, hexDigests, 10)); + } + + /** + * Confirm validateTmpObject does not throw exception when requested to validate checksums + * with good values + */ + @Test + public void validateTmpObject_validationRequested_matchingChecksum() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + File tmpFile = generateTemporaryFile(); + fileHashStore.validateTmpObject(true, "sha256Digest", "SHA-256", tmpFile, hexDigests, -1); + } + + /** + * Confirm validateTmpObject throws exception when requested to validate checksums with a + * non-matching checksum, and that the tmpFile passed is deleted. + */ + @Test + public void validateTmpObject_validationRequested_nonMatchingChecksum() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + File tmpFile = generateTemporaryFile(); + + assertThrows(NonMatchingChecksumException.class, + () -> fileHashStore.validateTmpObject(true, "checksum.string", "SHA-256", + tmpFile, hexDigests, -1)); + assertFalse(Files.exists(tmpFile.toPath())); + } + + /** + * Confirm validateTmpObject throws exception when requested to validate but algo is not found + * in hex digests passed. 
+ */ + @Test + public void validateTmpObject_validationRequested_algoNotFound() throws Exception { + Map hexDigests = new HashMap<>(); + hexDigests.put("MD5", "md5Digest"); + hexDigests.put("SHA-256", "sha256Digest"); + File tmpFile = generateTemporaryFile(); + + assertThrows(NoSuchAlgorithmException.class, + () -> fileHashStore.validateTmpObject(true, "md2Digest", "MD2", tmpFile, + hexDigests, -1)); + assertFalse(Files.exists(tmpFile.toPath())); + } + + /** + * Check algorithm support for supported algorithm + */ + @Test + public void validateAlgorithm_supported() { + try { + String md2 = "MD2"; + boolean supported = fileHashStore.validateAlgorithm(md2); + assertTrue(supported); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + } + + /** + * Check algorithm support for unsupported algorithm + */ + @Test + public void validateAlgorithm_notSupported() { + assertThrows(NoSuchAlgorithmException.class, () -> { + try { + String sm3 = "SM3"; + boolean not_supported = fileHashStore.validateAlgorithm(sm3); + assertFalse(not_supported); + + } catch (NoSuchAlgorithmException nsae) { + throw new NoSuchAlgorithmException( + "NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + }); + } + + /** + * Check algorithm support for unsupported algorithm with lower cases + */ + @Test + public void validateAlgorithm_notSupportedLowerCase() { + assertThrows(NoSuchAlgorithmException.class, () -> { + try { + // Must match string to reduce complexity, no string formatting + String md2_lowercase = "md2"; + boolean lowercase_not_supported = fileHashStore.validateAlgorithm(md2_lowercase); + assertFalse(lowercase_not_supported); + + } catch (NoSuchAlgorithmException nsae) { + throw new NoSuchAlgorithmException( + "NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } + }); + } + + /** + * Check algorithm support for null algorithm value throws exception + */ + @Test + public void 
validateAlgorithm_algorithmNull() { + assertThrows(IllegalArgumentException.class, () -> { + try { + fileHashStore.validateAlgorithm(null); + + } catch (NoSuchAlgorithmException nsae) { + fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); + + } }); } + /** + * Confirm shouldCalculateAlgorithm returns false if algorithm is part of default list + */ + @Test + public void shouldCalculateAlgorithm_algoIncluded() { + boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-256"); + assertFalse(shouldCalculate); + } + + /** + * Confirm shouldCalculateAlgorithm returns true if algorithm not part of default list + */ + @Test + public void shouldCalculateAlgorithm_algoNotIncluded() { + boolean shouldCalculate = fileHashStore.shouldCalculateAlgorithm("SHA-DOU"); + assertTrue(shouldCalculate); + } + + /** + * Confirm verifyChecksumParameters returns true with good values + */ + @Test + public void verifyChecksumParameters() throws Exception { + boolean shouldValidate = fileHashStore.verifyChecksumParameters("abc123", "SHA-256"); + assertTrue(shouldValidate); + } + + /** + * Confirm verifyChecksumParameters throws exception when checksum value is empty + */ + @Test + public void verifyChecksumParameters_emptyChecksum() { + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.verifyChecksumParameters(" ", "SHA-256")); + } + + /** + * Confirm verifyChecksumParameters throws exception when checksum algorithm is empty + */ + @Test + public void verifyChecksumParameters_emptyAlgorithm() { + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.verifyChecksumParameters("abc123", " ")); + } + + /** + * Confirm verifyChecksumParameters throws exception when checksum algorithm is not supported + */ + @Test + public void verifyChecksumParameters_unsupportedAlgorithm() { + assertThrows(NoSuchAlgorithmException.class, + () -> fileHashStore.verifyChecksumParameters("abc123", "SHA-DOU")); + } + /** * Check default checksums are 
generated */ @@ -465,22 +809,23 @@ public void writeToTmpFileAndGenerateChecksums() throws Exception { // Get test file Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, null, null - ); - - // Validate checksum values - String md5 = testData.pidData.get(pid).get("md5"); - String sha1 = testData.pidData.get(pid).get("sha1"); - String sha256 = testData.pidData.get(pid).get("sha256"); - String sha384 = testData.pidData.get(pid).get("sha384"); - String sha512 = testData.pidData.get(pid).get("sha512"); - assertEquals(md5, hexDigests.get("MD5")); - assertEquals(sha1, hexDigests.get("SHA-1")); - assertEquals(sha256, hexDigests.get("SHA-256")); - assertEquals(sha384, hexDigests.get("SHA-384")); - assertEquals(sha512, hexDigests.get("SHA-512")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, + null); + + // Validate checksum values + String md5 = testData.pidData.get(pid).get("md5"); + String sha1 = testData.pidData.get(pid).get("sha1"); + String sha256 = testData.pidData.get(pid).get("sha256"); + String sha384 = testData.pidData.get(pid).get("sha384"); + String sha512 = testData.pidData.get(pid).get("sha512"); + assertEquals(md5, hexDigests.get("MD5")); + assertEquals(sha1, hexDigests.get("SHA-1")); + assertEquals(sha256, hexDigests.get("SHA-256")); + assertEquals(sha384, hexDigests.get("SHA-384")); + assertEquals(sha512, hexDigests.get("SHA-512")); + } } } @@ -499,8 +844,10 @@ public void writeToTmpFileAndGenerateChecksums_tmpFileSize() throws Exception { // Extra algo to calculate - MD2 String addAlgo = "MD2"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, addAlgo, null); + try (InputStream 
dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.writeToTmpFileAndGenerateChecksums( + newTmpFile, dataStream, addAlgo, null); + } long testDataFileSize = Files.size(testDataFile); long tmpFileSize = Files.size(newTmpFile.toPath()); @@ -523,14 +870,14 @@ public void writeToTmpFileAndGenerateChecksums_addAlgo() throws Exception { // Extra algo to calculate - MD2 String addAlgo = "MD2"; - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, addAlgo, null - ); - - // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); - assertEquals(md2, hexDigests.get("MD2")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, + addAlgo, null); + // Validate checksum values + String md2 = testData.pidData.get(pid).get("md2"); + assertEquals(md2, hexDigests.get("MD2")); + } } } @@ -549,14 +896,14 @@ public void writeToTmpFileAndGenerateChecksums_checksumAlgo() throws Exception { // Extra algo to calculate - MD2 String checksumAlgo = "SHA-512/224"; - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, null, checksumAlgo - ); - - // Validate checksum values - String sha512224 = testData.pidData.get(pid).get("sha512-224"); - assertEquals(sha512224, hexDigests.get("SHA-512/224")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, null, + checksumAlgo); + // Validate checksum values + String sha512224 = testData.pidData.get(pid).get("sha512-224"); + assertEquals(sha512224, hexDigests.get("SHA-512/224")); + } } } @@ -576,16 +923,16 @@ public void writeToTmpFileAndGenerateChecksums_addAlgoChecksumAlgo() throws Exce String 
addAlgo = "MD2"; String checksumAlgo = "SHA-512/224"; - InputStream dataStream = Files.newInputStream(testDataFile); - Map hexDigests = fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, addAlgo, checksumAlgo - ); - - // Validate checksum values - String md2 = testData.pidData.get(pid).get("md2"); - String sha512224 = testData.pidData.get(pid).get("sha512-224"); - assertEquals(md2, hexDigests.get("MD2")); - assertEquals(sha512224, hexDigests.get("SHA-512/224")); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + Map hexDigests = + fileHashStore.writeToTmpFileAndGenerateChecksums(newTmpFile, dataStream, + addAlgo, checksumAlgo); + // Validate checksum values + String md2 = testData.pidData.get(pid).get("md2"); + String sha512224 = testData.pidData.get(pid).get("sha512-224"); + assertEquals(md2, hexDigests.get("MD2")); + assertEquals(sha512224, hexDigests.get("SHA-512/224")); + } } } @@ -605,10 +952,10 @@ public void writeToTmpFileAndGenerateChecksums_invalidAlgo() { // Extra algo to calculate - MD2 String addAlgo = "SM2"; - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.writeToTmpFileAndGenerateChecksums( - newTmpFile, dataStream, addAlgo, null - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.writeToTmpFileAndGenerateChecksums( + newTmpFile, dataStream, addAlgo, null); + } }); } } @@ -617,7 +964,7 @@ public void writeToTmpFileAndGenerateChecksums_invalidAlgo() { * Confirm that object has moved */ @Test - public void testMove() throws Exception { + public void move() throws Exception { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; File targetFile = new File(targetString); @@ -630,7 +977,7 @@ public void testMove() throws Exception { * Confirm that exceptions are not thrown when move is called on an object that already exists */ @Test - public void testMove_targetExists() 
throws Exception { + public void move_targetExists() throws Exception { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.toString() + "/testmove/test_tmp_object.tmp"; File targetFile = new File(targetString); @@ -644,7 +991,7 @@ public void testMove_targetExists() throws Exception { * Confirm that NullPointerException is thrown when entity is null */ @Test - public void testMove_entityNull() { + public void move_entityNull() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -657,7 +1004,7 @@ public void testMove_entityNull() { * Confirm that FileAlreadyExistsException is thrown entity is empty */ @Test - public void testMove_entityEmpty() { + public void move_entityEmpty() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -670,7 +1017,7 @@ public void testMove_entityEmpty() { * Confirm that FileAlreadyExistsException is thrown when entity is empty spaces */ @Test - public void testMove_entityEmptySpaces() { + public void move_entityEmptySpaces() { assertThrows(IllegalArgumentException.class, () -> { File newTmpFile = generateTemporaryFile(); String targetString = tempFolder.getRoot().toString() + "/testmove/test_tmp_object.tmp"; @@ -679,6 +1026,634 @@ public void testMove_entityEmptySpaces() { }); } + /** + * Confirm deleteObjectByCid method deletes object when there are no references. 
+ */ + @Test + public void deleteObjectByCid() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + // Store object only + ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); + String cid = objInfo.cid(); + + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); + + // Get permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String objShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + + Path objRealPath = storePath.resolve("objects").resolve(objShardString); + assertFalse(Files.exists(objRealPath)); + } + } + } + + /** + * Confirm deleteObjectByCid method does not delete an object if a cid refs file exists (pids + * still referencing the cid). 
+ */ + @Test + public void deleteObjectByCid_cidRefsFileContainsPids() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.cid(); + + // Try deleting the object + fileHashStore.deleteObjectByCid(cid); + + // Get permanent address of the actual cid + Path objRealPath = fileHashStore.getHashStoreDataObjectPath(pid); + assertTrue(Files.exists(objRealPath)); + } + } + } + + /** + * Check that storeHashStoreRefsFiles creates reference files + */ + @Test + public void storeHashStoreRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + // Confirm refs files exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + + assertTrue(Files.exists(absCidRefsPath)); + assertTrue(Files.exists(absPidRefsPath)); + + // Confirm no additional files were created + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that storeHashStoreRefsFiles writes expected pid refs files and that the content is + * correct + */ + @Test + public void storeHashStoreRefsFiles_pidRefsFileContent() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + Path 
pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + assertTrue(Files.exists(pidRefsFilePath)); + + String retrievedCid = new String(Files.readAllBytes(pidRefsFilePath)); + assertEquals(cid, retrievedCid); + } + + /** + * Check that storeHashStoreRefsFiles writes expected cid refs files and that the content is + * correct + */ + @Test + public void storeHashStoreRefsFiles_cidRefsFileContent() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertTrue(Files.exists(cidRefsFilePath)); + + String retrievedPid = new String(Files.readAllBytes(cidRefsFilePath)); + assertEquals(pid, retrievedPid); + } + + /** + * Check that storeHashStoreRefsFiles throws HashStoreRefsAlreadyExistException when refs files + * already exist + */ + @Test + public void storeHashStoreRefsFiles_HashStoreRefsAlreadyExistException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + assertThrows(HashStoreRefsAlreadyExistException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); + + // Confirm that there is only 1 of each ref file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check storeHashStoreRefsFiles throws exception when the supplied cid is different from what + * is found in the pid refs file, and the associated cid refs file from the pid refs file is + * correctly tagged (everything is where it's expected to be) + */ + @Test + public void 
storeHashStoreRefsFiles_PidRefsFileExistsException() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + String existingCid = "987654321fedcba"; + fileHashStore.storeHashStoreRefsFiles(pid, existingCid); + + // This will throw an exception because the pid and cid refs file are in sync + assertThrows(PidRefsFileExistsException.class, + () -> fileHashStore.storeHashStoreRefsFiles(pid, cid)); + } + + /** + * Check storeHashStoreRefsFiles overwrites an orphaned pid refs file - the 'cid' that it + * references does not exist (does not have a cid refs file) + */ + @Test + public void storeHashStoreRefsFiles_pidRefsOrphanedFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + String cidForOrphanPidRef = "987654321fedcba"; + + // Create orphaned pid refs file + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidForOrphanPidRef, + FileHashStore.HashStoreIdTypes.pid.name()); + File absPathPidRefsFile = absPidRefsPath.toFile(); + fileHashStore.move(pidRefsTmpFile, absPathPidRefsFile, "refs"); + + fileHashStore.storeHashStoreRefsFiles(pid, cid); + // There should only be 1 of each ref file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(1, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that storeHashStoreRefsFiles creates a pid refs file and updates an existing cid refs + * file + */ + @Test + public void storeHashStoreRefsFiles_updateExistingRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.storeHashStoreRefsFiles(pid, cid); + + String pidAdditional = "another.pid.2"; + 
fileHashStore.storeHashStoreRefsFiles(pidAdditional, cid); + + // Confirm missing pid refs file has been created + Path pidAdditionalRefsFilePath = + fileHashStore.getHashStoreRefsPath(pidAdditional, FileHashStore.HashStoreIdTypes.pid); + assertTrue(Files.exists(pidAdditionalRefsFilePath)); + + // Check cid refs file + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + boolean pidFoundInCidRefFiles = + fileHashStore.isStringInRefsFile(pidAdditional, cidRefsFilePath); + assertTrue(pidFoundInCidRefFiles); + + // There should be 2 pid refs file, and 1 cid refs file + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(2, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that unTagObject deletes reference files + */ + @Test + public void unTagObject() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + fileHashStore.unTagObject(pid, cid); + + // Confirm refs files do not exist + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + assertFalse(Files.exists(absCidRefsPath)); + assertFalse(Files.exists(absPidRefsPath)); + } + + /** + * Check that unTagObject deletes pid refs file for a cid that is referenced by multiple pids, + * and that the cid refs file is not deleted. 
+ */ + @Test + public void unTagObject_cidWithMultiplePidReferences() throws Exception { + String pid = "dou.test.1"; + String pidTwo = "dou.test.2"; + String pidThree = "dou.test.3"; + String pidFour = "dou.test.4"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + fileHashStore.tagObject(pidTwo, cid); + fileHashStore.tagObject(pidThree, cid); + fileHashStore.tagObject(pidFour, cid); + + fileHashStore.unTagObject(pid, cid); + + // Confirm refs files state + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + + assertFalse(Files.exists(absPidRefsPath)); + assertTrue(Files.exists(absCidRefsPath)); + + // Confirm number of reference files + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(3, pidRefsFiles.size()); + assertEquals(1, cidRefsFiles.size()); + } + + /** + * Check that unTagObject deletes an orphaned pid refs file (there is no cid refs file) + */ + @Test + public void unTagObject_orphanPidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Delete cid refs file to create orphaned pid refs file + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + Files.delete(absCidRefsPath); + assertFalse(Files.exists(absCidRefsPath)); + + fileHashStore.unTagObject(pid, cid); + + // Confirm pid refs is deleted + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + assertFalse(Files.exists(absPidRefsPath)); + + // Confirm number of reference files + Path storePath = 
Paths.get(fhsProperties.getProperty("storePath")); + List pidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/pids")); + List cidRefsFiles = + FileHashStoreUtility.getFilesFromDir(storePath.resolve("refs" + "/cids")); + + assertEquals(0, pidRefsFiles.size()); + assertEquals(0, cidRefsFiles.size()); + } + + /** + * Check that unTagObject does not throw exception when a pid refs file and cid refs file does + * not exist + */ + @Test + public void unTagObject_missingRefsFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + fileHashStore.unTagObject(pid, cid); + } + + /** + * Check that unTagObject does not throw exception when a pid refs file and cid refs file does + * not exist + */ + @Test + public void unTagObject_missingPidRefsFile() throws Exception { + String pid = "dou.test.1"; + String pidTwo = "dou.test.2"; + String pidThree = "dou.test.3"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + fileHashStore.tagObject(pidTwo, cid); + fileHashStore.tagObject(pidThree, cid); + + // Delete pid refs to create scenario + Path absPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Files.delete(absPidRefsPath); + assertFalse(Files.exists(absPidRefsPath)); + + fileHashStore.unTagObject(pid, cid); + + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertFalse(fileHashStore.isStringInRefsFile(pid, absCidRefsPath)); + } + + /** + * Check that no exception is thrown when pid and cid are tagged correctly + */ + @Test + public void verifyHashStoreRefFiles() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a pid refs file with the incorrect cid + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsFilePath = + 
fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsFilePath); + } + + /** + * Check that an exception is thrown when a file is not found + */ + @Test + public void verifyHashStoreRefFiles_fileNotFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + + // Create a pid refs file with the incorrect cid + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + assertThrows(FileNotFoundException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, + cidRefsFilePath)); + } + + /** + * Check that exception is thrown when incorrect cid in a pid refs file. + */ + @Test + public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a pid refs file with the incorrect cid + String cidToWrite = "123456789abcdef"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); + + // Get path of the cid refs file + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + assertThrows(CidNotFoundInPidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, + cidRefsFilePath)); + } + + /** + * Check that exception is thrown when an expected pid is not found in a cid refs file + */ + @Test + public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Create a cid refs file with a different pid from the one that is expected + String pidToWrite = "dou.test.2"; + File 
cidRefsTmpFile = fileHashStore.writeRefsFile(pidToWrite, "cid"); + Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); + + // Get path of the pid refs file + Path pidRefsFilePath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + + assertThrows(PidNotFoundInCidRefsFileException.class, + () -> fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, + cidRefsTmpFilePath)); + } + + /** + * Check that the value supplied is written + */ + @Test + public void writeRefsFile_content() throws Exception { + String cidToWrite = "test_cid_123"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + + String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); + assertEquals(cidRead, cidToWrite); + } + + /** + * Check isStringInRefsFile returns true when value is found + */ + @Test + public void isStringInRefsFile_found() throws Exception { + String cidToWrite = "test_cid_123"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + + assertTrue(fileHashStore.isStringInRefsFile(cidToWrite, pidRefsTmpFile.toPath())); + } + + /** + * Check isStringInRefsFile returns false when value is not found + */ + @Test + public void isStringInRefsFile_notFound() throws Exception { + String cidToWrite = "test_cid_123"; + File pidRefsTmpFile = fileHashStore.writeRefsFile(cidToWrite, "pid"); + + assertFalse(fileHashStore.isStringInRefsFile("not.found.in.ref", pidRefsTmpFile.toPath())); + } + + /** + * Check isStringInRefsFile returns true when value is found in a refs file with multiple values + * and returns false when a value isn't found + */ + @Test + public void isStringInRefsFile_cidRefsMultipleVals() throws Exception { + String cid = "abcdef123456789"; + fileHashStore.tagObject("dou.test.1", cid); + fileHashStore.tagObject("dou.test.2", cid); + fileHashStore.tagObject("dou.test.3", cid); + // Get path of the cid refs file + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, 
FileHashStore.HashStoreIdTypes.cid); + + assertTrue(fileHashStore.isStringInRefsFile("dou.test.1", cidRefsFilePath)); + assertFalse(fileHashStore.isStringInRefsFile("wont.be.found", cidRefsFilePath)); + } + + /** + * Confirm that cid refs file has been updated successfully + */ + @Test + public void updateRefsFile_add() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + String pidAdditional = "dou.test.2"; + fileHashStore.updateRefsFile( + pidAdditional, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.add); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + boolean pidAdditional_foundInCidRefFiles = false; + for (String line : lines) { + if (line.equals(pidAdditional)) { + pidAdditional_foundInCidRefFiles = true; + } + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertTrue(pidOriginal_foundInCidRefFiles); + assertTrue(pidAdditional_foundInCidRefFiles); + } + + /** + * Check that updateRefsFile removes pid from its cid refs file + */ + @Test + public void updateRefsFile_remove() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + String pidAdditional = "dou.test.2"; + fileHashStore.tagObject(pidAdditional, cid); + + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + + assertFalse(fileHashStore.isStringInRefsFile(pid, cidRefsFilePath)); + } + + /** + * Check that updateRefsFile removes all pids as expected and leaves an empty file. 
+ */ + @Test + public void updateRefsFile_removeMultiplePids() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + String pidAdditional = "dou.test.2"; + fileHashStore.tagObject(pidAdditional, cid); + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + fileHashStore.updateRefsFile( + pidAdditional, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + + assertTrue(Files.exists(cidRefsFilePath)); + assertEquals(0, Files.size(cidRefsFilePath)); + } + + /** + * Confirm that updateRefsFile does not throw any exception if called to remove a value that is + * not found in a cid refs file. + */ + @Test + public void updateRefsFile_cidRefsPidNotFound() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + "dou.test.2", cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + int pidsFound = 0; + for (String line : lines) { + pidsFound++; + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertTrue(pidOriginal_foundInCidRefFiles); + assertEquals(1, pidsFound); + } + + /** + * Confirm that updateRefsFile does not throw any exception if called to remove a value from a + * cid refs file that is empty + */ + @Test + public void updateRefsFile_cidRefsEmpty() throws Exception { + String pid = "dou.test.1"; + String cid = "abcdef123456789"; + fileHashStore.tagObject(pid, cid); + + // Get path of the cid refs file + Path cidRefsFilePath = + 
fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + + List lines = Files.readAllLines(cidRefsFilePath); + boolean pidOriginal_foundInCidRefFiles = false; + int pidsFound = 0; + for (String line : lines) { + pidsFound++; + if (line.equals(pid)) { + pidOriginal_foundInCidRefFiles = true; + } + } + assertFalse(pidOriginal_foundInCidRefFiles); + assertEquals(0, pidsFound); + + // Confirm that no exception is thrown and that the cid refs still exists + fileHashStore.updateRefsFile( + pid, cidRefsFilePath, FileHashStore.HashStoreRefUpdateTypes.remove); + assertTrue(Files.exists(cidRefsFilePath)); + } + /** * Test putMetadata stores metadata as expected */ @@ -690,18 +1665,15 @@ public void putMetadata() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - String metadataCid = fileHashStore.putMetadata(metadataStream, pid, null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String metadataPath = fileHashStore.putMetadata(metadataStream, pid, null); - // Get relative path - String metadataCidShardString = FileHashStoreUtility.getHierarchicalPathString( - 3, 2, metadataCid - ); - // Get absolute path - Path storePath = Paths.get(fhsProperties.getProperty("storePath")); - Path metadataCidAbsPath = storePath.resolve("metadata/" + metadataCidShardString); - - assertTrue(Files.exists(metadataCidAbsPath)); + // Calculate absolute path + String storeMetadataNamespace = fhsProperties.getProperty("storeMetadataNamespace"); + Path metadataPidExpectedPath = + fileHashStore.getHashStoreMetadataPath(pid, storeMetadataNamespace); + assertEquals(metadataPath, metadataPidExpectedPath.toString()); + } } } @@ -712,8 +1684,7 @@ public void putMetadata() throws Exception { public void 
putMetadata_metadataNull() { for (String pid : testData.pidList) { assertThrows( - IllegalArgumentException.class, () -> fileHashStore.putMetadata(null, pid, null) - ); + IllegalArgumentException.class, () -> fileHashStore.putMetadata(null, pid, null)); } } @@ -729,9 +1700,9 @@ public void putMetadata_pidNull() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.putMetadata(metadataStream, null, null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.putMetadata(metadataStream, null, null); + } }); } } @@ -748,9 +1719,9 @@ public void putMetadata_pidEmpty() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.putMetadata(metadataStream, "", null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.putMetadata(metadataStream, "", null); + } }); } } @@ -767,9 +1738,9 @@ public void putMetadata_pidEmptySpaces() { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - - fileHashStore.putMetadata(metadataStream, " ", null); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.putMetadata(metadataStream, " ", null); + } }); } } @@ -786,17 +1757,16 @@ public void writeToTmpMetadataFile() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile( - newTmpFile, metadataStream - ); - assertTrue(metadataWritten); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) 
{ + boolean metadataWritten = + fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + assertTrue(metadataWritten); + } } } /** * Check that tmp metadata is actually written by verifying file size - * */ @Test public void writeToTmpMetadataFile_tmpFileSize() throws Exception { @@ -807,16 +1777,16 @@ public void writeToTmpMetadataFile_tmpFileSize() throws Exception { // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - boolean metadataWritten = fileHashStore.writeToTmpMetadataFile( - newTmpFile, metadataStream - ); - assertTrue(metadataWritten); + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + boolean metadataWritten = + fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + assertTrue(metadataWritten); - long tmpMetadataFileSize = Files.size(newTmpFile.toPath()); - long testMetadataFileSize = Files.size(testMetaDataFile); - assertTrue(tmpMetadataFileSize > 0); - assertEquals(tmpMetadataFileSize, testMetadataFileSize); + long tmpMetadataFileSize = Files.size(newTmpFile.toPath()); + long testMetadataFileSize = Files.size(testMetaDataFile); + assertTrue(tmpMetadataFileSize > 0); + assertEquals(tmpMetadataFileSize, testMetadataFileSize); + } } } @@ -828,109 +1798,410 @@ public void writeToTmpMetadataFile_metadataContent() throws Exception { for (String pid : testData.pidList) { File newTmpFile = generateTemporaryFile(); String pidFormatted = pid.replace("/", "_"); - // Get test metadata file Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); - // Write it to the tmpFile - InputStream metadataStream = Files.newInputStream(testMetaDataFile); - fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); // Create InputStream to tmp File - InputStream metadataStoredStream; - try { - metadataStoredStream = Files.newInputStream(newTmpFile.toPath()); + try (InputStream 
metadataStoredStream = Files.newInputStream(newTmpFile.toPath()); + InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + // Write it to the tmpFile + + fileHashStore.writeToTmpMetadataFile(newTmpFile, metadataStream); + // Calculate checksum of metadata content + MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); + try { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = metadataStoredStream.read(buffer)) != -1) { + sha256.update(buffer, 0, bytesRead); + } + + } catch (IOException ioe) { + ioe.printStackTrace(); + throw ioe; - } catch (Exception e) { - e.printStackTrace(); - throw e; + } + String sha256Digest = + DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); + String sha256MetadataDigestFromTestData = + testData.pidData.get(pid).get("metadata_cid_sha256"); + assertEquals(sha256Digest, sha256MetadataDigestFromTestData); } + } + } - // Calculate checksum of metadata content - MessageDigest sha256 = MessageDigest.getInstance("SHA-256"); - try { - byte[] buffer = new byte[8192]; - int bytesRead; - while ((bytesRead = metadataStoredStream.read(buffer)) != -1) { - sha256.update(buffer, 0, bytesRead); + /** + * Confirm that syncRenameMetadataDocForDeletion adds '_delete' to the given paths + */ + @Test + public void syncRenameMetadataDocForDeletion_renamesAsExpected() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); + String pathToMetadataTwo = + fileHashStore.putMetadata(metadataStream, pid, "ns.test.1"); + String pathToMetadataThree = + fileHashStore.putMetadata(metadataStream, pid, "ns.test" + ".3"); + + // Confirm that metadata documents are present + Path storePath = 
Paths.get(fhsProperties.getProperty("storePath")); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String pidHexDigest = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String pidRelativePath = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + pidHexDigest); + Path expectedPidMetadataDirectory = + storePath.resolve("metadata").resolve(pidRelativePath); + List metadataDocPaths = + FileHashStoreUtility.getFilesFromDir(expectedPidMetadataDirectory); + + assertEquals(3, metadataDocPaths.size()); + + Collection deleteList = + fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths); + + Collection renamedDocStrings = new ArrayList<>(); + for (Path renamedDoc : deleteList) { + renamedDocStrings.add(renamedDoc.toString()); } + assertTrue(renamedDocStrings.contains(pathToMetadata + "_delete")); + assertTrue(renamedDocStrings.contains(pathToMetadataTwo + "_delete")); + assertTrue(renamedDocStrings.contains(pathToMetadataThree + "_delete")); + } + } + } - } catch (IOException ioe) { - ioe.printStackTrace(); - throw ioe; + /** + * Confirm that syncRenameMetadataDocForDeletion throws exception when supplied list is empty. + */ + @Test + public void syncRenameMetadataDocForDeletion_emptyList() { + Collection metadataDocPaths = new ArrayList<>(); + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.syncRenameMetadataDocForDeletion(metadataDocPaths)); + } + + /** + * Confirm that syncRenameMetadataDocForDeletion throws exception when supplied list is null. 
+ */ + @Test + public void syncRenameMetadataDocForDeletion_nullList() { + assertThrows(IllegalArgumentException.class, + () -> fileHashStore.syncRenameMetadataDocForDeletion(null)); + } + /** + * Confirm that isStringInRefsFile returns true when pid is found + */ + @Test + public void isStringInRefsFile_pidFound() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); } - String sha256Digest = DatatypeConverter.printHexBinary(sha256.digest()).toLowerCase(); - String sha256MetadataDigestFromTestData = testData.pidData.get(pid).get( - "metadata_sha256" - ); - assertEquals(sha256Digest, sha256MetadataDigestFromTestData); + String pidTwo = pid + ".test"; + + try (InputStream dataStreamDup = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStreamDup, pidTwo, null, null, null, -1); - // Close stream - metadataStoredStream.close(); + String cid = objInfo.cid(); + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertTrue(fileHashStore.isStringInRefsFile(pidTwo, absCidRefsPath)); + } } } /** - * Confirm that isPidInCidRefsFile returns true when pid is found + * Confirm that isStringInRefsFile returns false when pid is found */ @Test - public void isPidInCidRefsFile_pidFound() throws Exception { + public void isStringInRefsFile_pidNotFound() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + 
fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String pidTwo = pid + ".test"; - InputStream dataStreamDup = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStreamDup, pidTwo, null, null, null, -1 - ); + String cid = objInfo.cid(); + Path absCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + assertFalse(fileHashStore.isStringInRefsFile("pid.not.found", absCidRefsPath)); + } + } + } + + /** + * Confirm getHashStoreDataObjectPath returns correct object path + */ + @Test + public void getHashStoreDataObjectPath() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.cid(); + + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String objShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + Path calculatedObjRealPath = storePath.resolve("objects").resolve(objShardString); + + Path expectedObjCidAbsPath = fileHashStore.getHashStoreDataObjectPath(pid); + + assertEquals(expectedObjCidAbsPath, calculatedObjRealPath); + } + } + } + + /** + * Confirm getHashStoreMetadataPath returns correct metadata path + */ + @Test + public void getHashStoreMetadataPath() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + try (InputStream metadataStream = 
Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid); + + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + String storeFormatId = fhsProperties.getProperty("storeMetadataNamespace"); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - assertTrue(fileHashStore.isPidInCidRefsFile(pidTwo, absCidRefsPath)); + // Document ID + String hashId = + FileHashStoreUtility.getPidHexDigest(pid + storeFormatId, storeAlgo); + + // Metadata directory of the given pid + String metadataPidDirId = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String metadataPidDirIdSharded = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidDirId); + + // Complete path + Path calculatedMetadataRealPath = + storePath.resolve("metadata").resolve(metadataPidDirIdSharded).resolve(hashId); + + Path expectedMetadataPidPath = + fileHashStore.getHashStoreMetadataPath(pid, storeFormatId); + + assertEquals(expectedMetadataPidPath, calculatedMetadataRealPath); + } + } + } + + /** + * Check that getHashStoreMetadataInputStream returns an InputStream + */ + @Test + public void getHashStoreMetadataInputStream() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + fileHashStore.storeMetadata(metadataStream, pid, null); + + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + + InputStream metadataCidInputStream = + fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId); + 
assertNotNull(metadataCidInputStream); + } + } + } + + /** + * Check that getHashStoreMetadataInputStream throws FileNotFoundException when there is no + * metadata to retrieve + */ + @Test + public void getHashStoreMetadataInputStream_fileNotFound() { + for (String pid : testData.pidList) { + String storeFormatId = (String) fhsProperties.get("storeMetadataNamespace"); + + assertThrows(FileNotFoundException.class, + () -> fileHashStore.getHashStoreMetadataInputStream(pid, storeFormatId)); } } /** - * Confirm that isPidInCidRefsFile returns false when pid is found + * Confirm getHashStoreRefsPath returns correct pid refs path */ @Test - public void isPidInCidRefsFile_pidNotFound() throws Exception { + public void getHashStoreRefsPath_pid() throws Exception { for (String pid : testData.pidList) { String pidFormatted = pid.replace("/", "_"); Path testDataFile = testData.getTestFile(pidFormatted); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject( - dataStream, pid, null, null, null, -1 - ); + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); + String storeAlgo = fhsProperties.getProperty("storeAlgorithm"); - String cid = objInfo.getCid(); - Path absCidRefsPath = fileHashStore.getRealPath(cid, "refs", "cid"); - assertFalse(fileHashStore.isPidInCidRefsFile("pid.not.found", absCidRefsPath)); + // Pid refs file + String metadataPidHash = FileHashStoreUtility.getPidHexDigest(pid, storeAlgo); + String metadataPidHashSharded = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, + metadataPidHash); + Path 
calculatedPidRefsRealPath = + storePath.resolve("refs/pids").resolve(metadataPidHashSharded); + + Path expectedPidRefsPath = + fileHashStore.getHashStoreRefsPath(pid, FileHashStore.HashStoreIdTypes.pid); + + assertEquals(expectedPidRefsPath, calculatedPidRefsRealPath); + } } } + /** + * Confirm getHashStoreRefsPath returns correct cid refs path + */ @Test - public void getRealPath() throws Exception { - // Get single test file to "upload" - String pid = "jtao.1700.1"; - Path testDataFile = testData.getTestFile(pid); + public void getHashStoreRefsPath_cid() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + Path testDataFile = testData.getTestFile(pidFormatted); + + try (InputStream dataStream = Files.newInputStream(testDataFile)) { + ObjectMetadata objInfo = + fileHashStore.storeObject(dataStream, pid, null, null, null, -1); + String cid = objInfo.cid(); + + // Manually form the permanent address of the actual cid + Path storePath = Paths.get(fhsProperties.getProperty("storePath")); + int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); + int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream, pid, null, null, null, -1); - String cid = objInfo.getCid(); + // Cid refs file + String objShardString = + FileHashStoreUtility.getHierarchicalPathString(storeDepth, storeWidth, cid); + Path calculatedCidRefsRealPath = + storePath.resolve("refs/cids").resolve(objShardString); + + Path expectedCidRefsPath = + fileHashStore.getHashStoreRefsPath(cid, FileHashStore.HashStoreIdTypes.cid); + + assertEquals(expectedCidRefsPath, calculatedCidRefsRealPath); + } + } + } - Path objCidAbsPath = fileHashStore.getRealPath(pid, "object", null); - Path pidRefsPath = fileHashStore.getRealPath(pid, "refs", "pid"); - Path cidRefsPath = fileHashStore.getRealPath(cid, 
"refs", "cid"); - assertTrue(Files.exists(objCidAbsPath)); - assertTrue(Files.exists(pidRefsPath)); - assertTrue(Files.exists(cidRefsPath)); + /** + * Confirm getHashStoreDataObjectPath throws exception when requesting path for an object that + * does not exist + */ + @Test + public void getHashStoreDataObjectPath_fileNotFound() { + assertThrows(FileNotFoundException.class, () -> { + String pid = "dou.test.1"; + fileHashStore.getHashStoreDataObjectPath(pid); + }); + } + + /** + * Confirm getExpectedPath throws exception when requesting path for an object that does not + * exist + */ + @Test + public void fileHashStoreUtility_checkForEmptyAndValidString() { + assertThrows(IllegalArgumentException.class, + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("dou.test.1\n", + "pid")); + } + + /** + * Confirm getExpectedPath throws exception when requesting path for an object that does not + * exist + */ + @Test + public void fileHashStoreUtility_checkForEmptyAndValidString_newLine() { + assertThrows(IllegalArgumentException.class, + () -> FileHashStoreUtility.checkForNotEmptyAndValidString("\n", "pid")); + } + + /** + * Confirm that renamePathForDeletion adds '_delete' to the given path + */ + @Test + public void fileHashStoreUtility_renamePathForDeletion() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); + + Path metadataPath = Paths.get(pathToMetadata); + FileHashStoreUtility.renamePathForDeletion(metadataPath); + + Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); + assertTrue(Files.exists(expectedMetadataPathRenamed)); + } + } + } + + /** + * Confirm that renamePathForDeletion adds '_delete' to the given path + */ + @Test 
+ public void fileHashStoreUtility_renamePathForRestoration() throws Exception { + for (String pid : testData.pidList) { + String pidFormatted = pid.replace("/", "_"); + + // Get test metadata file + Path testMetaDataFile = testData.getTestFile(pidFormatted + ".xml"); + + try (InputStream metadataStream = Files.newInputStream(testMetaDataFile)) { + String pathToMetadata = fileHashStore.putMetadata(metadataStream, pid, null); + + Path metadataPath = Paths.get(pathToMetadata); + FileHashStoreUtility.renamePathForDeletion(metadataPath); + + Path expectedMetadataPathRenamed = Paths.get(pathToMetadata + "_delete"); + assertFalse(Files.exists(metadataPath)); + assertTrue(Files.exists(expectedMetadataPathRenamed)); + + FileHashStoreUtility.renamePathForRestoration(expectedMetadataPathRenamed); + assertFalse(Files.exists(expectedMetadataPathRenamed)); + assertTrue(Files.exists(metadataPath)); + } + } } } diff --git a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java b/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java deleted file mode 100644 index 3b48adc7..00000000 --- a/src/test/java/org/dataone/hashstore/filehashstore/FileHashStoreReferencesTest.java +++ /dev/null @@ -1,406 +0,0 @@ -package org.dataone.hashstore.filehashstore; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.security.NoSuchAlgorithmException; -import java.util.List; -import java.util.Properties; - -import org.dataone.hashstore.ObjectMetadata; -import 
org.dataone.hashstore.exceptions.PidRefsFileExistsException; -import org.dataone.hashstore.testdata.TestDataHarness; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -/** - * Test class for FileHashStore references related methods - */ -public class FileHashStoreReferencesTest { - private FileHashStore fileHashStore; - private Path rootDirectory; - private Properties fhsProperties; - private static final TestDataHarness testData = new TestDataHarness(); - - /** - * Initialize FileHashStore before each test to creates tmp directories - */ - @BeforeEach - public void initializeFileHashStore() { - rootDirectory = tempFolder.resolve("metacat"); - - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootDirectory.toString()); - storeProperties.setProperty("storeDepth", "3"); - storeProperties.setProperty("storeWidth", "2"); - storeProperties.setProperty("storeAlgorithm", "SHA-256"); - storeProperties.setProperty( - "storeMetadataNamespace", "http://ns.dataone.org/service/types/v2.0" - ); - - try { - fhsProperties = storeProperties; - fileHashStore = new FileHashStore(storeProperties); - - } catch (IOException ioe) { - fail("IOException encountered: " + ioe.getMessage()); - - } catch (NoSuchAlgorithmException nsae) { - fail("NoSuchAlgorithmException encountered: " + nsae.getMessage()); - - } - } - - /** - * Temporary folder for tests to run in - */ - @TempDir - public Path tempFolder; - - /** - * Check that tagObject writes expected pid refs files - */ - @Test - public void tagObject_pidRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); - assertTrue(Files.exists(pidRefsFilePath)); - } - - /** - * Check that tagObject writes expected cid refs files - */ - @Test - public void tagObject_cidRefsFile() throws 
Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - assertTrue(Files.exists(cidRefsFilePath)); - } - - /** - * Check that tagObject throws exception when pid refs file already exists - */ - @Test - public void tagObject_pidRefsFileExists() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - assertThrows(PidRefsFileExistsException.class, () -> { - fileHashStore.tagObject(pid, cid); - }); - - } - - /** - * Check that tagObject creates a pid refs file and updates an existing cid refs file - */ - @Test - public void tagObject_cidRefsFileExists() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - String pidAdditional = "another.pid.2"; - fileHashStore.tagObject(pidAdditional, cid); - - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); - assertTrue(Files.exists(pidRefsFilePath)); - - - // Check cid refs file - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - boolean pidFoundInCidRefFiles = fileHashStore.isPidInCidRefsFile( - pidAdditional, cidRefsFilePath - ); - assertTrue(pidFoundInCidRefFiles); - } - - /** - * Check that tagObject creates pid refs file when pid already exists in cid refs file - */ - @Test - public void tagObject_pidExistsInCidRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - - File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.move(cidRefsTmpFile, cidRefsFilePath.toFile(), "refs"); - - fileHashStore.tagObject(pid, cid); - - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); - assertTrue(Files.exists(pidRefsFilePath)); - - // Confirm that cid refs file only has 1 line - List lines 
= Files.readAllLines(cidRefsFilePath); - int numberOfLines = lines.size(); - assertEquals(numberOfLines, 1); - - } - - /** - * Check that the cid supplied is written into the file given - */ - @Test - public void writePidRefsFile_content() throws Exception { - String cidToWrite = "test_cid_123"; - File pidRefsTmpFile = fileHashStore.writePidRefsFile(cidToWrite); - - String cidRead = new String(Files.readAllBytes(pidRefsTmpFile.toPath())); - assertEquals(cidRead, cidToWrite); - } - - /** - * Check that the pid supplied is written into the file given with a new line - */ - @Test - public void writeCidRefsFile_content() throws Exception { - String pidToWrite = "dou.test.123"; - File cidRefsTmpFile = fileHashStore.writeCidRefsFile(pidToWrite); - - String pidRead = new String(Files.readAllBytes(cidRefsTmpFile.toPath())); - assertEquals(pidRead, pidToWrite); - } - - /** - * Check that exception is thrown when incorrect cid in a pid refs file. - */ - @Test - public void verifyHashStoreRefFiles_unexpectedCid() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Create a pid refs file with the incorrect cid - String cidToWrite = "123456789abcdef"; - File pidRefsTmpFile = fileHashStore.writePidRefsFile(cidToWrite); - Path pidRefsTmpFilePath = pidRefsTmpFile.toPath(); - - // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - - assertThrows(IOException.class, () -> { - fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsTmpFilePath, cidRefsFilePath); - }); - } - - /** - * Check that exception is thrown when an expected pid is not found in a cid refs file - */ - @Test - public void verifyHashStoreRefFiles_pidNotFoundInCidRefsFile() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Create a cid refs file with a different pid from the one that is expected - String cidToWrite = 
"dou.test.2"; - File cidRefsTmpFile = fileHashStore.writeCidRefsFile(cidToWrite); - Path cidRefsTmpFilePath = cidRefsTmpFile.toPath(); - - // Get path of the pid refs file - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); - - assertThrows(IOException.class, () -> { - fileHashStore.verifyHashStoreRefsFiles(pid, cid, pidRefsFilePath, cidRefsTmpFilePath); - }); - } - - /** - * Confirm that cid refs file has been updated successfully - */ - @Test - public void updateCidRefsFiles_content() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - // Get path of the cid refs file - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - - String pidAdditional = "dou.test.2"; - fileHashStore.updateCidRefsFiles("dou.test.2", cidRefsFilePath); - - List lines = Files.readAllLines(cidRefsFilePath); - boolean pidOriginal_foundInCidRefFiles = false; - boolean pidAdditional_foundInCidRefFiles = false; - for (String line : lines) { - if (line.equals(pidAdditional)) { - pidAdditional_foundInCidRefFiles = true; - } - if (line.equals(pid)) { - pidOriginal_foundInCidRefFiles = true; - } - } - assertTrue(pidOriginal_foundInCidRefFiles); - assertTrue(pidAdditional_foundInCidRefFiles); - } - - /** - * Check that deletePidRefsFile deletes file - */ - @Test - public void deletePidRefsFile_fileDeleted() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - - fileHashStore.deletePidRefsFile(pid); - - Path pidRefsFilePath = fileHashStore.getRealPath(pid, "refs", "pid"); - assertFalse(Files.exists(pidRefsFilePath)); - } - - /** - * Check that deletePidRefsFile throws exception when there is no file to delete - */ - @Test - public void deletePidRefsFile_missingPidRefsFile() { - String pid = "dou.test.1"; - - assertThrows(FileNotFoundException.class, () -> { - fileHashStore.deletePidRefsFile(pid); - }); - } - - /** - * Check 
that deleteCidRefsPid deletes pid from its cid refs file - */ - @Test - public void deleteCidRefsPid_pidRemoved() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - String pidAdditional = "dou.test.2"; - fileHashStore.tagObject(pidAdditional, cid); - - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); - - assertFalse(fileHashStore.isPidInCidRefsFile(pid, cidRefsFilePath)); - } - - /** - * Check that deleteCidRefsPid removes all pids as expected and leaves an - * empty file. - */ - @Test - public void deleteCidRefsPid_allPidsRemoved() throws Exception { - String pid = "dou.test.1"; - String cid = "abcdef123456789"; - fileHashStore.tagObject(pid, cid); - String pidAdditional = "dou.test.2"; - fileHashStore.tagObject(pidAdditional, cid); - Path cidRefsFilePath = fileHashStore.getRealPath(cid, "refs", "cid"); - - fileHashStore.deleteCidRefsPid(pid, cidRefsFilePath); - fileHashStore.deleteCidRefsPid(pidAdditional, cidRefsFilePath); - - assertTrue(Files.exists(cidRefsFilePath)); - assertTrue(Files.size(cidRefsFilePath) == 0); - } - - /** - * Check that verifyObject returns true with good values - */ - @Test - public void verifyObject_correctValues() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - boolean isObjectValid = fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - ); - 
assertTrue(isObjectValid); - } - } - - /** - * Check that verifyObject returns false with mismatched size value - */ - @Test - public void verifyObject_mismatchedValuesBadSize() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = testData.pidData.get(pid).get("sha256"); - long expectedSize = 123456789; - - boolean isObjectValid = fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - ); - assertFalse(isObjectValid); - } - } - - /** - * Check that verifyObject returns false and does not delete the file when - * there is a mismatch - */ - @Test - public void verifyObject_mismatchedValuesObjectDeleted() throws Exception { - for (String pid : testData.pidList) { - String pidFormatted = pid.replace("/", "_"); - Path testDataFile = testData.getTestFile(pidFormatted); - - InputStream dataStream = Files.newInputStream(testDataFile); - ObjectMetadata objInfo = fileHashStore.storeObject(dataStream); - - String defaultStoreAlgorithm = fhsProperties.getProperty("storeAlgorithm"); - - // Get verifyObject args - String expectedChecksum = "intentionallyWrongValue"; - long expectedSize = Long.parseLong(testData.pidData.get(pid).get("size")); - - boolean isObjectValid = fileHashStore.verifyObject( - objInfo, expectedChecksum, defaultStoreAlgorithm, expectedSize - ); - assertFalse(isObjectValid); - - int storeDepth = Integer.parseInt(fhsProperties.getProperty("storeDepth")); - int storeWidth = Integer.parseInt(fhsProperties.getProperty("storeWidth")); - String actualCid = objInfo.getCid(); - String cidShardString = FileHashStoreUtility.getHierarchicalPathString( - 
storeDepth, storeWidth, actualCid - ); - Path objectStoreDirectory = rootDirectory.resolve("objects").resolve(cidShardString); - assertTrue(Files.exists(objectStoreDirectory)); - - } - } -} diff --git a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java index e523d40a..2beb0ab8 100644 --- a/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java +++ b/src/test/java/org/dataone/hashstore/testdata/TestDataHarness.java @@ -10,10 +10,10 @@ * This class returns the test data expected hex digest values * * Notes: - * - "object_cid" is the SHA-256 hash of the pid * - algorithms without any prefixes are the algorithm hash of the pid's respective data object * content - * - "metadata_sha256" is the hash of the pid's respective metadata object content + * - "metadata_cid_sha256" is sha256 content identifier of the pid's metadata object + * - "sysmeta_address_sha256" is the sha256 hash of the pid + formatId * */ public class TestDataHarness { @@ -25,10 +25,6 @@ public TestDataHarness() { Map> pidsAndHexDigests = new HashMap<>(); Map values1 = new HashMap<>(); - values1.put( - "object_cid", - "0d555ed77052d7e166017f779cbc193357c3a5006ee8b8457230bcf7abcef65e" - ); values1.put("md2", "b33c730ac5e36b2b886a9cd14552f42e"); values1.put("md5", "db91c910a3202478c8def1071c54aae5"); values1.put("sha1", "1fe86e3c8043afa4c70857ca983d740ad8501ccd"); @@ -47,21 +43,17 @@ public TestDataHarness() { "sha512-224", "107f9facb268471de250625440b6c8b7ff8296fbe5d89bed4a61fd35" ); values1.put( - "metadata_cid", + "sysmeta_address_sha256", "323e0799524cec4c7e14d31289cefd884b563b5c052f154a066de5ec1e477da7" ); values1.put( - "metadata_sha256", + "metadata_cid_sha256", "158d7e55c36a810d7c14479c952a4d0b370f2b844808f2ea2b20d7df66768b04" ); values1.put("size", "39993"); pidsAndHexDigests.put("doi:10.18739/A2901ZH2M", values1); Map values2 = new HashMap<>(); - values2.put( - "object_cid", - 
"a8241925740d5dcd719596639e780e0a090c9d55a5d0372b0eaf55ed711d4edf" - ); values2.put("md2", "9c25df1c8ba1d2e57bb3fd4785878b85"); values2.put("md5", "f4ea2d07db950873462a064937197b0f"); values2.put("sha1", "3d25436c4490b08a2646e283dada5c60e5c0539d"); @@ -80,21 +72,17 @@ public TestDataHarness() { "sha512-224", "7a2b22e36ced9e91cf8cdf6971897ec4ae21780e11d1c3903011af33" ); values2.put( - "metadata_cid", + "sysmeta_address_sha256", "ddf07952ef28efc099d10d8b682480f7d2da60015f5d8873b6e1ea75b4baf689" ); values2.put( - "metadata_sha256", + "metadata_cid_sha256", "d87c386943ceaeba5644c52b23111e4f47972e6530df0e6f0f41964b25855b08" ); values2.put("size", "8724"); pidsAndHexDigests.put("jtao.1700.1", values2); Map values3 = new HashMap<>(); - values3.put( - "object_cid", - "7f5cc18f0b04e812a3b4c8f686ce34e6fec558804bf61e54b176742a7f6368d6" - ); values3.put("md2", "9f2b06b300f661ce4398006c41d8aa88"); values3.put("md5", "e1932fc75ca94de8b64f1d73dc898079"); values3.put("sha1", "c6d2a69a3f5adaf478ba796c114f57b990cf7ad1"); @@ -113,11 +101,11 @@ public TestDataHarness() { "sha512-224", "e1789a91c9df334fdf6ee5d295932ad96028c426a18b17016a627099" ); values3.put( - "metadata_cid", + "sysmeta_address_sha256", "9a2e08c666b728e6cbd04d247b9e556df3de5b2ca49f7c5a24868eb27cddbff2" ); values3.put( - "metadata_sha256", + "metadata_cid_sha256", "27003e07f2ab374020de73298dd24a1d8b1b57647b8fa3c49db00f8c342afa1d" ); values3.put("size", "18699");