Skip to content

Commit

Permalink
Merge pull request #32 from DataONEorg/feature-30-code-cleanup
Browse files Browse the repository at this point in the history
Feature-30: Code Cleanup
  • Loading branch information
doulikecookiedough authored Jun 30, 2023
2 parents 0bb82b9 + 18a55ca commit 6893e89
Show file tree
Hide file tree
Showing 7 changed files with 429 additions and 387 deletions.
21 changes: 10 additions & 11 deletions src/main/java/org/dataone/hashstore/HashAddress.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* HashAddress is a class that models a unique identifier for a file in the
* Hashstore. It encapsulates information about the file's name, path, and
* associated hash digest values. By using HashAddress objects, client code can
* easily locate, retrieve, and modify files in the Hashstore without needing to
* easily locate, retrieve, and modify files in the HashStore without needing to
* know the underlying file system details.
*/
public class HashAddress {
Expand All @@ -20,17 +20,16 @@ public class HashAddress {
/**
* Creates a new instance of HashAddress with the given properties.
*
* @param id the unique identifier for the file
* @param relPath the relative path of the file within the hash store
* @param absPath the absolute path of the file on the local file system
* @param isDuplicate a flag indicating if the file is a duplicate of an
* @param id Unique identifier for the file
* @param relPath Relative path of the file within the hash store
* @param absPath Absolute path of the file on the local file system
* @param isDuplicate Flag indicating if the file is a duplicate of an
* existing file
* @param hexDigests a map of hash algorithm names to their hex-encoded
* @param hexDigests A map of hash algorithm names to their hex-encoded
* digest values for the file
*/
public HashAddress(String id, String relPath, Path absPath, boolean isDuplicate,
Map<String, String> hexDigests) {
// Constructor implementation
this.id = id;
this.relPath = relPath;
this.absPath = absPath;
Expand All @@ -50,7 +49,7 @@ public String getId() {
/**
* Return the relative path to the file
*
* @return relative path
* @return relPath
*/
public String getRelPath() {
return relPath;
Expand All @@ -59,7 +58,7 @@ public String getRelPath() {
/**
* Return the absolute path to the file
*
* @return absolute path
* @return absPath
*/
public Path getAbsPath() {
return absPath;
Expand All @@ -68,7 +67,7 @@ public Path getAbsPath() {
/**
* Return the flag of whether a file is a duplicate or not
*
* @return true if the file is not a duplicate
* @return True if the file is a duplicate, false otherwise
*/
public boolean getIsDuplicate() {
return isDuplicate;
Expand All @@ -77,7 +76,7 @@ public boolean getIsDuplicate() {
/**
* Return a map of hex digests
*
* @return hex digest map
* @return hexDigests
*/
public Map<String, String> getHexDigests() {
return hexDigests;
Expand Down
60 changes: 30 additions & 30 deletions src/main/java/org/dataone/hashstore/HashStore.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,29 @@
import org.dataone.hashstore.exceptions.PidObjectExistsException;

/**
* HashStore is a content-addressable file management system that utilizes a
* persistent identifier (PID) in the form of a hex digest value to address
* files. The system stores files in a file store and provides an API for
* interacting with the store. HashStore storage classes (like `FileHashStore`)
* must implement the HashStore interface to ensure proper usage of the system.
* HashStore is a content-addressable file management system that utilizes the
* hash/hex digest of a given persistent identifier (PID) to address
* files. The system stores both objects and metadata in their respective
* directories and provides an API for interacting with the store. HashStore
* storage classes (like `FileHashStore`) must implement the HashStore interface
* to ensure proper usage of the system.
*/
public interface HashStore {
/**
* The `storeObject` method is responsible for the atomic storage of objects to
* disk using a given InputStream and a persistent identifier (pid). Upon
* successful storage, the method returns a HashAddress object containing
* relevant file information, such as the file's id, relative path, absolute
* path, duplicate object status, and hex digest map of algorithms and
* checksums. `storeObject` also ensures that an object is stored only once by
* synchronizing multiple calls and rejecting calls to store duplicate objects.
* HashStore using a given InputStream and a persistent identifier (pid). Upon
* successful storage, the method returns a HashAddress object containing
* the object's file information, such as the id, relative path, absolute
* path, duplicate object status, and a map of hash algorithms to their hex
* digests/checksums. An object is stored once and only once; `storeObject`
* enforces this rule by synchronizing multiple calls and rejecting calls
* to store duplicate objects.
*
* The file's id is determined by calculating the SHA-256 hex digest of the
* provided pid, which is also used as the permanent address of the file. The
* file's identifier is then sharded using a depth of 3 and width of 2,
* delimited by '/' and concatenated to produce the final permanent address
* and is stored in the `/[...storeDirectory]/objects/` directory.
* delimited by '/' and concatenated to produce the final permanent address,
* which is stored in the object store directory (ex. `./[storePath]/objects/`).
*
* By default, the hex digest map includes the following hash algorithms: MD5,
* SHA-1, SHA-256, SHA-384 and SHA-512, which are the most commonly used
Expand Down Expand Up @@ -63,18 +65,18 @@ HashAddress storeObject(InputStream object, String pid, String additionalAlgorit

/**
* The `storeMetadata` method is responsible for adding/updating metadata
* (ex. `sysmeta`) to disk using a given InputStream, a persistent identifier
* (pid) and metadata format (formatId). The metadata object contains solely the
* given metadata content.
*
* The permanent address of the metadata document is determined by calculating
* the SHA-256 hex digest of the provided `pid` + `format_id`; and the body
* contains the metadata content (ex. `sysmeta`).
* (ex. `sysmeta`) to the HashStore by using a given InputStream, a persistent
* identifier (`pid`) and metadata format (`formatId`). The permanent address of
* the stored metadata document is determined by calculating the SHA-256 hex
* digest of the provided `pid` + `formatId`.
*
* Upon successful storage of metadata, `storeMetadata` returns a string that
* represents the path of the file's permanent address, as described above.
* Lastly, the metadata objects are stored in parallel to objects in the
* `/store_directory/metadata/` directory.
* `./[storePath]/metadata/` directory.
*
* Note, multiple calls to store the same metadata content will all be accepted,
* but are not guaranteed to execute sequentially.
*
* @param metadata Input stream to metadata document
* @param pid Authority-based identifier
Expand All @@ -94,10 +96,8 @@ String storeMetadata(InputStream metadata, String pid, String formatId)
NoSuchAlgorithmException;

/**
* The `retrieveObject` method retrieves an object from disk using a given
* persistent identifier (pid). If the object exists (determined by calculating
* the object's permanent address using the SHA-256 hash of the given pid), the
* method will open and return a buffered object stream ready to read from.
* The `retrieveObject` method retrieves an object from HashStore using a given
* persistent identifier (pid).
*
* @param pid Authority-based identifier
* @return Object InputStream
Expand All @@ -113,7 +113,7 @@ InputStream retrieveObject(String pid)

/**
* The 'retrieveMetadata' method retrieves the metadata content of a given pid
* and metadata namespace from disk and returns it in the form of a String.
* and metadata namespace from HashStore.
*
* @param pid Authority-based identifier
* @param formatId Metadata namespace/format
Expand All @@ -129,8 +129,8 @@ InputStream retrieveObject(String pid)
InputStream retrieveMetadata(String pid, String formatId) throws Exception;

/**
* The 'deleteObject' method deletes an object permanently from disk using a
* given persistent identifier and any empty subdirectories.
* The 'deleteObject' method deletes an object (and its empty subdirectories)
* permanently from HashStore using a given persistent identifier.
*
* @param pid Authority-based identifier
* @return True if successful
Expand All @@ -144,8 +144,8 @@ InputStream retrieveObject(String pid)

/**
* The 'deleteMetadata' method deletes a metadata document (ex. `sysmeta`)
* permanently from disk using a given persistent identifier and its respective
* metadata namespace.
* permanently from HashStore using a given persistent identifier and its
* respective metadata namespace.
*
* @param pid Authority-based identifier
* @param formatId Metadata namespace/format
Expand Down
Loading

0 comments on commit 6893e89

Please sign in to comment.