Skip to content

Commit

Permalink
Merge pull request #32 from richardrodgers/alg-copy-fix
Browse files Browse the repository at this point in the history
Cleanup of Adapter bugs - prepare for release
  • Loading branch information
richardrodgers authored Dec 25, 2020
2 parents fa563db + bdf90a1 commit 1bd7bff
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 18 deletions.
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,20 @@ If automatic generation is not desired, an API call disables it.
Another extra is _sealed_ bags. Bags created by Loaders are immutable, meaning they cannot be altered via the API.
But we typically _can_ gain access to the backing bag storage, which we can of course then
change at will. However, if a bag is created as _sealed_ (a method on the Loader), all
method calls that expose the underlying storage will throw IllegalAccess exceptions. So, for example,
we would be _unable_ to obtain a File reference, but _could_ get an I/O stream to the same content.
method calls that expose the underlying storage will throw an `IllegalAccessException`. So, for example, we would
be _unable_ to obtain a File reference to a payload file, but _could_ get an I/O stream to the same content.
In other words, the content can be accessed, but the underlying representation cannot be altered, and
to this degree the bag contents are _tamper-proof_.

Finally, the library bundles several convenience (helper) methods for common operations on bags in
the _Adapter_ class. One illustrative use case would be to upgrade an existing bag to a better checksum algorithm.
Given `newbag`, the base Path at which the new bag will be built, one could code:

Bag oldbag = Loader.load(oldBagZip);
Path newbagZip = Adapter.copy(newbag, oldbag, "SHA-512").toPackage();

Additional methods will be added as other common use-cases are identified.

## Bagger on the command line ##

The library bundles a very simple command-line tool called _Bagger_ that exposes much of the API.
Expand Down
32 changes: 19 additions & 13 deletions src/main/java/edu/mit/lib/bagit/Adapter.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
Expand Down Expand Up @@ -38,10 +39,10 @@ public class Adapter {
*/
public static Filler copy(Bag bag) throws IOException, IllegalAccessException, URISyntaxException {
var csAlgs = bag.csAlgorithms();
var algArray = new String[csAlgs.size()];
csAlgs.toArray(algArray);
var filler = new Filler(Files.createTempDirectory("bag"), bag.tagEncoding(), bag.lineSeparator(), true, algArray);
copyBag(filler, bag, csAlgs, algArray[0]);
var filler = new Filler(Files.createTempDirectory("bag"),
bag.tagEncoding(), bag.lineSeparator(), true,
csAlgs.toArray(String[]::new));
copyBag(filler, bag, csAlgs);
return filler;
}

Expand All @@ -60,10 +61,9 @@ public static Filler copy(Bag bag) throws IOException, IllegalAccessException, U
*/
public static Filler copy(Path base, Bag bag) throws IOException, IllegalAccessException, URISyntaxException {
var csAlgs = bag.csAlgorithms();
var algArray = new String[csAlgs.size()];
csAlgs.toArray(algArray);
var filler = new Filler(base, bag.tagEncoding(), bag.lineSeparator(), false, algArray);
copyBag(filler, bag, csAlgs, algArray[0]);
var filler = new Filler(base, bag.tagEncoding(), bag.lineSeparator(), false,
csAlgs.toArray(String[]::new));
copyBag(filler, bag, csAlgs);
return filler;
}

Expand All @@ -82,14 +82,21 @@ public static Filler copy(Path base, Bag bag) throws IOException, IllegalAccessE
* @throws URISyntaxException when invalid URIs encountered
*/
public static Filler copy(Path base, Bag bag, String ... csAlgorithms) throws IOException, IllegalAccessException, URISyntaxException {
// ensure we can handle refs - bail if not
var algSet = new HashSet<String>(Arrays.asList(csAlgorithms));
if (! bag.payloadRefs().isEmpty() && ! bag.csAlgorithms().containsAll(algSet)) {
throw new IllegalStateException("Fetch file checksums absent for requested algorithms");
}
var filler = new Filler(base, bag.tagEncoding(), bag.lineSeparator(), false, csAlgorithms);
copyBag(filler, bag, Set.of(csAlgorithms), csAlgorithms[0]);
copyBag(filler, bag, Set.of(csAlgorithms));
return filler;
}

private static void copyBag(Filler filler, Bag bag, Set<String> csAlgs, String csAlg)
private static void copyBag(Filler filler, Bag bag, Set<String> newAlgs)
throws IOException, IllegalAccessException, URISyntaxException {
var manifs = new HashMap<String, Map<String, String>>();
var csAlgs = bag.csAlgorithms();
var csAlg = csAlgs.iterator().next();
var refs = bag.payloadRefs();
var autoGs = autoGens(csAlgs);
var autoMs = autoMetas();
Expand All @@ -113,10 +120,10 @@ private static void copyBag(Filler filler, Bag bag, Set<String> csAlgs, String c
}
}
for (String relPath : refs.keySet()) {
var refParts = refs.get(relPath).split(" ");
var refParts = refs.get(relPath).split("\\s+");
var size = refParts[0].equals("-") ? -1L : Long.parseLong(refParts[0]);
var checksums = new HashMap<String, String>();
for (String alg : csAlgs) {
for (String alg : newAlgs) {
checksums.put(alg, manifs.get(alg).get(relPath));
}
filler.payloadRefUnsafe(relPath, size, new URI(refParts[1]), checksums);
Expand All @@ -129,7 +136,6 @@ private static void copyBag(Filler filler, Bag bag, Set<String> csAlgs, String c
}
}
}

}

private static Set<String> autoGens(Set<String> csAlgs) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/edu/mit/lib/bagit/Bag.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public class Bag {
static final String LIB_VSN = "1.0";
static final String DFLT_FMT = "zip";
static final String TGZIP_FMT = "tgz";
static final String SPACER = " ";
static final String SPACER = " ";
// required file and directory names
static final String MANIF_FILE = "manifest-";
static final String TAGMANIF_FILE = "tagmanifest-";
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/edu/mit/lib/bagit/Filler.java
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ public Filler payloadRef(String relPath, InputStream in, URI uri) throws IOExcep
FlatWriter refWriter = getWriter(REF_FILE);
var destDir = Files.createTempDirectory("null");
var destFile = destDir.resolve("foo");
long size = digestCopy(in, destFile, relPath, manWriters);
long size = digestCopy(in, destFile, DATA_PATH + relPath, manWriters);
var sizeStr = (size > 0L) ? Long.toString(size) : "-";
refWriter.writeLine(uri.toString() + " " + sizeStr + " " + DATA_PATH + relPath);
Files.delete(destFile);
Expand All @@ -428,7 +428,7 @@ public Filler payloadRefUnsafe(String relPath, long size, URI uri, Map<String, S
throw new IllegalStateException("Payload file already exists at: " + relPath);
}
if (! uri.isAbsolute()) {
throw new IOException("URI must be absolute");
throw new IOException("URI must be absolute: '" + uri.toString() + "'");
}
if (! checksums.keySet().equals(csAlgs)) {
throw new IOException("checksums do not match bags");
Expand Down

0 comments on commit 1bd7bff

Please sign in to comment.