From 6c8d11844551b86b7aba85214d6c9d7a259ad417 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:25:45 -0800 Subject: [PATCH 01/12] =?UTF-8?q?release:=20v2.0.0=20=E2=80=94=20compressi?= =?UTF-8?q?on,=20KDF,=20Merkle=20manifests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add gzip compression pipeline, passphrase-based encryption via PBKDF2/scrypt, and Merkle tree manifests for large files. Includes 52 new unit tests, updated API reference, guide, and README documentation. --- CHANGELOG.md | 16 + GUIDE.md | 222 ++++++++- README.md | 21 + ROADMAP.md | 4 +- docs/API.md | 91 +++- index.d.ts | 14 +- index.js | 26 +- jsr.json | 2 +- package.json | 2 +- src/domain/schemas/ManifestSchema.d.ts | 27 + src/domain/schemas/ManifestSchema.js | 27 + src/domain/services/CasService.d.ts | 30 +- src/domain/services/CasService.js | 217 +++++++- src/domain/value-objects/Manifest.d.ts | 30 ++ src/domain/value-objects/Manifest.js | 6 + .../adapters/BunCryptoAdapter.js | 37 +- .../adapters/NodeCryptoAdapter.js | 41 +- .../adapters/WebCryptoAdapter.js | 47 ++ src/ports/CryptoPort.js | 17 + .../services/CasService.compression.test.js | 310 ++++++++++++ .../domain/services/CasService.kdf.test.js | 469 ++++++++++++++++++ .../domain/services/CasService.merkle.test.js | 462 +++++++++++++++++ 22 files changed, 2076 insertions(+), 42 deletions(-) create mode 100644 test/unit/domain/services/CasService.compression.test.js create mode 100644 test/unit/domain/services/CasService.kdf.test.js create mode 100644 test/unit/domain/services/CasService.merkle.test.js diff --git a/CHANGELOG.md b/CHANGELOG.md index efb59b4..993da8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.0.0] — M7 Horizon (2026-02-07) + +### Added +- **Compression support** (Task 7.1): Optional gzip compression pipeline via `compression: { algorithm: 
'gzip' }` option on `store()`. Compression is applied before encryption when both are enabled. Manifests include a new optional `compression` field. Decompression on `restore()` is automatic. +- **KDF support** (Task 7.2): Passphrase-based encryption using PBKDF2 or scrypt via `deriveKey()` method and `passphrase` option on `store()`/`restore()`. KDF parameters are stored in `manifest.encryption.kdf` for deterministic re-derivation. All three crypto adapters (Node, Bun, Web) implement `deriveKey()`. +- **Merkle tree manifests** (Task 7.3): Large manifests (chunk count exceeding `merkleThreshold`, default 1000) are automatically split into sub-manifests stored as separate blobs. Root manifest uses `version: 2` with `subManifests` references. `readManifest()` transparently reconstitutes v2 manifests into flat chunk lists. Full backward compatibility with v1 manifests. +- New schema fields: `version`, `compression`, `subManifests` on `ManifestSchema`; `kdf` on `EncryptionSchema`. +- 52 new unit tests across three new test suites (compression, KDF, Merkle). +- Updated API reference (`docs/API.md`), guide (`GUIDE.md`), and README with v2.0.0 feature documentation. + +### Changed +- **BREAKING**: Manifest schema now includes `version` field (defaults to 1). Existing v1 manifests are fully backward-compatible. +- `CasService` constructor accepts new `merkleThreshold` option. +- `store()` accepts `passphrase`, `kdfOptions`, and `compression` options. +- `restore()` accepts `passphrase` option. + ## [1.6.2] — OIDC publishing + JSR docs coverage (2026-02-07) ### Added diff --git a/GUIDE.md b/GUIDE.md index 1aeffcf..cb388ad 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -18,10 +18,13 @@ along from first principles to full mastery. 7. [The CLI](#7-the-cli) 8. [Lifecycle Management](#8-lifecycle-management) 9. [Observability](#9-observability) -10. [Architecture](#10-architecture) -11. [Codec System](#11-codec-system) -12. [Error Handling](#12-error-handling) -13. 
[FAQ / Troubleshooting](#13-faq--troubleshooting) +10. [Compression](#10-compression) +11. [Passphrase Encryption (KDF)](#11-passphrase-encryption-kdf) +12. [Merkle Manifests](#12-merkle-manifests) +13. [Architecture](#13-architecture) +14. [Codec System](#14-codec-system) +15. [Error Handling](#15-error-handling) +16. [FAQ / Troubleshooting](#16-faq--troubleshooting) --- @@ -756,7 +759,209 @@ await cas.verifyIntegrity(manifest); --- -## 10. Architecture +## 10. Compression + +*New in v2.0.0.* + +`git-cas` supports optional gzip compression. When enabled, file content is +compressed before encryption (if any) and before chunking. This reduces storage +size for compressible data without changing the round-trip contract. + +### Storing with Compression + +Pass the `compression` option when storing: + +```js +const manifest = await cas.storeFile({ + filePath: './vacation.jpg', + slug: 'photos/vacation', + compression: { algorithm: 'gzip' }, +}); + +console.log(manifest.compression); +// { algorithm: 'gzip' } +``` + +The manifest gains an optional `compression` field recording the algorithm used. + +### Compression + Encryption + +Compression and encryption compose naturally. Compression runs first (on +plaintext), then encryption runs on the compressed bytes: + +```js +const manifest = await cas.storeFile({ + filePath: './data.csv', + slug: 'reports/q4', + compression: { algorithm: 'gzip' }, + encryptionKey, +}); +``` + +### Restoring Compressed Content + +Decompression on `restore()` is automatic. If the manifest includes a +`compression` field, the restored bytes are decompressed after decryption +(if encrypted) and after chunk reassembly: + +```js +await cas.restoreFile({ + manifest, + outputPath: './restored.csv', +}); +// restored.csv is byte-identical to the original data.csv +``` + +### When to Use Compression + +Compression is most useful for text, CSV, JSON, XML, and other compressible +formats. 
For already-compressed data (JPEG, PNG, MP4, ZIP), compression adds +CPU cost without meaningful size reduction. Use your judgement. + +--- + +## 11. Passphrase Encryption (KDF) + +*New in v2.0.0.* + +Instead of managing raw 32-byte encryption keys, you can derive keys from +passphrases using standard key derivation functions (KDFs). `git-cas` supports +PBKDF2 (default) and scrypt. + +### Storing with a Passphrase + +Pass `passphrase` instead of `encryptionKey`: + +```js +const manifest = await cas.storeFile({ + filePath: './vacation.jpg', + slug: 'photos/vacation', + passphrase: 'my secret passphrase', +}); + +console.log(manifest.encryption.kdf); +// { +// algorithm: 'pbkdf2', +// salt: 'base64-encoded-salt', +// iterations: 100000, +// hash: 'sha-512', +// keyLength: 32 +// } +``` + +KDF parameters (salt, iterations, algorithm) are stored in the manifest's +`encryption.kdf` field. The salt is generated randomly for each store +operation. + +### Restoring with a Passphrase + +Provide the same passphrase on restore. The KDF parameters in the manifest +are used to re-derive the key: + +```js +await cas.restoreFile({ + manifest, + passphrase: 'my secret passphrase', + outputPath: './restored.jpg', +}); +``` + +A wrong passphrase produces a wrong key, which fails with `INTEGRITY_ERROR` +(AES-256-GCM detects it). 
+ +### Using scrypt + +Pass `kdfOptions` to select scrypt: + +```js +const manifest = await cas.storeFile({ + filePath: './secret.bin', + slug: 'vault', + passphrase: 'strong passphrase', + kdfOptions: { algorithm: 'scrypt', cost: 16384 }, +}); +``` + +### Manual Key Derivation + +For advanced workflows, derive the key yourself: + +```js +const { key, salt, params } = await cas.deriveKey({ + passphrase: 'my secret passphrase', + algorithm: 'pbkdf2', + iterations: 200000, +}); + +// Use the derived key directly. Save `salt`/`params` yourself: the manifest only records KDF params when `passphrase` is used. +const manifest = await cas.storeFile({ + filePath: './vacation.jpg', + slug: 'photos/vacation', + encryptionKey: key, +}); +``` + +### Supported KDF Algorithms + +| Algorithm | Default Params | Notes | +|-----------|---------------|-------| +| `pbkdf2` (default) | 100,000 iterations, SHA-512 | Widely supported, good baseline | +| `scrypt` | N=16384, r=8, p=1 | Memory-hard, stronger against GPU attacks | + +--- + +## 12. Merkle Manifests + +*New in v2.0.0.* + +When storing very large files, the manifest (which lists every chunk) can +itself become large. Merkle manifests solve this by splitting the chunk list +into sub-manifests, each stored as a separate Git blob. The root manifest +references sub-manifests by OID. + +### How It Works + +When the chunk count exceeds `merkleThreshold` (default: 1000), `git-cas` +automatically: + +1. Groups chunks into sub-manifests (each containing up to `merkleThreshold` + chunks). +2. Stores each sub-manifest as a Git blob. +3. Writes a root manifest with `version: 2` and a `subManifests` array + referencing the sub-manifest blob OIDs. + +### Configuring the Threshold + +Set `merkleThreshold` at construction time: + +```js +const cas = new ContentAddressableStore({ + plumbing: git, + merkleThreshold: 500, // Split once the chunk count exceeds 500 instead of 1000 +}); +``` + +### Transparent Reconstitution + +`readManifest()` transparently handles both v1 (flat) and v2 (Merkle) +manifests. 
When it encounters a v2 manifest, it reads all sub-manifests, +concatenates their chunk lists, and returns a flat `Manifest` object: + +```js +const manifest = await cas.readManifest({ treeOid }); +// Works identically whether the manifest is v1 or v2 +console.log(manifest.chunks.length); // Full chunk list, regardless of structure +``` + +### Backward Compatibility + +- v2 code reads v1 manifests without any changes. +- v1 manifests (chunk count below threshold) continue to use the flat format. +- The `version` field defaults to `1` for existing manifests. + +--- + +## 13. Architecture `git-cas` follows a hexagonal (ports and adapters) architecture. The domain logic in `CasService` has zero direct dependencies on Node.js, Git, or any @@ -824,6 +1029,7 @@ class CryptoPort { encryptBuffer(buffer, key) {} // Returns { buf, meta } decryptBuffer(buffer, key, meta) {} // Returns Buffer createEncryptionStream(key) {} // Returns { encrypt, finalize } + deriveKey(options) {} // Returns { key, salt, params } (v2.0.0) } ``` @@ -889,7 +1095,7 @@ const cas = new ContentAddressableStore({ --- -## 11. Codec System +## 14. Codec System ### JSON Codec @@ -978,7 +1184,7 @@ The manifest will be stored in the tree as `manifest.msgpack`. --- -## 12. Error Handling +## 15. Error Handling All errors thrown by `git-cas` are instances of `CasError`, which extends `Error` with two additional properties: @@ -1061,7 +1267,7 @@ try { --- -## 13. FAQ / Troubleshooting +## 16. FAQ / Troubleshooting ### Q: Does this work with bare repositories? diff --git a/README.md b/README.md index 2b165ba..2588b5d 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,9 @@ We use the object database. - **Dedupe for free** Git already hashes objects. We just lean into it. - **Chunked storage** big files become stable, reusable blobs. - **Optional AES-256-GCM encryption** store secrets without leaking plaintext into the ODB. +- **Compression** gzip before encryption — smaller blobs, same round-trip. 
+- **Passphrase encryption** derive keys from passphrases via PBKDF2 or scrypt — no raw key management. +- **Merkle manifests** large manifests auto-split into sub-manifests for scalability. - **Manifests** a tiny explicit index of chunks + metadata (JSON/CBOR). - **Tree output** generates standard Git trees so assets snap into commits cleanly. - **Full round-trip** store, tree, and restore — get your bytes back, verified. @@ -28,6 +31,16 @@ We use the object database. **Use it for:** binary assets, build artifacts, model weights, data packs, secret bundles, weird experiments, etc. +## What's new in v2.0.0 + +**Compression** — `compression: { algorithm: 'gzip' }` on `store()`. Compression runs before encryption. Decompression on `restore()` is automatic. + +**Passphrase-based encryption** — Pass `passphrase` instead of `encryptionKey`. Keys are derived via PBKDF2 (default) or scrypt. KDF parameters are stored in the manifest for deterministic re-derivation. Use `deriveKey()` directly for manual control. + +**Merkle tree manifests** — When chunk count exceeds `merkleThreshold` (default: 1000), manifests are automatically split into sub-manifests stored as separate blobs. `readManifest()` transparently reconstitutes them. Full backward compatibility with v1 manifests. + +See [CHANGELOG.md](./CHANGELOG.md) for the full list of changes. 
+ ## Usage (Node API) ```js @@ -56,6 +69,14 @@ const m = await cas.readManifest({ treeOid }); // Lifecycle: inspect deletion impact, find orphaned chunks const { slug, chunksOrphaned } = await cas.deleteAsset({ treeOid }); const { referenced, total } = await cas.findOrphanedChunks({ treeOids: [treeOid] }); + +// v2.0.0: Compressed + passphrase-encrypted store +const manifest2 = await cas.storeFile({ + filePath: './image.png', + slug: 'my-image', + passphrase: 'my secret passphrase', + compression: { algorithm: 'gzip' }, +}); ``` ## CLI (git plugin) diff --git a/ROADMAP.md b/ROADMAP.md index e5ab937..9dc8be6 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -134,7 +134,7 @@ Return and throw semantics for every public method (current and planned). | v1.4.0 | M4 | Compass | Lifecycle management | ✅ | | v1.5.0 | M5 | Sonar | Observability | ✅ | | v1.6.0 | M6 | Cartographer | Documentation | ✅ | -| v2.0.0 | M7 | Horizon | Advanced features | | +| v2.0.0 | M7 | Horizon | Advanced features | ✅ | --- @@ -1461,7 +1461,7 @@ As a new user, I want runnable examples so I can integrate quickly and correctly --- -# M7 — Horizon (v2.0.0) +# M7 — Horizon (v2.0.0) ✅ **Theme:** Advanced capabilities that may change manifest format; major version bump. 
--- diff --git a/docs/API.md b/docs/API.md index 8cd3297..b7774eb 100644 --- a/docs/API.md +++ b/docs/API.md @@ -29,6 +29,7 @@ new ContentAddressableStore(options) - `options.codec` (optional): CodecPort implementation (default: JsonCodec) - `options.crypto` (optional): CryptoPort implementation (default: auto-detected) - `options.policy` (optional): Resilience policy from `@git-stunts/alfred` for Git I/O +- `options.merkleThreshold` (optional): Chunk count threshold for Merkle manifests (default: 1000) **Example:** @@ -107,7 +108,7 @@ const service = await cas.getService(); #### store ```javascript -await cas.store({ source, slug, filename, encryptionKey }) +await cas.store({ source, slug, filename, encryptionKey, passphrase, kdfOptions, compression }) ``` Stores content from an async iterable source. @@ -118,6 +119,9 @@ Stores content from an async iterable source. - `slug` (required): `string` - Unique identifier for the asset - `filename` (required): `string` - Original filename - `encryptionKey` (optional): `Buffer` - 32-byte encryption key +- `passphrase` (optional): `string` - Derive encryption key from passphrase (alternative to `encryptionKey`) +- `kdfOptions` (optional): `Object` - KDF options when using `passphrase` (`{ algorithm, iterations, cost, ... }`) +- `compression` (optional): `{ algorithm: 'gzip' }` - Enable compression before encryption/chunking **Returns:** `Promise` @@ -154,6 +158,9 @@ Convenience method that opens a file and stores it. 
- `slug` (required): `string` - Unique identifier for the asset - `filename` (optional): `string` - Filename (defaults to basename of filePath) - `encryptionKey` (optional): `Buffer` - 32-byte encryption key +- `passphrase` (optional): `string` - Derive encryption key from passphrase +- `kdfOptions` (optional): `Object` - KDF options when using `passphrase` +- `compression` (optional): `{ algorithm: 'gzip' }` - Enable compression **Returns:** `Promise` @@ -171,7 +178,7 @@ const manifest = await cas.storeFile({ #### restore ```javascript -await cas.restore({ manifest, encryptionKey }) +await cas.restore({ manifest, encryptionKey, passphrase }) ``` Restores content from a manifest and returns the buffer. @@ -180,6 +187,7 @@ Restores content from a manifest and returns the buffer. - `manifest` (required): `Manifest` - Manifest object - `encryptionKey` (optional): `Buffer` - 32-byte encryption key (required if content is encrypted) +- `passphrase` (optional): `string` - Passphrase for KDF-based decryption (alternative to `encryptionKey`) **Returns:** `Promise<{ buffer: Buffer, bytesWritten: number }>` @@ -209,6 +217,7 @@ Restores content from a manifest and writes it to a file. - `manifest` (required): `Manifest` - Manifest object - `encryptionKey` (optional): `Buffer` - 32-byte encryption key +- `passphrase` (optional): `string` - Passphrase for KDF-based decryption - `outputPath` (required): `string` - Path to write the restored file **Returns:** `Promise<{ bytesWritten: number }>` @@ -323,6 +332,48 @@ console.log(`Asset "${slug}" has ${chunksOrphaned} chunks to clean up`); // Caller must remove refs pointing to treeOid; run `git gc --prune` to reclaim space ``` +#### deriveKey + +```javascript +await cas.deriveKey(options) +``` + +Derives an encryption key from a passphrase using PBKDF2 or scrypt. 
+ +**Parameters:** + +- `options.passphrase` (required): `string` - The passphrase +- `options.salt` (optional): `Buffer` - Salt (random if omitted) +- `options.algorithm` (optional): `'pbkdf2' | 'scrypt'` - KDF algorithm (default: `'pbkdf2'`) +- `options.iterations` (optional): `number` - PBKDF2 iterations (default: 100000) +- `options.cost` (optional): `number` - scrypt cost parameter N (default: 16384) +- `options.blockSize` (optional): `number` - scrypt block size r (default: 8) +- `options.parallelization` (optional): `number` - scrypt parallelization p (default: 1) +- `options.keyLength` (optional): `number` - Derived key length in bytes (default: 32) + +**Returns:** `Promise<{ key: Buffer, salt: Buffer, params: Object }>` + +- `key` — the derived encryption key (`keyLength` bytes; 32 by default) +- `salt` — the salt used (save this for re-derivation) +- `params` — full KDF parameters object (stored in manifest when using `passphrase` option) + +**Example:** + +```javascript +const { key, salt, params } = await cas.deriveKey({ + passphrase: 'my secret passphrase', + algorithm: 'pbkdf2', + iterations: 200000, +}); + +// Use the derived key for encryption +const manifest = await cas.storeFile({ + filePath: '/path/to/file.txt', + slug: 'my-asset', + encryptionKey: key, +}); +``` + #### findOrphanedChunks ```javascript @@ -443,6 +494,7 @@ new CasService({ persistence, codec, crypto, chunkSize }) - `codec` (required): `CodecPort` implementation - `crypto` (required): `CryptoPort` implementation - `chunkSize` (optional): `number` - Chunk size in bytes (default: 262144, minimum: 1024) +- `merkleThreshold` (optional): `number` - Chunk count threshold for Merkle manifests (default: 1000) **Throws:** `Error` if chunkSize is less than 1024 bytes @@ -607,7 +659,10 @@ new Manifest(data) - `data.filename` (required): `string` - Original filename (min length: 1) - `data.size` (required): `number` - Total file size in bytes (>= 0) - `data.chunks` (required): `Array` - Chunk metadata array -- `data.encryption` 
(optional): `Object` - Encryption metadata +- `data.encryption` (optional): `Object` - Encryption metadata (may include `kdf` field for passphrase-derived keys) +- `data.version` (optional): `number` - Manifest version (1 = flat, 2 = Merkle; default: 1) +- `data.compression` (optional): `Object` - Compression metadata `{ algorithm: 'gzip' }` +- `data.subManifests` (optional): `Array` - Sub-manifest references (v2 Merkle manifests only) **Throws:** `Error` if data does not match ManifestSchema @@ -635,7 +690,10 @@ const manifest = new Manifest({ - `filename`: `string` - Original filename - `size`: `number` - Total file size - `chunks`: `Array` - Array of Chunk objects -- `encryption`: `Object | undefined` - Encryption metadata +- `encryption`: `Object | undefined` - Encryption metadata (may include `kdf` sub-object) +- `version`: `number` - Manifest version (1 or 2, default: 1) +- `compression`: `Object | undefined` - Compression metadata `{ algorithm }` +- `subManifests`: `Array | undefined` - Sub-manifest references (v2 only) #### Methods @@ -933,6 +991,27 @@ Creates a streaming encryption context. - `encrypt`: `(source: AsyncIterable) => AsyncIterable` - Transform function - `finalize`: `() => { algorithm: string, nonce: string, tag: string, encrypted: boolean }` - Get metadata +##### deriveKey + +```javascript +await port.deriveKey(options) +``` + +Derives an encryption key from a passphrase using PBKDF2 or scrypt. 
+ +**Parameters:** + +- `options.passphrase`: `string` - The passphrase +- `options.salt` (optional): `Buffer` - Salt (random if omitted) +- `options.algorithm` (optional): `'pbkdf2' | 'scrypt'` - KDF algorithm (default: `'pbkdf2'`) +- `options.iterations` (optional): `number` - PBKDF2 iterations +- `options.cost` (optional): `number` - scrypt cost N +- `options.blockSize` (optional): `number` - scrypt block size r +- `options.parallelization` (optional): `number` - scrypt parallelization p +- `options.keyLength` (optional): `number` - Derived key length (default: 32) + +**Returns:** `Promise<{ key: Buffer, salt: Buffer, params: Object }>` + **Example Implementation:** ```javascript @@ -958,6 +1037,10 @@ class CustomCryptoAdapter extends CryptoPort { createEncryptionStream(key) { // Implementation } + + async deriveKey(options) { + // Implementation + } } ``` diff --git a/index.d.ts b/index.d.ts index 580f479..2ed4f24 100644 --- a/index.d.ts +++ b/index.d.ts @@ -4,7 +4,7 @@ */ import Manifest from "./src/domain/value-objects/Manifest.js"; -import type { EncryptionMeta, ManifestData } from "./src/domain/value-objects/Manifest.js"; +import type { EncryptionMeta, ManifestData, CompressionMeta, KdfParams, SubManifestRef } from "./src/domain/value-objects/Manifest.js"; import Chunk from "./src/domain/value-objects/Chunk.js"; import CasService from "./src/domain/services/CasService.js"; import type { @@ -12,10 +12,12 @@ import type { CodecPort, GitPersistencePort, CasServiceOptions, + DeriveKeyOptions, + DeriveKeyResult, } from "./src/domain/services/CasService.js"; export { CasService, Manifest, Chunk }; -export type { EncryptionMeta, ManifestData, CryptoPort, CodecPort, GitPersistencePort, CasServiceOptions }; +export type { EncryptionMeta, ManifestData, CompressionMeta, KdfParams, SubManifestRef, CryptoPort, CodecPort, GitPersistencePort, CasServiceOptions, DeriveKeyOptions, DeriveKeyResult }; /** Abstract port for cryptographic operations. 
*/ export declare class CryptoPortBase { @@ -30,6 +32,7 @@ export declare class CryptoPortBase { encrypt: (source: AsyncIterable) => AsyncIterable; finalize: () => EncryptionMeta; }; + deriveKey(options: DeriveKeyOptions): Promise; } /** Abstract port for persisting data to Git's object database. */ @@ -126,17 +129,22 @@ export default class ContentAddressableStore { slug: string; filename: string; encryptionKey?: Buffer; + passphrase?: string; + kdfOptions?: Omit; + compression?: { algorithm: "gzip" }; }): Promise; restoreFile(options: { manifest: Manifest; encryptionKey?: Buffer; + passphrase?: string; outputPath: string; }): Promise<{ bytesWritten: number }>; restore(options: { manifest: Manifest; encryptionKey?: Buffer; + passphrase?: string; }): Promise<{ buffer: Buffer; bytesWritten: number }>; createTree(options: { manifest: Manifest }): Promise; @@ -152,4 +160,6 @@ export default class ContentAddressableStore { findOrphanedChunks(options: { treeOids: string[]; }): Promise<{ referenced: Set; total: number }>; + + deriveKey(options: DeriveKeyOptions): Promise; } diff --git a/index.js b/index.js index 0dd378a..8fac546 100644 --- a/index.js +++ b/index.js @@ -196,6 +196,9 @@ export default class ContentAddressableStore { * @param {string} options.slug - Logical identifier for the stored asset. * @param {string} options.filename - Filename for the manifest. * @param {Buffer} [options.encryptionKey] - 32-byte key for AES-256-GCM encryption. + * @param {string} [options.passphrase] - Derive encryption key from passphrase. + * @param {Object} [options.kdfOptions] - KDF options when using passphrase. + * @param {{ algorithm: 'gzip' }} [options.compression] - Enable compression. * @returns {Promise} The resulting manifest. */ async store(options) { @@ -208,14 +211,16 @@ export default class ContentAddressableStore { * @param {Object} options * @param {import('./src/domain/value-objects/Manifest.js').default} options.manifest - The file manifest. 
* @param {Buffer} [options.encryptionKey] - 32-byte key, required if manifest is encrypted. + * @param {string} [options.passphrase] - Passphrase for KDF-based decryption. * @param {string} options.outputPath - Destination file path. * @returns {Promise<{ bytesWritten: number }>} */ - async restoreFile({ manifest, encryptionKey, outputPath }) { + async restoreFile({ manifest, encryptionKey, passphrase, outputPath }) { const service = await this.#getService(); const { buffer, bytesWritten } = await service.restore({ manifest, encryptionKey, + passphrase, }); writeFileSync(outputPath, buffer); return { bytesWritten }; @@ -226,6 +231,7 @@ export default class ContentAddressableStore { * @param {Object} options * @param {import('./src/domain/value-objects/Manifest.js').default} options.manifest - The file manifest. * @param {Buffer} [options.encryptionKey] - 32-byte key, required if manifest is encrypted. + * @param {string} [options.passphrase] - Passphrase for KDF-based decryption. * @returns {Promise<{ buffer: Buffer, bytesWritten: number }>} */ async restore(options) { @@ -288,4 +294,22 @@ export default class ContentAddressableStore { const service = await this.#getService(); return await service.findOrphanedChunks(options); } + + /** + * Derives an encryption key from a passphrase using PBKDF2 or scrypt. + * @param {Object} options + * @param {string} options.passphrase - The passphrase. + * @param {Buffer} [options.salt] - Salt (random if omitted). + * @param {'pbkdf2'|'scrypt'} [options.algorithm='pbkdf2'] - KDF algorithm. + * @param {number} [options.iterations] - PBKDF2 iterations. + * @param {number} [options.cost] - scrypt cost (N). + * @param {number} [options.blockSize] - scrypt block size (r). + * @param {number} [options.parallelization] - scrypt parallelization (p). + * @param {number} [options.keyLength=32] - Derived key length. 
+ * @returns {Promise<{ key: Buffer, salt: Buffer, params: Object }>} + */ + async deriveKey(options) { + const service = await this.#getService(); + return await service.deriveKey(options); + } } diff --git a/jsr.json b/jsr.json index d990c9c..c3a4720 100644 --- a/jsr.json +++ b/jsr.json @@ -1,6 +1,6 @@ { "name": "@git-stunts/git-cas", - "version": "1.6.2", + "version": "2.0.0", "exports": { ".": "./index.js", "./service": "./src/domain/services/CasService.js", diff --git a/package.json b/package.json index 21cd638..d3f2619 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@git-stunts/git-cas", - "version": "1.6.2", + "version": "2.0.0", "description": "Content-addressed storage backed by Git's object database, with optional encryption and pluggable codecs", "type": "module", "main": "index.js", diff --git a/src/domain/schemas/ManifestSchema.d.ts b/src/domain/schemas/ManifestSchema.d.ts index 83cd4cf..5813199 100644 --- a/src/domain/schemas/ManifestSchema.d.ts +++ b/src/domain/schemas/ManifestSchema.d.ts @@ -13,19 +13,46 @@ export declare const ChunkSchema: z.ZodObject<{ blob: z.ZodString; }>; +/** Validates KDF parameters stored alongside encryption metadata. */ +export declare const KdfSchema: z.ZodObject<{ + algorithm: z.ZodEnum<["pbkdf2", "scrypt"]>; + salt: z.ZodString; + iterations: z.ZodOptional; + cost: z.ZodOptional; + blockSize: z.ZodOptional; + parallelization: z.ZodOptional; + keyLength: z.ZodDefault; +}>; + /** Validates the encryption metadata attached to an encrypted manifest. */ export declare const EncryptionSchema: z.ZodObject<{ algorithm: z.ZodString; nonce: z.ZodString; tag: z.ZodString; encrypted: z.ZodDefault; + kdf: z.ZodOptional; +}>; + +/** Validates compression metadata. */ +export declare const CompressionSchema: z.ZodObject<{ + algorithm: z.ZodEnum<["gzip"]>; +}>; + +/** Validates a sub-manifest reference in a v2 Merkle manifest. 
*/ +export declare const SubManifestRefSchema: z.ZodObject<{ + oid: z.ZodString; + chunkCount: z.ZodNumber; + startIndex: z.ZodNumber; }>; /** Validates a complete file manifest. */ export declare const ManifestSchema: z.ZodObject<{ + version: z.ZodDefault; slug: z.ZodString; filename: z.ZodString; size: z.ZodNumber; chunks: z.ZodArray; encryption: z.ZodOptional; + compression: z.ZodOptional; + subManifests: z.ZodOptional>; }>; diff --git a/src/domain/schemas/ManifestSchema.js b/src/domain/schemas/ManifestSchema.js index 298b006..970bf35 100644 --- a/src/domain/schemas/ManifestSchema.js +++ b/src/domain/schemas/ManifestSchema.js @@ -13,19 +13,46 @@ export const ChunkSchema = z.object({ blob: z.string().min(1), // Git OID }); +/** Validates KDF parameters stored alongside encryption metadata. */ +export const KdfSchema = z.object({ + algorithm: z.enum(['pbkdf2', 'scrypt']), + salt: z.string().min(1), + iterations: z.number().int().positive().optional(), + cost: z.number().int().positive().optional(), + blockSize: z.number().int().positive().optional(), + parallelization: z.number().int().positive().optional(), + keyLength: z.number().int().positive().default(32), +}); + /** Validates the encryption metadata attached to an encrypted manifest. */ export const EncryptionSchema = z.object({ algorithm: z.string(), nonce: z.string(), tag: z.string(), encrypted: z.boolean().default(true), + kdf: KdfSchema.optional(), +}); + +/** Validates compression metadata. */ +export const CompressionSchema = z.object({ + algorithm: z.enum(['gzip']), +}); + +/** Validates a sub-manifest reference in a v2 Merkle manifest. */ +export const SubManifestRefSchema = z.object({ + oid: z.string().min(1), + chunkCount: z.number().int().positive(), + startIndex: z.number().int().min(0), }); /** Validates a complete file manifest. 
*/ export const ManifestSchema = z.object({ + version: z.number().int().min(1).max(2).default(1), slug: z.string().min(1), filename: z.string().min(1), size: z.number().int().min(0), chunks: z.array(ChunkSchema), encryption: EncryptionSchema.optional(), + compression: CompressionSchema.optional(), + subManifests: z.array(SubManifestRefSchema).optional(), }); diff --git a/src/domain/services/CasService.d.ts b/src/domain/services/CasService.d.ts index 86271c7..a98fc2c 100644 --- a/src/domain/services/CasService.d.ts +++ b/src/domain/services/CasService.d.ts @@ -5,7 +5,7 @@ import { EventEmitter } from "node:events"; import Manifest from "../value-objects/Manifest.js"; -import type { EncryptionMeta } from "../value-objects/Manifest.js"; +import type { EncryptionMeta, CompressionMeta, KdfParams } from "../value-objects/Manifest.js"; /** Port interface for cryptographic operations (hashing, encryption, random bytes). */ export interface CryptoPort { @@ -20,6 +20,7 @@ export interface CryptoPort { encrypt: (source: AsyncIterable) => AsyncIterable; finalize: () => EncryptionMeta; }; + deriveKey(options: DeriveKeyOptions): Promise; } /** Port interface for encoding and decoding manifest data. */ @@ -45,6 +46,26 @@ export interface CasServiceOptions { codec: CodecPort; crypto: CryptoPort; chunkSize?: number; + merkleThreshold?: number; +} + +/** Options for key derivation. */ +export interface DeriveKeyOptions { + passphrase: string; + salt?: Buffer; + algorithm?: "pbkdf2" | "scrypt"; + iterations?: number; + cost?: number; + blockSize?: number; + parallelization?: number; + keyLength?: number; +} + +/** Result from key derivation. 
*/ +export interface DeriveKeyResult { + key: Buffer; + salt: Buffer; + params: KdfParams; } /** @@ -58,6 +79,7 @@ export default class CasService extends EventEmitter { readonly codec: CodecPort; readonly crypto: CryptoPort; readonly chunkSize: number; + readonly merkleThreshold: number; constructor(options: CasServiceOptions); @@ -77,6 +99,9 @@ export default class CasService extends EventEmitter { slug: string; filename: string; encryptionKey?: Buffer; + passphrase?: string; + kdfOptions?: Omit; + compression?: { algorithm: "gzip" }; }): Promise; createTree(options: { manifest: Manifest }): Promise; @@ -84,6 +109,7 @@ export default class CasService extends EventEmitter { restore(options: { manifest: Manifest; encryptionKey?: Buffer; + passphrase?: string; }): Promise<{ buffer: Buffer; bytesWritten: number }>; readManifest(options: { treeOid: string }): Promise; @@ -97,4 +123,6 @@ export default class CasService extends EventEmitter { }): Promise<{ referenced: Set; total: number }>; verifyIntegrity(manifest: Manifest): Promise; + + deriveKey(options: DeriveKeyOptions): Promise; } diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index ea6ce07..1773ae5 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -4,9 +4,13 @@ * @module */ import { EventEmitter } from 'node:events'; +import { gunzip } from 'node:zlib'; +import { promisify } from 'node:util'; import Manifest from '../value-objects/Manifest.js'; import CasError from '../errors/CasError.js'; +const gunzipAsync = promisify(gunzip); + /** * Domain service for Content Addressable Storage operations. 
* @@ -29,8 +33,9 @@ export default class CasService extends EventEmitter { * @param {import('../../ports/CodecPort.js').default} options.codec * @param {import('../../ports/CryptoPort.js').default} options.crypto * @param {number} [options.chunkSize=262144] - 256 KiB + * @param {number} [options.merkleThreshold=1000] - Chunk count threshold for Merkle manifests. */ - constructor({ persistence, codec, crypto, chunkSize = 256 * 1024 }) { + constructor({ persistence, codec, crypto, chunkSize = 256 * 1024, merkleThreshold = 1000 }) { super(); if (chunkSize < 1024) { throw new Error('Chunk size must be at least 1024 bytes'); @@ -39,6 +44,7 @@ export default class CasService extends EventEmitter { this.codec = codec; this.crypto = crypto; this.chunkSize = chunkSize; + this.merkleThreshold = merkleThreshold; } /** @@ -159,6 +165,23 @@ export default class CasService extends EventEmitter { } } + /** + * Wraps an async iterable through gzip compression. + * @private + * @param {AsyncIterable} source + * @returns {AsyncIterable} + */ + async *_compressStream(source) { + const { createGzip } = await import('node:zlib'); + const { Readable } = await import('node:stream'); + const gz = createGzip(); + const input = Readable.from(source); + const compressed = input.pipe(gz); + for await (const chunk of compressed) { + yield chunk; + } + } + /** * Chunks an async iterable source and stores it in Git. * @@ -170,9 +193,19 @@ export default class CasService extends EventEmitter { * @param {string} options.slug * @param {string} options.filename * @param {Buffer} [options.encryptionKey] + * @param {string} [options.passphrase] - Derive encryption key from passphrase instead. + * @param {Object} [options.kdfOptions] - KDF options when using passphrase. + * @param {{ algorithm: 'gzip' }} [options.compression] - Enable compression. 
* @returns {Promise} */ - async store({ source, slug, filename, encryptionKey }) { + async store({ source, slug, filename, encryptionKey, passphrase, kdfOptions, compression }) { + let kdfParams; + if (passphrase) { + const derived = await this.deriveKey({ passphrase, ...kdfOptions }); + encryptionKey = derived.key; + kdfParams = derived.params; + } + if (encryptionKey) { this._validateKey(encryptionKey); } @@ -184,12 +217,22 @@ export default class CasService extends EventEmitter { chunks: [], }; + let processedSource = source; + if (compression) { + processedSource = this._compressStream(processedSource); + manifestData.compression = { algorithm: compression.algorithm || 'gzip' }; + } + if (encryptionKey) { const { encrypt, finalize } = this.crypto.createEncryptionStream(encryptionKey); - await this._chunkAndStore(encrypt(source), manifestData); - manifestData.encryption = finalize(); + await this._chunkAndStore(encrypt(processedSource), manifestData); + const encMeta = finalize(); + if (kdfParams) { + encMeta.kdf = kdfParams; + } + manifestData.encryption = encMeta; } else { - await this._chunkAndStore(source, manifestData); + await this._chunkAndStore(processedSource, manifestData); } const manifest = new Manifest(manifestData); @@ -210,12 +253,65 @@ export default class CasService extends EventEmitter { * @returns {Promise} Git OID of the created tree. 
*/ async createTree({ manifest }) { + const chunks = manifest.chunks; + + if (chunks.length > this.merkleThreshold) { + return await this._createMerkleTree({ manifest }); + } + const serializedManifest = this.codec.encode(manifest.toJSON()); const manifestOid = await this.persistence.writeBlob(serializedManifest); const treeEntries = [ `100644 blob ${manifestOid}\tmanifest.${this.codec.extension}`, - ...manifest.chunks.map((c) => `100644 blob ${c.blob}\t${c.digest}`), + ...chunks.map((c) => `100644 blob ${c.blob}\t${c.digest}`), + ]; + + return await this.persistence.writeTree(treeEntries); + } + + /** + * Creates a Merkle tree by splitting chunks into sub-manifests. + * @private + * @param {Object} options + * @param {import('../value-objects/Manifest.js').default} options.manifest + * @returns {Promise} Git tree OID. + */ + async _createMerkleTree({ manifest }) { + const chunks = [...manifest.chunks]; + const subManifestRefs = []; + const chunkBlobEntries = []; + + for (let i = 0; i < chunks.length; i += this.merkleThreshold) { + const group = chunks.slice(i, i + this.merkleThreshold); + const subManifestData = { chunks: group.map((c) => ({ index: c.index, size: c.size, digest: c.digest, blob: c.blob })) }; + const serialized = this.codec.encode(subManifestData); + const oid = await this.persistence.writeBlob(serialized); + + subManifestRefs.push({ + oid, + chunkCount: group.length, + startIndex: i, + }); + + for (const c of group) { + chunkBlobEntries.push(`100644 blob ${c.blob}\t${c.digest}`); + } + } + + const rootManifestData = { + ...manifest.toJSON(), + version: 2, + chunks: [], + subManifests: subManifestRefs, + }; + + const serializedRoot = this.codec.encode(rootManifestData); + const rootOid = await this.persistence.writeBlob(serializedRoot); + + const treeEntries = [ + `100644 blob ${rootOid}\tmanifest.${this.codec.extension}`, + ...chunkBlobEntries, ]; return await this.persistence.writeTree(treeEntries); @@ -263,31 +359,58 @@ export default class 
CasService extends EventEmitter { * @throws {CasError} MISSING_KEY if manifest is encrypted but no key is provided. * @throws {CasError} INTEGRITY_ERROR if chunk verification or decryption fails. */ - async restore({ manifest, encryptionKey }) { + /** + * Resolves the encryption key from a passphrase using KDF params from the manifest. + * @private + * @param {string} passphrase + * @param {Object} kdf - KDF params from manifest.encryption.kdf. + * @returns {Promise} + */ + async _resolveKeyFromPassphrase(passphrase, kdf) { + const { key } = await this.deriveKey({ + passphrase, + salt: Buffer.from(kdf.salt, 'base64'), + algorithm: kdf.algorithm, + iterations: kdf.iterations, + cost: kdf.cost, + blockSize: kdf.blockSize, + parallelization: kdf.parallelization, + }); + return key; + } + + /** + * Resolves the encryption key from passphrase or validates the provided key. + * @private + */ + _resolveEncryptionKey(manifest, encryptionKey, passphrase) { + if (passphrase && manifest.encryption?.kdf) { + return this._resolveKeyFromPassphrase(passphrase, manifest.encryption.kdf); + } if (encryptionKey) { this._validateKey(encryptionKey); } - if (manifest.encryption?.encrypted && !encryptionKey) { - throw new CasError( - 'Encryption key required to restore encrypted content', - 'MISSING_KEY', - ); + throw new CasError('Encryption key required to restore encrypted content', 'MISSING_KEY'); } + return Promise.resolve(encryptionKey); + } + + async restore({ manifest, encryptionKey, passphrase }) { + const key = await this._resolveEncryptionKey(manifest, encryptionKey, passphrase); if (manifest.chunks.length === 0) { return { buffer: Buffer.alloc(0), bytesWritten: 0 }; } - const chunks = await this._readAndVerifyChunks(manifest.chunks); - let buffer = Buffer.concat(chunks); + let buffer = Buffer.concat(await this._readAndVerifyChunks(manifest.chunks)); if (manifest.encryption?.encrypted) { - buffer = await this.decrypt({ - buffer, - key: encryptionKey, - meta: 
manifest.encryption, - }); + buffer = await this.decrypt({ buffer, key, meta: manifest.encryption }); + } + + if (manifest.compression) { + buffer = await gunzipAsync(buffer); } this.emit('file:restored', { @@ -342,9 +465,48 @@ export default class CasService extends EventEmitter { } const decoded = this.codec.decode(blob); + + if (decoded.version === 2 && decoded.subManifests?.length > 0) { + decoded.chunks = await this._resolveSubManifests(decoded.subManifests, treeOid); + } + return new Manifest(decoded); } + /** + * Reads and flattens sub-manifest blobs into a single chunk array. + * @private + * @param {Array<{ oid: string }>} subManifests - Sub-manifest references. + * @param {string} treeOid - Parent tree OID (for error context). + * @returns {Promise} Flattened chunk entries. + */ + async _resolveSubManifests(subManifests, treeOid) { + const allChunks = []; + for (const ref of subManifests) { + const subBlob = await this._readSubManifestBlob(ref.oid, treeOid); + const subDecoded = this.codec.decode(subBlob); + allChunks.push(...subDecoded.chunks); + } + return allChunks; + } + + /** + * Reads a sub-manifest blob, wrapping errors as GIT_ERROR. + * @private + */ + async _readSubManifestBlob(oid, treeOid) { + try { + return await this.persistence.readBlob(oid); + } catch (err) { + if (err instanceof CasError) { throw err; } + throw new CasError( + `Failed to read sub-manifest blob ${oid}: ${err.message}`, + 'GIT_ERROR', + { treeOid, subManifestOid: oid, originalError: err }, + ); + } + } + /** * Returns deletion metadata for an asset stored in a Git tree. * Does not perform any destructive Git operations. @@ -391,6 +553,23 @@ export default class CasService extends EventEmitter { * @param {import('../value-objects/Manifest.js').default} manifest * @returns {Promise} */ + /** + * Derives an encryption key from a passphrase using PBKDF2 or scrypt. + * @param {Object} options + * @param {string} options.passphrase - The passphrase to derive a key from. 
+ * @param {Buffer} [options.salt] - Salt (random if omitted). + * @param {'pbkdf2'|'scrypt'} [options.algorithm='pbkdf2'] - KDF algorithm. + * @param {number} [options.iterations] - PBKDF2 iterations. + * @param {number} [options.cost] - scrypt cost (N). + * @param {number} [options.blockSize] - scrypt block size (r). + * @param {number} [options.parallelization] - scrypt parallelization (p). + * @param {number} [options.keyLength=32] - Derived key length. + * @returns {Promise<{ key: Buffer, salt: Buffer, params: Object }>} + */ + async deriveKey(options) { + return await this.crypto.deriveKey(options); + } + async verifyIntegrity(manifest) { for (const chunk of manifest.chunks) { const blob = await this.persistence.readBlob(chunk.blob); diff --git a/src/domain/value-objects/Manifest.d.ts b/src/domain/value-objects/Manifest.d.ts index b0ef031..ecc9b9a 100644 --- a/src/domain/value-objects/Manifest.d.ts +++ b/src/domain/value-objects/Manifest.d.ts @@ -1,31 +1,61 @@ import Chunk from "./Chunk.js"; +/** KDF parameters stored alongside encryption metadata. */ +export interface KdfParams { + algorithm: "pbkdf2" | "scrypt"; + salt: string; + iterations?: number; + cost?: number; + blockSize?: number; + parallelization?: number; + keyLength: number; +} + /** AES-256-GCM encryption metadata attached to an encrypted manifest. */ export interface EncryptionMeta { algorithm: string; nonce: string; tag: string; encrypted: boolean; + kdf?: KdfParams; +} + +/** Compression metadata. */ +export interface CompressionMeta { + algorithm: "gzip"; +} + +/** Sub-manifest reference in a v2 Merkle manifest. */ +export interface SubManifestRef { + oid: string; + chunkCount: number; + startIndex: number; } /** Raw manifest data accepted by the {@link Manifest} constructor. 
*/ export interface ManifestData { + version?: number; slug: string; filename: string; size: number; chunks: Array<{ index: number; size: number; digest: string; blob: string }>; encryption?: EncryptionMeta; + compression?: CompressionMeta; + subManifests?: SubManifestRef[]; } /** * Immutable value object representing a file manifest. */ export default class Manifest { + readonly version: number; readonly slug: string; readonly filename: string; readonly size: number; readonly chunks: readonly Chunk[]; readonly encryption?: EncryptionMeta; + readonly compression?: CompressionMeta; + readonly subManifests?: readonly SubManifestRef[]; constructor(data: ManifestData); diff --git a/src/domain/value-objects/Manifest.js b/src/domain/value-objects/Manifest.js index 14b4412..07bcf0f 100644 --- a/src/domain/value-objects/Manifest.js +++ b/src/domain/value-objects/Manifest.js @@ -22,11 +22,14 @@ export default class Manifest { constructor(data) { try { ManifestSchema.parse(data); + this.version = data.version || 1; this.slug = data.slug; this.filename = data.filename; this.size = data.size; this.chunks = data.chunks.map((c) => new Chunk(c)); this.encryption = data.encryption ? { ...data.encryption } : undefined; + this.compression = data.compression ? { ...data.compression } : undefined; + this.subManifests = data.subManifests ? 
data.subManifests.map((s) => ({ ...s })) : undefined; Object.freeze(this); } catch (error) { if (error instanceof ZodError) { @@ -42,11 +45,14 @@ export default class Manifest { */ toJSON() { return { + version: this.version, slug: this.slug, filename: this.filename, size: this.size, chunks: this.chunks, encryption: this.encryption, + compression: this.compression, + subManifests: this.subManifests, }; } } diff --git a/src/infrastructure/adapters/BunCryptoAdapter.js b/src/infrastructure/adapters/BunCryptoAdapter.js index 046889d..29de605 100644 --- a/src/infrastructure/adapters/BunCryptoAdapter.js +++ b/src/infrastructure/adapters/BunCryptoAdapter.js @@ -1,9 +1,10 @@ import { CryptoHasher } from 'bun'; import CryptoPort from '../../ports/CryptoPort.js'; import CasError from '../../domain/errors/CasError.js'; -// We still use node:crypto for AES-GCM because Bun's native implementation +// We still use node:crypto for AES-GCM because Bun's native implementation // is heavily optimized for these specific Node APIs. -import { createCipheriv, createDecipheriv } from 'node:crypto'; +import { createCipheriv, createDecipheriv, pbkdf2, scrypt } from 'node:crypto'; +import { promisify } from 'node:util'; /** * Bun-native {@link CryptoPort} implementation. 
@@ -82,6 +83,38 @@ export default class BunCryptoAdapter extends CryptoPort { return { encrypt, finalize }; } + /** @override */ + async deriveKey({ + passphrase, + salt, + algorithm = 'pbkdf2', + iterations = 100_000, + cost = 16384, + blockSize = 8, + parallelization = 1, + keyLength = 32, + }) { + const saltBuf = salt || this.randomBytes(32); + let key; + const params = { algorithm, salt: Buffer.from(saltBuf).toString('base64'), keyLength }; + + if (algorithm === 'pbkdf2') { + key = await promisify(pbkdf2)(passphrase, saltBuf, iterations, keyLength, 'sha512'); + params.iterations = iterations; + } else if (algorithm === 'scrypt') { + key = await promisify(scrypt)(passphrase, saltBuf, keyLength, { + N: cost, r: blockSize, p: parallelization, + }); + params.cost = cost; + params.blockSize = blockSize; + params.parallelization = parallelization; + } else { + throw new Error(`Unsupported KDF algorithm: ${algorithm}`); + } + + return { key, salt: Buffer.from(saltBuf), params }; + } + /** * Validates that a key is a 32-byte Buffer or Uint8Array. 
* @param {Buffer|Uint8Array} key diff --git a/src/infrastructure/adapters/NodeCryptoAdapter.js b/src/infrastructure/adapters/NodeCryptoAdapter.js index fc5d107..86a2559 100644 --- a/src/infrastructure/adapters/NodeCryptoAdapter.js +++ b/src/infrastructure/adapters/NodeCryptoAdapter.js @@ -1,4 +1,5 @@ -import { createHash, createCipheriv, createDecipheriv, randomBytes } from 'node:crypto'; +import { createHash, createCipheriv, createDecipheriv, randomBytes, pbkdf2, scrypt } from 'node:crypto'; +import { promisify } from 'node:util'; import CryptoPort from '../../ports/CryptoPort.js'; import CasError from '../../domain/errors/CasError.js'; @@ -65,6 +66,44 @@ export default class NodeCryptoAdapter extends CryptoPort { return { encrypt, finalize }; } + /** @override */ + async deriveKey({ + passphrase, + salt, + algorithm = 'pbkdf2', + iterations = 100_000, + cost = 16384, + blockSize = 8, + parallelization = 1, + keyLength = 32, + }) { + const saltBuf = salt || randomBytes(32); + let key; + const params = { + algorithm, + salt: saltBuf.toString('base64'), + keyLength, + }; + + if (algorithm === 'pbkdf2') { + key = await promisify(pbkdf2)(passphrase, saltBuf, iterations, keyLength, 'sha512'); + params.iterations = iterations; + } else if (algorithm === 'scrypt') { + key = await promisify(scrypt)(passphrase, saltBuf, keyLength, { + N: cost, + r: blockSize, + p: parallelization, + }); + params.cost = cost; + params.blockSize = blockSize; + params.parallelization = parallelization; + } else { + throw new Error(`Unsupported KDF algorithm: ${algorithm}`); + } + + return { key, salt: saltBuf, params }; + } + /** * Validates that a key is a 32-byte Buffer. 
* @param {Buffer} key diff --git a/src/infrastructure/adapters/WebCryptoAdapter.js b/src/infrastructure/adapters/WebCryptoAdapter.js index c9c8cb9..0078fbc 100644 --- a/src/infrastructure/adapters/WebCryptoAdapter.js +++ b/src/infrastructure/adapters/WebCryptoAdapter.js @@ -117,6 +117,53 @@ export default class WebCryptoAdapter extends CryptoPort { return { encrypt, finalize }; } + /** @override */ + async deriveKey({ + passphrase, + salt, + algorithm = 'pbkdf2', + iterations = 100_000, + cost = 16384, + blockSize = 8, + parallelization = 1, + keyLength = 32, + }) { + const saltBuf = salt || this.randomBytes(32); + const params = { algorithm, salt: this.#toBase64(saltBuf), keyLength }; + + const opts = { passphrase, saltBuf, iterations, cost, blockSize, parallelization, keyLength, params }; + const key = algorithm === 'pbkdf2' + ? await this.#derivePbkdf2(opts) + : await this.#deriveScrypt(opts); + + return { key: Buffer.from(key), salt: Buffer.from(saltBuf), params }; + } + + async #derivePbkdf2({ passphrase, saltBuf, iterations, keyLength, params }) { + const enc = new globalThis.TextEncoder(); + const baseKey = await globalThis.crypto.subtle.importKey( + 'raw', enc.encode(passphrase), 'PBKDF2', false, ['deriveBits'], + ); + const bits = await globalThis.crypto.subtle.deriveBits( + { name: 'PBKDF2', salt: saltBuf, iterations, hash: 'SHA-512' }, + baseKey, keyLength * 8, + ); + params.iterations = iterations; + return Buffer.from(bits); + } + + async #deriveScrypt({ passphrase, saltBuf, cost, blockSize, parallelization, keyLength, params }) { + const { scrypt: scryptCb } = await import('node:crypto'); + const { promisify: promisifyFn } = await import('node:util'); + const key = await promisifyFn(scryptCb)(passphrase, saltBuf, keyLength, { + N: cost, r: blockSize, p: parallelization, + }); + params.cost = cost; + params.blockSize = blockSize; + params.parallelization = parallelization; + return key; + } + /** * Imports a raw key for use with Web Crypto AES-GCM 
operations. * @param {Buffer|Uint8Array} rawKey - 32-byte raw key material. diff --git a/src/ports/CryptoPort.js b/src/ports/CryptoPort.js index c898591..e985b85 100644 --- a/src/ports/CryptoPort.js +++ b/src/ports/CryptoPort.js @@ -51,4 +51,21 @@ export default class CryptoPort { createEncryptionStream(_key) { throw new Error('Not implemented'); } + + /** + * Derives an encryption key from a passphrase using a KDF. + * @param {Object} options + * @param {string} options.passphrase - The passphrase to derive a key from. + * @param {Buffer} [options.salt] - Salt for the KDF (random if omitted). + * @param {'pbkdf2'|'scrypt'} [options.algorithm='pbkdf2'] - KDF algorithm. + * @param {number} [options.iterations=100000] - PBKDF2 iteration count. + * @param {number} [options.cost=16384] - scrypt cost parameter (N). + * @param {number} [options.blockSize=8] - scrypt block size (r). + * @param {number} [options.parallelization=1] - scrypt parallelization (p). + * @param {number} [options.keyLength=32] - Derived key length in bytes. 
+ * @returns {Promise<{ key: Buffer, salt: Buffer, params: { algorithm: string, salt: string, iterations?: number, cost?: number, blockSize?: number, parallelization?: number, keyLength: number } }>} + */ + deriveKey(_options) { + throw new Error('Not implemented'); + } } diff --git a/test/unit/domain/services/CasService.compression.test.js b/test/unit/domain/services/CasService.compression.test.js new file mode 100644 index 0000000..0937df3 --- /dev/null +++ b/test/unit/domain/services/CasService.compression.test.js @@ -0,0 +1,310 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { randomBytes } from 'node:crypto'; +import CasService from '../../../../src/domain/services/CasService.js'; +import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; +import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +async function* bufferSource(buf) { + yield buf; +} + +async function storeBuffer(svc, buf, opts = {}) { + return svc.store({ + source: bufferSource(buf), + slug: opts.slug || 'test', + filename: opts.filename || 'test.bin', + encryptionKey: opts.encryptionKey, + compression: opts.compression, + }); +} + +/** + * Shared factory: builds the standard test fixtures (crypto, blobStore, + * mockPersistence, service) used by every describe block. + */ +function setup() { + const crypto = new NodeCryptoAdapter(); + const blobStore = new Map(); + + const mockPersistence = { + writeBlob: vi.fn().mockImplementation(async (content) => { + const buf = Buffer.isBuffer(content) ? 
content : Buffer.from(content); + const oid = crypto.sha256(buf); + blobStore.set(oid, buf); + return oid; + }), + writeTree: vi.fn().mockResolvedValue('mock-tree-oid'), + readBlob: vi.fn().mockImplementation(async (oid) => { + const buf = blobStore.get(oid); + if (!buf) { throw new Error(`Blob not found: ${oid}`); } + return buf; + }), + }; + + const service = new CasService({ + persistence: mockPersistence, + crypto, + codec: new JsonCodec(), + chunkSize: 1024, + }); + + return { crypto, blobStore, mockPersistence, service }; +} + +// --------------------------------------------------------------------------- +// 1. Store + restore with compression yields original bytes +// --------------------------------------------------------------------------- +describe('CasService compression – store+restore round-trip', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('store+restore with compression yields original bytes', async () => { + const original = Buffer.from('Hello, World! '.repeat(200)); + const manifest = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + }); + + const { buffer, bytesWritten } = await service.restore({ manifest }); + + expect(buffer.equals(original)).toBe(true); + expect(bytesWritten).toBe(original.length); + }); +}); + +// --------------------------------------------------------------------------- +// 2. 
Compressed storage is smaller than uncompressed for compressible data +// --------------------------------------------------------------------------- +describe('CasService compression – size reduction', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('compressed storage is smaller than uncompressed for compressible data', async () => { + // Highly compressible: repeating pattern + const original = Buffer.from('AAAA'.repeat(2048)); + + const manifestPlain = await storeBuffer(service, original); + const manifestCompressed = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + }); + + // Sum up stored chunk sizes for each manifest + const plainSize = manifestPlain.chunks.reduce((sum, c) => sum + c.size, 0); + const compressedSize = manifestCompressed.chunks.reduce((sum, c) => sum + c.size, 0); + + expect(compressedSize).toBeLessThan(plainSize); + }); +}); + +// --------------------------------------------------------------------------- +// 3. Compression + encryption round-trip +// --------------------------------------------------------------------------- +describe('CasService compression – compression + encryption round-trip', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('round-trips data stored with both compression and encryption', async () => { + const key = randomBytes(32); + const original = Buffer.from('Secret compressible content! 
'.repeat(100)); + + const manifest = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + encryptionKey: key, + }); + + expect(manifest.compression).toBeDefined(); + expect(manifest.encryption).toBeDefined(); + expect(manifest.encryption.encrypted).toBe(true); + + const { buffer, bytesWritten } = await service.restore({ + manifest, + encryptionKey: key, + }); + + expect(buffer.equals(original)).toBe(true); + expect(bytesWritten).toBe(original.length); + }); +}); + +// --------------------------------------------------------------------------- +// 4. Empty file with compression +// --------------------------------------------------------------------------- +describe('CasService compression – empty file', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('handles a 0-byte file with compression enabled', async () => { + const original = Buffer.alloc(0); + + const manifest = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + }); + + expect(manifest.compression).toEqual({ algorithm: 'gzip' }); + + const { buffer, bytesWritten } = await service.restore({ manifest }); + + expect(buffer.length).toBe(0); + expect(bytesWritten).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// 5. 
Incompressible data does not break +// --------------------------------------------------------------------------- +describe('CasService compression – incompressible data', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('incompressible data does not break round-trip', async () => { + // Random bytes are essentially incompressible + const original = randomBytes(2048); + + const manifest = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + }); + + const { buffer, bytesWritten } = await service.restore({ manifest }); + + expect(buffer.equals(original)).toBe(true); + expect(bytesWritten).toBe(original.length); + }); +}); + +// --------------------------------------------------------------------------- +// 6. Manifest includes compression metadata +// --------------------------------------------------------------------------- +describe('CasService compression – manifest metadata', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('manifest includes compression metadata when compression is enabled', async () => { + const original = Buffer.from('Some data to compress'); + + const manifest = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + }); + + expect(manifest.compression).toBeDefined(); + expect(manifest.compression).toEqual({ algorithm: 'gzip' }); + }); + + it('manifest does not include compression metadata when compression is not used', async () => { + const original = Buffer.from('Some data without compression'); + + const manifest = await storeBuffer(service, original); + + expect(manifest.compression).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// 7. 
Backward compatibility – restore without compression on uncompressed manifest +// --------------------------------------------------------------------------- +describe('CasService compression – backward compatibility', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('restores uncompressed data from a manifest with no compression field', async () => { + const original = Buffer.from('Plain uncompressed content here'); + + // Store without compression + const manifest = await storeBuffer(service, original); + + expect(manifest.compression).toBeUndefined(); + + const { buffer, bytesWritten } = await service.restore({ manifest }); + + expect(buffer.equals(original)).toBe(true); + expect(bytesWritten).toBe(original.length); + }); + + it('restores encrypted data from a manifest with no compression field', async () => { + const key = randomBytes(32); + const original = Buffer.from('Encrypted but not compressed'); + + const manifest = await storeBuffer(service, original, { + encryptionKey: key, + }); + + expect(manifest.compression).toBeUndefined(); + expect(manifest.encryption).toBeDefined(); + + const { buffer } = await service.restore({ manifest, encryptionKey: key }); + + expect(buffer.equals(original)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 8. 
Fuzz: round-trip across multiple sizes +// --------------------------------------------------------------------------- +describe('CasService compression – fuzz round-trip across sizes', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + const sizes = [0, 1, 100, 1024, 5000]; + + for (const size of sizes) { + it(`round-trips ${size} bytes with compression (seeded data)`, async () => { + const original = Buffer.alloc(size); + for (let b = 0; b < size; b++) { + original[b] = (size + b * 7) & 0xff; + } + + const manifest = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + }); + + const { buffer } = await service.restore({ manifest }); + + expect(buffer.equals(original)).toBe(true); + }); + } + + for (const size of sizes) { + it(`round-trips ${size} bytes with compression + encryption (seeded data)`, async () => { + const key = randomBytes(32); + const original = Buffer.alloc(size); + for (let b = 0; b < size; b++) { + original[b] = (size * 3 + b * 13) & 0xff; + } + + const manifest = await storeBuffer(service, original, { + compression: { algorithm: 'gzip' }, + encryptionKey: key, + }); + + const { buffer } = await service.restore({ manifest, encryptionKey: key }); + + expect(buffer.equals(original)).toBe(true); + }); + } +}); diff --git a/test/unit/domain/services/CasService.kdf.test.js b/test/unit/domain/services/CasService.kdf.test.js new file mode 100644 index 0000000..31d967d --- /dev/null +++ b/test/unit/domain/services/CasService.kdf.test.js @@ -0,0 +1,469 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { randomBytes } from 'node:crypto'; +import CasService from '../../../../src/domain/services/CasService.js'; +import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; +import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import CasError from '../../../../src/domain/errors/CasError.js'; + +// 
--------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +async function* bufferSource(buf) { + yield buf; +} + +/** + * Shared factory: builds the standard test fixtures (crypto, blobStore, + * mockPersistence, service) used by every describe block. + */ +function setup() { + const crypto = new NodeCryptoAdapter(); + const blobs = new Map(); + const mockPersistence = { + writeBlob: vi.fn().mockImplementation((content) => { + const oid = crypto.sha256(Buffer.isBuffer(content) ? content : Buffer.from(content)); + blobs.set(oid, Buffer.isBuffer(content) ? content : Buffer.from(content)); + return Promise.resolve(oid); + }), + writeTree: vi.fn().mockResolvedValue('mock-tree-oid'), + readBlob: vi.fn().mockImplementation((oid) => { + const blob = blobs.get(oid); + if (!blob) {return Promise.reject(new Error(`Blob not found: ${oid}`));} + return Promise.resolve(blob); + }), + }; + const service = new CasService({ + persistence: mockPersistence, + crypto, + codec: new JsonCodec(), + chunkSize: 1024, + }); + return { mockPersistence, service, blobs, crypto }; +} + +// --------------------------------------------------------------------------- +// 1. 
deriveKey with pbkdf2 returns 32-byte key +// --------------------------------------------------------------------------- +describe('CasService.deriveKey() – pbkdf2', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('deriveKey with pbkdf2 returns 32-byte key', async () => { + const result = await service.deriveKey({ passphrase: 'test-passphrase' }); + + expect(Buffer.isBuffer(result.key)).toBe(true); + expect(result.key.length).toBe(32); + expect(Buffer.isBuffer(result.salt)).toBe(true); + expect(result.salt.length).toBe(32); + expect(result.params).toBeDefined(); + expect(result.params.algorithm).toBe('pbkdf2'); + expect(result.params.keyLength).toBe(32); + expect(typeof result.params.iterations).toBe('number'); + expect(result.params.iterations).toBeGreaterThan(0); + expect(typeof result.params.salt).toBe('string'); // base64-encoded + }); +}); + +// --------------------------------------------------------------------------- +// 2. deriveKey with scrypt returns 32-byte key +// --------------------------------------------------------------------------- +describe('CasService.deriveKey() – scrypt', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('deriveKey with scrypt returns 32-byte key', async () => { + const result = await service.deriveKey({ + passphrase: 'test-passphrase', + algorithm: 'scrypt', + }); + + expect(Buffer.isBuffer(result.key)).toBe(true); + expect(result.key.length).toBe(32); + expect(Buffer.isBuffer(result.salt)).toBe(true); + expect(result.salt.length).toBe(32); + expect(result.params).toBeDefined(); + expect(result.params.algorithm).toBe('scrypt'); + expect(result.params.keyLength).toBe(32); + expect(typeof result.params.cost).toBe('number'); + expect(typeof result.params.blockSize).toBe('number'); + expect(typeof result.params.parallelization).toBe('number'); + // scrypt params should NOT have iterations + expect(result.params.iterations).toBeUndefined(); + }); +}); + 
+// --------------------------------------------------------------------------- +// 3. same passphrase + salt yields same key (determinism) +// --------------------------------------------------------------------------- +describe('CasService.deriveKey() – determinism', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('same passphrase + salt yields same key', async () => { + const salt = randomBytes(32); + const passphrase = 'deterministic-passphrase'; + + const result1 = await service.deriveKey({ passphrase, salt }); + const result2 = await service.deriveKey({ passphrase, salt }); + + expect(result1.key.equals(result2.key)).toBe(true); + }); + + it('same passphrase + salt yields same key with scrypt', async () => { + const salt = randomBytes(32); + const passphrase = 'deterministic-passphrase-scrypt'; + + const result1 = await service.deriveKey({ passphrase, salt, algorithm: 'scrypt' }); + const result2 = await service.deriveKey({ passphrase, salt, algorithm: 'scrypt' }); + + expect(result1.key.equals(result2.key)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 4. 
different salts yield different keys +// --------------------------------------------------------------------------- +describe('CasService.deriveKey() – different salts', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('different salts yield different keys', async () => { + const passphrase = 'same-passphrase'; + const salt1 = randomBytes(32); + const salt2 = randomBytes(32); + + const result1 = await service.deriveKey({ passphrase, salt: salt1 }); + const result2 = await service.deriveKey({ passphrase, salt: salt2 }); + + expect(result1.key.equals(result2.key)).toBe(false); + }); + + it('different salts yield different keys with scrypt', async () => { + const passphrase = 'same-passphrase-scrypt'; + const salt1 = randomBytes(32); + const salt2 = randomBytes(32); + + const result1 = await service.deriveKey({ passphrase, salt: salt1, algorithm: 'scrypt' }); + const result2 = await service.deriveKey({ passphrase, salt: salt2, algorithm: 'scrypt' }); + + expect(result1.key.equals(result2.key)).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// 5. 
store with passphrase + restore with passphrase round-trip +// --------------------------------------------------------------------------- +describe('CasService – passphrase store/restore round-trip', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('store with passphrase + restore with passphrase round-trip', async () => { + const original = Buffer.from('hello, passphrase-based encryption'); + const passphrase = 'my-secret-passphrase'; + + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-test', + filename: 'kdf-test.bin', + passphrase, + }); + + expect(manifest.encryption).toBeDefined(); + expect(manifest.encryption.encrypted).toBe(true); + expect(manifest.encryption.kdf).toBeDefined(); + + const { buffer, bytesWritten } = await service.restore({ manifest, passphrase }); + expect(buffer.equals(original)).toBe(true); + expect(bytesWritten).toBe(original.length); + }); +}); + +describe('CasService – passphrase multi-chunk round-trip', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('round-trips a multi-chunk file with passphrase', async () => { + const original = randomBytes(3 * 1024); + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-multi', + filename: 'kdf-multi.bin', + passphrase: 'multi-chunk-passphrase', + }); + + expect(manifest.chunks.length).toBe(3); + expect(manifest.encryption.kdf).toBeDefined(); + + const { buffer } = await service.restore({ manifest, passphrase: 'multi-chunk-passphrase' }); + expect(buffer.equals(original)).toBe(true); + }); + + it('round-trips an exact chunk-boundary file with passphrase', async () => { + const original = randomBytes(2 * 1024); + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-exact', + filename: 'kdf-exact.bin', + passphrase: 'exact-boundary', + }); + + expect(manifest.chunks.length).toBe(2); + + const { buffer } = await 
service.restore({ manifest, passphrase: 'exact-boundary' }); + expect(buffer.equals(original)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 6. wrong passphrase fails restore with INTEGRITY_ERROR +// --------------------------------------------------------------------------- +describe('CasService – wrong passphrase fails restore', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('wrong passphrase fails restore with INTEGRITY_ERROR', async () => { + const original = Buffer.from('sensitive payload'); + const correctPassphrase = 'correct-horse-battery-staple'; + const wrongPassphrase = 'wrong-horse-battery-staple'; + + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-wrong', + filename: 'kdf-wrong.bin', + passphrase: correctPassphrase, + }); + + await expect( + service.restore({ manifest, passphrase: wrongPassphrase }), + ).rejects.toThrow(CasError); + + try { + await service.restore({ manifest, passphrase: wrongPassphrase }); + } catch (err) { + expect(err.code).toBe('INTEGRITY_ERROR'); + } + }); +}); + +// --------------------------------------------------------------------------- +// 7. 
manifest includes KDF params in encryption metadata +// --------------------------------------------------------------------------- +describe('CasService – manifest KDF metadata (pbkdf2)', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('manifest includes KDF params in encryption metadata', async () => { + const manifest = await service.store({ + source: bufferSource(Buffer.from('metadata check')), + slug: 'kdf-meta', + filename: 'kdf-meta.bin', + passphrase: 'metadata-passphrase', + }); + + expect(manifest.encryption).toBeDefined(); + expect(manifest.encryption.encrypted).toBe(true); + expect(manifest.encryption.algorithm).toBe('aes-256-gcm'); + + const kdf = manifest.encryption.kdf; + expect(kdf).toBeDefined(); + expect(kdf.algorithm).toBe('pbkdf2'); + expect(typeof kdf.salt).toBe('string'); + expect(kdf.keyLength).toBe(32); + expect(typeof kdf.iterations).toBe('number'); + }); +}); + +describe('CasService – manifest KDF metadata (scrypt)', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('manifest includes KDF params in encryption metadata', async () => { + const manifest = await service.store({ + source: bufferSource(Buffer.from('scrypt metadata check')), + slug: 'kdf-meta-scrypt', + filename: 'kdf-meta-scrypt.bin', + passphrase: 'scrypt-metadata-passphrase', + kdfOptions: { algorithm: 'scrypt' }, + }); + + const kdf = manifest.encryption.kdf; + expect(kdf).toBeDefined(); + expect(kdf.algorithm).toBe('scrypt'); + expect(typeof kdf.salt).toBe('string'); + expect(kdf.keyLength).toBe(32); + expect(typeof kdf.cost).toBe('number'); + expect(typeof kdf.blockSize).toBe('number'); + expect(kdf.iterations).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// 8. 
passphrase store with scrypt + restore round-trip +// --------------------------------------------------------------------------- +describe('CasService – scrypt passphrase round-trip', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('passphrase store with scrypt + restore round-trip', async () => { + const original = Buffer.from('scrypt round-trip content'); + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-scrypt', + filename: 'kdf-scrypt.bin', + passphrase: 'scrypt-passphrase', + kdfOptions: { algorithm: 'scrypt' }, + }); + + expect(manifest.encryption.kdf.algorithm).toBe('scrypt'); + const { buffer } = await service.restore({ manifest, passphrase: 'scrypt-passphrase' }); + expect(buffer.equals(original)).toBe(true); + }); + + it('scrypt round-trip with multi-chunk data', async () => { + const original = randomBytes(3 * 1024); + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-scrypt-multi', + filename: 'kdf-scrypt-multi.bin', + passphrase: 'scrypt-multi-chunk', + kdfOptions: { algorithm: 'scrypt' }, + }); + + expect(manifest.chunks.length).toBe(3); + const { buffer } = await service.restore({ manifest, passphrase: 'scrypt-multi-chunk' }); + expect(buffer.equals(original)).toBe(true); + }); +}); + +describe('CasService – wrong scrypt passphrase', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('wrong passphrase with scrypt fails with INTEGRITY_ERROR', async () => { + const manifest = await service.store({ + source: bufferSource(Buffer.from('scrypt integrity test')), + slug: 'kdf-scrypt-wrong', + filename: 'kdf-scrypt-wrong.bin', + passphrase: 'correct-scrypt-pass', + kdfOptions: { algorithm: 'scrypt' }, + }); + + await expect( + service.restore({ manifest, passphrase: 'wrong-scrypt-pass' }), + ).rejects.toThrow(CasError); + }); +}); + +// --------------------------------------------------------------------------- 
+// 9. passphrase + compression round-trip +// --------------------------------------------------------------------------- +describe('CasService – passphrase + compression round-trip', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('passphrase + compression round-trip', async () => { + const original = Buffer.alloc(2048, 'abcdefghij'); + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-compress', + filename: 'kdf-compress.bin', + passphrase: 'compress-and-encrypt', + compression: { algorithm: 'gzip' }, + }); + + expect(manifest.encryption.kdf).toBeDefined(); + expect(manifest.compression.algorithm).toBe('gzip'); + + const { buffer } = await service.restore({ manifest, passphrase: 'compress-and-encrypt' }); + expect(buffer.equals(original)).toBe(true); + }); + + it('passphrase + compression round-trip with scrypt', async () => { + const original = Buffer.alloc(3072, 'compressible-pattern-'); + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-scrypt-compress', + filename: 'kdf-scrypt-compress.bin', + passphrase: 'scrypt-compress', + kdfOptions: { algorithm: 'scrypt' }, + compression: { algorithm: 'gzip' }, + }); + + expect(manifest.encryption.kdf.algorithm).toBe('scrypt'); + const { buffer } = await service.restore({ manifest, passphrase: 'scrypt-compress' }); + expect(buffer.equals(original)).toBe(true); + }); +}); + +describe('CasService – passphrase + compression edge cases', () => { + let service; + + beforeEach(() => { + ({ service } = setup()); + }); + + it('passphrase + compression round-trip with random data', async () => { + const original = randomBytes(2 * 1024); + const manifest = await service.store({ + source: bufferSource(original), + slug: 'kdf-compress-random', + filename: 'kdf-compress-random.bin', + passphrase: 'random-compress-encrypt', + compression: { algorithm: 'gzip' }, + }); + + const { buffer } = await service.restore({ manifest, 
passphrase: 'random-compress-encrypt' }); + expect(buffer.equals(original)).toBe(true); + }); + + it('wrong passphrase with compression fails with INTEGRITY_ERROR', async () => { + const manifest = await service.store({ + source: bufferSource(Buffer.alloc(1024, 'repeated')), + slug: 'kdf-compress-wrong', + filename: 'kdf-compress-wrong.bin', + passphrase: 'correct-compress-pass', + compression: { algorithm: 'gzip' }, + }); + + await expect( + service.restore({ manifest, passphrase: 'wrong-compress-pass' }), + ).rejects.toThrow(CasError); + }); +}); diff --git a/test/unit/domain/services/CasService.merkle.test.js b/test/unit/domain/services/CasService.merkle.test.js new file mode 100644 index 0000000..fc33ff3 --- /dev/null +++ b/test/unit/domain/services/CasService.merkle.test.js @@ -0,0 +1,462 @@ +import { describe, it, expect, vi } from 'vitest'; +import { randomBytes } from 'node:crypto'; +import CasService from '../../../../src/domain/services/CasService.js'; +import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; +import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import Manifest from '../../../../src/domain/value-objects/Manifest.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Async iterable that yields a single buffer. + */ +async function* bufferSource(buf) { + yield buf; +} + +/** + * Builds CasService with an in-memory blob/tree store. + * @param {number} merkleThreshold Chunk count threshold for Merkle manifests. + */ +function setup(merkleThreshold = 5) { + const crypto = new NodeCryptoAdapter(); + const blobs = new Map(); + const trees = new Map(); + let treeCounter = 0; + const mockPersistence = { + writeBlob: vi.fn().mockImplementation((content) => { + const oid = crypto.sha256(Buffer.isBuffer(content) ? 
content : Buffer.from(content)); + blobs.set(oid, Buffer.isBuffer(content) ? content : Buffer.from(content)); + return Promise.resolve(oid); + }), + writeTree: vi.fn().mockImplementation((entries) => { + const oid = `tree-${treeCounter++}`; + trees.set(oid, entries); + return Promise.resolve(oid); + }), + readBlob: vi.fn().mockImplementation((oid) => { + const blob = blobs.get(oid); + if (!blob) {return Promise.reject(new Error(`Blob not found: ${oid}`));} + return Promise.resolve(blob); + }), + readTree: vi.fn().mockImplementation((treeOid) => { + const entries = trees.get(treeOid); + if (!entries) {return Promise.reject(new Error(`Tree not found: ${treeOid}`));} + return Promise.resolve(entries.map((e) => { + const match = e.match(/^(\d+) (\w+) ([^\t]+)\t(.+)$/); + return { mode: match[1], type: match[2], oid: match[3], name: match[4] }; + })); + }), + }; + const codec = new JsonCodec(); + const service = new CasService({ + persistence: mockPersistence, + crypto, + codec, + chunkSize: 1024, + merkleThreshold, + }); + return { mockPersistence, service, blobs, trees, crypto, codec }; +} + +/** + * Generates a deterministic buffer of a given size. + * @param {number} size Size in bytes. + * @param {number} seed Seed byte for deterministic content. + */ +function generateBuffer(size, seed = 0xAB) { + const buf = Buffer.alloc(size); + for (let i = 0; i < size; i++) { + buf[i] = (seed + i) & 0xFF; + } + return buf; +} + +// --------------------------------------------------------------------------- +// 1. 
v1 manifest when chunks <= threshold +// --------------------------------------------------------------------------- +describe('CasService Merkle – createTree produces v1 manifest when chunks <= threshold', () => { + it('stores small data and produces a v1 manifest without subManifests', async () => { + const { service, blobs, codec } = setup(5); + + // 3KB -> 3 chunks with chunkSize=1024 — below threshold of 5 + const data = generateBuffer(3 * 1024); + const manifest = await service.store({ + source: bufferSource(data), + slug: 'small-asset', + filename: 'small.bin', + }); + + expect(manifest.chunks).toHaveLength(3); + + const treeOid = await service.createTree({ manifest }); + expect(treeOid).toMatch(/^tree-/); + + // Find the manifest blob in the store and decode it + const manifestBlobContent = findManifestBlob(blobs, codec); + expect(manifestBlobContent).toBeDefined(); + expect(manifestBlobContent.version).toBe(1); + expect(manifestBlobContent.subManifests).toBeUndefined(); + expect(manifestBlobContent.chunks).toHaveLength(3); + }); +}); + +// --------------------------------------------------------------------------- +// 2. 
v2 Merkle manifest when chunks > threshold +// --------------------------------------------------------------------------- +describe('CasService Merkle – createTree produces v2 Merkle manifest when chunks > threshold', () => { + it('stores 6KB+ data and produces a v2 manifest with subManifests', async () => { + const { service, blobs, codec } = setup(5); + + // 6KB -> 6 chunks with chunkSize=1024 — exceeds threshold of 5 + const data = generateBuffer(6 * 1024); + const manifest = await service.store({ + source: bufferSource(data), + slug: 'large-asset', + filename: 'large.bin', + }); + + expect(manifest.chunks).toHaveLength(6); + + const treeOid = await service.createTree({ manifest }); + expect(treeOid).toMatch(/^tree-/); + + // Decode the root manifest blob + const rootManifest = findLastManifestBlob(blobs, codec); + expect(rootManifest).toBeDefined(); + expect(rootManifest.version).toBe(2); + expect(rootManifest.chunks).toEqual([]); + expect(rootManifest.subManifests).toBeDefined(); + expect(rootManifest.subManifests.length).toBeGreaterThan(0); + + // With 6 chunks and threshold=5, there should be 2 sub-manifests: + // group 0: chunks 0-4 (5 chunks), group 1: chunk 5 (1 chunk) + expect(rootManifest.subManifests).toHaveLength(2); + }); +}); + +// --------------------------------------------------------------------------- +// 3. 
readManifest reconstitutes v2 manifest into flat chunk list +// --------------------------------------------------------------------------- +describe('CasService Merkle – readManifest reconstitutes v2 manifest into flat chunk list', () => { + it('reads a v2 tree and returns a Manifest with all chunks populated', async () => { + const { service } = setup(5); + + const data = generateBuffer(8 * 1024); // 8 chunks + const manifest = await service.store({ + source: bufferSource(data), + slug: 'reconstitute-test', + filename: 'recon.bin', + }); + + expect(manifest.chunks).toHaveLength(8); + + const treeOid = await service.createTree({ manifest }); + const reconstituted = await service.readManifest({ treeOid }); + + expect(reconstituted).toBeInstanceOf(Manifest); + expect(reconstituted.chunks).toHaveLength(8); + expect(reconstituted.slug).toBe('reconstitute-test'); + expect(reconstituted.filename).toBe('recon.bin'); + expect(reconstituted.size).toBe(8 * 1024); + + // Verify chunk ordering and metadata match + for (let i = 0; i < 8; i++) { + expect(reconstituted.chunks[i].index).toBe(i); + expect(reconstituted.chunks[i].size).toBe(1024); + expect(reconstituted.chunks[i].digest).toBe(manifest.chunks[i].digest); + expect(reconstituted.chunks[i].blob).toBe(manifest.chunks[i].blob); + } + }); +}); + +// --------------------------------------------------------------------------- +// 4. 
v2 store+createTree+readManifest+restore round-trip +// --------------------------------------------------------------------------- +describe('CasService Merkle – v2 full round-trip', () => { + it('stores, creates tree, reads manifest, and restores byte-identical data', async () => { + const { service } = setup(5); + + const original = generateBuffer(7 * 1024); // 7 chunks > threshold 5 + const manifest = await service.store({ + source: bufferSource(original), + slug: 'roundtrip-v2', + filename: 'roundtrip.bin', + }); + + const treeOid = await service.createTree({ manifest }); + const readBack = await service.readManifest({ treeOid }); + + expect(readBack.chunks).toHaveLength(7); + + const { buffer: restored, bytesWritten } = await service.restore({ manifest: readBack }); + + expect(bytesWritten).toBe(original.length); + expect(restored.equals(original)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 5. v1 manifest still works with v2-capable code +// --------------------------------------------------------------------------- +describe('CasService Merkle – v1 manifest backward compatibility', () => { + it('stores small data and performs a full round-trip without Merkle splitting', async () => { + const { service } = setup(5); + + const original = generateBuffer(4 * 1024); // 4 chunks <= threshold 5 + const manifest = await service.store({ + source: bufferSource(original), + slug: 'v1-compat', + filename: 'v1.bin', + }); + + expect(manifest.chunks).toHaveLength(4); + + const treeOid = await service.createTree({ manifest }); + const readBack = await service.readManifest({ treeOid }); + + expect(readBack).toBeInstanceOf(Manifest); + expect(readBack.chunks).toHaveLength(4); + expect(readBack.version).toBe(1); + + const { buffer: restored } = await service.restore({ manifest: readBack }); + expect(restored.equals(original)).toBe(true); + }); +}); + +// 
--------------------------------------------------------------------------- +// 6. Sub-manifest references have correct startIndex and chunkCount +// --------------------------------------------------------------------------- +describe('CasService Merkle – sub-manifest references have correct startIndex and chunkCount', () => { + it('verifies subManifests array entries for 12-chunk data with threshold=5', async () => { + const { service, blobs, codec } = setup(5); + + // 12 chunks -> groups: [0..4] (5), [5..9] (5), [10..11] (2) + const data = generateBuffer(12 * 1024); + const manifest = await service.store({ + source: bufferSource(data), + slug: 'sub-refs-test', + filename: 'subrefs.bin', + }); + + expect(manifest.chunks).toHaveLength(12); + + await service.createTree({ manifest }); + + const rootManifest = findLastManifestBlob(blobs, codec); + expect(rootManifest.version).toBe(2); + expect(rootManifest.subManifests).toHaveLength(3); + + // Group 0: chunks 0-4 + expect(rootManifest.subManifests[0].startIndex).toBe(0); + expect(rootManifest.subManifests[0].chunkCount).toBe(5); + expect(rootManifest.subManifests[0].oid).toBeTruthy(); + + // Group 1: chunks 5-9 + expect(rootManifest.subManifests[1].startIndex).toBe(5); + expect(rootManifest.subManifests[1].chunkCount).toBe(5); + expect(rootManifest.subManifests[1].oid).toBeTruthy(); + + // Group 2: chunks 10-11 + expect(rootManifest.subManifests[2].startIndex).toBe(10); + expect(rootManifest.subManifests[2].chunkCount).toBe(2); + expect(rootManifest.subManifests[2].oid).toBeTruthy(); + + // Verify that sub-manifest OIDs point to actual blobs with correct chunk data + for (const ref of rootManifest.subManifests) { + const subBlob = blobs.get(ref.oid); + expect(subBlob).toBeDefined(); + const subData = codec.decode(subBlob); + expect(subData.chunks).toHaveLength(ref.chunkCount); + // First chunk in each group should match startIndex + expect(subData.chunks[0].index).toBe(ref.startIndex); + } + }); +}); + +// 
--------------------------------------------------------------------------- +// 7. Exactly at threshold boundary uses v1 +// --------------------------------------------------------------------------- +describe('CasService Merkle – exactly at threshold boundary uses v1', () => { + it('stores exactly 5 chunks (= threshold) and produces a v1 manifest', async () => { + const { service, blobs, codec } = setup(5); + + const data = generateBuffer(5 * 1024); // exactly 5 chunks + const manifest = await service.store({ + source: bufferSource(data), + slug: 'boundary-v1', + filename: 'boundary.bin', + }); + + expect(manifest.chunks).toHaveLength(5); + + await service.createTree({ manifest }); + + const rootManifest = findManifestBlob(blobs, codec); + expect(rootManifest.version).toBe(1); + expect(rootManifest.subManifests).toBeUndefined(); + expect(rootManifest.chunks).toHaveLength(5); + }); +}); + +// --------------------------------------------------------------------------- +// 8. One above threshold uses v2 +// --------------------------------------------------------------------------- +describe('CasService Merkle – one above threshold uses v2', () => { + it('stores 6 chunks (threshold + 1) and produces a v2 manifest', async () => { + const { service, blobs, codec } = setup(5); + + const data = generateBuffer(6 * 1024); // 6 chunks + const manifest = await service.store({ + source: bufferSource(data), + slug: 'boundary-v2', + filename: 'boundary-v2.bin', + }); + + expect(manifest.chunks).toHaveLength(6); + + await service.createTree({ manifest }); + + const rootManifest = findLastManifestBlob(blobs, codec); + expect(rootManifest.version).toBe(2); + expect(rootManifest.chunks).toEqual([]); + expect(rootManifest.subManifests).toHaveLength(2); + + // Group 0: 5 chunks, Group 1: 1 chunk + expect(rootManifest.subManifests[0].chunkCount).toBe(5); + expect(rootManifest.subManifests[0].startIndex).toBe(0); + expect(rootManifest.subManifests[1].chunkCount).toBe(1); + 
expect(rootManifest.subManifests[1].startIndex).toBe(5); + }); +}); + +// --------------------------------------------------------------------------- +// 9. v2 with encryption round-trip +// --------------------------------------------------------------------------- +describe('CasService Merkle – v2 with encryption round-trip', () => { + it('stores encrypted data exceeding threshold, then restores byte-identical data', async () => { + const { service } = setup(5); + const encryptionKey = randomBytes(32); + + const original = generateBuffer(8 * 1024); // 8+ encrypted chunks (ciphertext may differ in size) + const manifest = await service.store({ + source: bufferSource(original), + slug: 'encrypted-merkle', + filename: 'encrypted.bin', + encryptionKey, + }); + + // Encrypted content should exceed threshold and trigger v2 + expect(manifest.chunks.length).toBeGreaterThan(5); + expect(manifest.encryption).toBeDefined(); + expect(manifest.encryption.encrypted).toBe(true); + + const treeOid = await service.createTree({ manifest }); + const readBack = await service.readManifest({ treeOid }); + + expect(readBack).toBeInstanceOf(Manifest); + expect(readBack.chunks.length).toBe(manifest.chunks.length); + expect(readBack.encryption).toBeDefined(); + expect(readBack.encryption.encrypted).toBe(true); + + const { buffer: restored } = await service.restore({ + manifest: readBack, + encryptionKey, + }); + + expect(restored.equals(original)).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 10. Fuzz: round-trip across various chunk counts +// --------------------------------------------------------------------------- +describe('CasService Merkle – fuzz round-trip across various chunk counts', () => { + const chunkCounts = [1, 5, 6, 10, 25]; + + for (const count of chunkCounts) { + it(`round-trips ${count} chunk(s) correctly (${count <= 5 ? 
'v1' : 'v2'})`, async () => { + const { service, blobs, codec } = setup(5); + + const original = generateBuffer(count * 1024); + const manifest = await service.store({ + source: bufferSource(original), + slug: `fuzz-${count}`, + filename: `fuzz-${count}.bin`, + }); + + expect(manifest.chunks).toHaveLength(count); + + const treeOid = await service.createTree({ manifest }); + + // Verify manifest version in the blob store + const rootManifest = findLastManifestBlob(blobs, codec); + if (count <= 5) { + expect(rootManifest.version).toBe(1); + expect(rootManifest.subManifests).toBeUndefined(); + expect(rootManifest.chunks).toHaveLength(count); + } else { + expect(rootManifest.version).toBe(2); + expect(rootManifest.chunks).toEqual([]); + expect(rootManifest.subManifests).toBeDefined(); + expect(rootManifest.subManifests.length).toBe(Math.ceil(count / 5)); + + // Verify total chunk count across all sub-manifests + const totalChunks = rootManifest.subManifests.reduce((sum, ref) => sum + ref.chunkCount, 0); + expect(totalChunks).toBe(count); + } + + // Full round-trip: readManifest + restore + const readBack = await service.readManifest({ treeOid }); + expect(readBack.chunks).toHaveLength(count); + + const { buffer: restored } = await service.restore({ manifest: readBack }); + expect(restored.equals(original)).toBe(true); + }); + } +}); + +// --------------------------------------------------------------------------- +// Utility: find manifest blobs in the in-memory store +// --------------------------------------------------------------------------- + +/** + * Finds the first manifest-like JSON blob in the blobs Map. + * Manifest blobs contain "slug" and "filename" fields. + * Returns the decoded object, or undefined if not found. 
+ */ +function findManifestBlob(blobs, codec) { + for (const [, buf] of blobs) { + try { + const decoded = codec.decode(buf); + if (decoded && typeof decoded.slug === 'string' && typeof decoded.filename === 'string') { + return decoded; + } + } catch { + // Not a JSON blob, skip + } + } + return undefined; +} + +/** + * Finds the last manifest-like JSON blob in the blobs Map. + * When Merkle splitting occurs, sub-manifests are written first and the root + * manifest is written last, so iterating to the end yields the root. + */ +function findLastManifestBlob(blobs, codec) { + let last; + for (const [, buf] of blobs) { + try { + const decoded = codec.decode(buf); + if (decoded && typeof decoded.slug === 'string' && typeof decoded.filename === 'string') { + last = decoded; + } + } catch { + // Not a JSON blob, skip + } + } + return last; +} From f978a286ab1feae921fc6da08fe17c0de5747d0b Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:49:48 -0800 Subject: [PATCH 02/12] fix(docs): remove incorrect `hash` field from KDF example in GUIDE The KDF output example showed `hash: 'sha-512'` but KdfParams schema does not include a `hash` field. Remove it to match the actual output. --- GUIDE.md | 1 - 1 file changed, 1 deletion(-) diff --git a/GUIDE.md b/GUIDE.md index cb388ad..78c088d 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -844,7 +844,6 @@ console.log(manifest.encryption.kdf); // algorithm: 'pbkdf2', // salt: 'base64-encoded-salt', // iterations: 100000, -// hash: 'sha-512', // keyLength: 32 // } ``` From 6d3a386b149fe81da5d50697e0d2ceabb6587d05 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:50:15 -0800 Subject: [PATCH 03/12] fix: forward merkleThreshold from ContentAddressableStore to CasService The facade constructor now accepts and forwards merkleThreshold to CasService, matching the GUIDE documentation example. 
--- index.d.ts | 1 + index.js | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/index.d.ts b/index.d.ts index 2ed4f24..e684c5e 100644 --- a/index.d.ts +++ b/index.d.ts @@ -79,6 +79,7 @@ export interface ContentAddressableStoreOptions { codec?: CodecPort; crypto?: CryptoPort; policy?: unknown; + merkleThreshold?: number; } /** diff --git a/index.js b/index.js index 8fac546..753be66 100644 --- a/index.js +++ b/index.js @@ -55,13 +55,15 @@ export default class ContentAddressableStore { * @param {import('./src/ports/CodecPort.js').default} [options.codec] - Manifest codec (default JsonCodec). * @param {import('./src/ports/CryptoPort.js').default} [options.crypto] - Crypto adapter (auto-detected if omitted). * @param {import('@git-stunts/alfred').Policy} [options.policy] - Resilience policy for Git I/O. + * @param {number} [options.merkleThreshold=1000] - Chunk count threshold for Merkle manifests. */ - constructor({ plumbing, chunkSize, codec, policy, crypto }) { + constructor({ plumbing, chunkSize, codec, policy, crypto, merkleThreshold }) { this.plumbing = plumbing; this.chunkSizeConfig = chunkSize; this.codecConfig = codec; this.policyConfig = policy; this.cryptoConfig = crypto; + this.merkleThresholdConfig = merkleThreshold; this.service = null; this.#servicePromise = null; } @@ -96,6 +98,7 @@ export default class ContentAddressableStore { chunkSize: this.chunkSizeConfig, codec: this.codecConfig || new JsonCodec(), crypto, + merkleThreshold: this.merkleThresholdConfig, }); return this.service; } From 3d5adce4a8cc7aa1f4dd64cbc29373ac1f34664c Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:50:36 -0800 Subject: [PATCH 04/12] fix: forward passphrase, kdfOptions, compression through storeFile() storeFile() now accepts and forwards passphrase, kdfOptions, and compression options to service.store(), matching the store() API. 
--- index.d.ts | 3 +++ index.js | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/index.d.ts b/index.d.ts index e684c5e..a2b900d 100644 --- a/index.d.ts +++ b/index.d.ts @@ -123,6 +123,9 @@ export default class ContentAddressableStore { slug: string; filename?: string; encryptionKey?: Buffer; + passphrase?: string; + kdfOptions?: Omit; + compression?: { algorithm: "gzip" }; }): Promise; store(options: { diff --git a/index.js b/index.js index 753be66..a3f9ca4 100644 --- a/index.js +++ b/index.js @@ -179,9 +179,12 @@ export default class ContentAddressableStore { * @param {string} options.slug - Logical identifier for the stored asset. * @param {string} [options.filename] - Override filename (defaults to basename of filePath). * @param {Buffer} [options.encryptionKey] - 32-byte key for AES-256-GCM encryption. + * @param {string} [options.passphrase] - Derive encryption key from passphrase. + * @param {Object} [options.kdfOptions] - KDF options when using passphrase. + * @param {{ algorithm: 'gzip' }} [options.compression] - Enable compression. * @returns {Promise} The resulting manifest. */ - async storeFile({ filePath, slug, filename, encryptionKey }) { + async storeFile({ filePath, slug, filename, encryptionKey, passphrase, kdfOptions, compression }) { const source = createReadStream(filePath); const service = await this.#getService(); return await service.store({ @@ -189,6 +192,9 @@ export default class ContentAddressableStore { slug, filename: filename || path.basename(filePath), encryptionKey, + passphrase, + kdfOptions, + compression, }); } From a519b03b0d53eef5de6b167d02385e14473a5425 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:50:51 -0800 Subject: [PATCH 05/12] fix: use Buffer.from() for salt encoding in NodeCryptoAdapter Uint8Array.prototype.toString() ignores encoding arguments, producing comma-separated byte lists instead of base64. 
Wrap with Buffer.from() to ensure correct base64 output matching BunCryptoAdapter behavior. --- src/infrastructure/adapters/NodeCryptoAdapter.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/infrastructure/adapters/NodeCryptoAdapter.js b/src/infrastructure/adapters/NodeCryptoAdapter.js index 86a2559..c638322 100644 --- a/src/infrastructure/adapters/NodeCryptoAdapter.js +++ b/src/infrastructure/adapters/NodeCryptoAdapter.js @@ -81,7 +81,7 @@ export default class NodeCryptoAdapter extends CryptoPort { let key; const params = { algorithm, - salt: saltBuf.toString('base64'), + salt: Buffer.from(saltBuf).toString('base64'), keyLength, }; @@ -101,7 +101,7 @@ export default class NodeCryptoAdapter extends CryptoPort { throw new Error(`Unsupported KDF algorithm: ${algorithm}`); } - return { key, salt: saltBuf, params }; + return { key, salt: Buffer.from(saltBuf), params }; } /** From 133375e55d834d352fd70a78a91c4e34288a24e3 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:51:03 -0800 Subject: [PATCH 06/12] fix: validate KDF algorithm in WebCryptoAdapter Previously any non-pbkdf2 algorithm silently fell through to scrypt. Now explicitly validates and throws for unsupported algorithms, matching NodeCryptoAdapter and BunCryptoAdapter behavior. --- src/infrastructure/adapters/WebCryptoAdapter.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/infrastructure/adapters/WebCryptoAdapter.js b/src/infrastructure/adapters/WebCryptoAdapter.js index 0078fbc..7536679 100644 --- a/src/infrastructure/adapters/WebCryptoAdapter.js +++ b/src/infrastructure/adapters/WebCryptoAdapter.js @@ -132,9 +132,14 @@ export default class WebCryptoAdapter extends CryptoPort { const params = { algorithm, salt: this.#toBase64(saltBuf), keyLength }; const opts = { passphrase, saltBuf, iterations, cost, blockSize, parallelization, keyLength, params }; - const key = algorithm === 'pbkdf2' - ? 
await this.#derivePbkdf2(opts) - : await this.#deriveScrypt(opts); + let key; + if (algorithm === 'pbkdf2') { + key = await this.#derivePbkdf2(opts); + } else if (algorithm === 'scrypt') { + key = await this.#deriveScrypt(opts); + } else { + throw new Error(`Unsupported KDF algorithm: ${algorithm}`); + } return { key: Buffer.from(key), salt: Buffer.from(saltBuf), params }; } From d45a08f86be3b01826f419e4ff80b269d8fb90c2 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:51:21 -0800 Subject: [PATCH 07/12] fix: throw descriptive error when scrypt unavailable in WebCryptoAdapter The scrypt KDF path imports node:crypto which isn't available in browsers. Wrap the import in try/catch so non-Node runtimes get a clear error instead of an opaque import failure. --- src/infrastructure/adapters/WebCryptoAdapter.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/infrastructure/adapters/WebCryptoAdapter.js b/src/infrastructure/adapters/WebCryptoAdapter.js index 7536679..e816fd6 100644 --- a/src/infrastructure/adapters/WebCryptoAdapter.js +++ b/src/infrastructure/adapters/WebCryptoAdapter.js @@ -158,8 +158,13 @@ export default class WebCryptoAdapter extends CryptoPort { } async #deriveScrypt({ passphrase, saltBuf, cost, blockSize, parallelization, keyLength, params }) { - const { scrypt: scryptCb } = await import('node:crypto'); - const { promisify: promisifyFn } = await import('node:util'); + let scryptCb, promisifyFn; + try { + ({ scrypt: scryptCb } = await import('node:crypto')); + ({ promisify: promisifyFn } = await import('node:util')); + } catch { + throw new Error('scrypt KDF requires a Node.js-compatible runtime (node:crypto unavailable)'); + } const key = await promisifyFn(scryptCb)(passphrase, saltBuf, keyLength, { N: cost, r: blockSize, p: parallelization, }); From 04229c6410d919079a0ee7b3f7de8db4fba773a0 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 14:51:39 -0800 Subject: [PATCH 08/12] docs: update 
CHANGELOG with PR review fixes Add Fixed section covering storeFile param forwarding, NodeCryptoAdapter salt encoding, and WebCryptoAdapter KDF validation. --- CHANGELOG.md | 9 ++++++++- src/infrastructure/adapters/WebCryptoAdapter.js | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 993da8a..70535c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,9 +20,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **BREAKING**: Manifest schema now includes `version` field (defaults to 1). Existing v1 manifests are fully backward-compatible. - `CasService` constructor accepts new `merkleThreshold` option. -- `store()` accepts `passphrase`, `kdfOptions`, and `compression` options. +- `ContentAddressableStore` constructor now accepts and forwards `merkleThreshold` to `CasService`. +- `store()` and `storeFile()` accept `passphrase`, `kdfOptions`, and `compression` options. - `restore()` accepts `passphrase` option. +### Fixed +- `storeFile()` now forwards `passphrase`, `kdfOptions`, and `compression` options to `store()` (previously silently dropped). +- `NodeCryptoAdapter.deriveKey()` uses `Buffer.from(salt)` for base64 encoding, preventing corrupt output when salt is a `Uint8Array`. +- `WebCryptoAdapter.deriveKey()` now validates KDF algorithm and throws for unsupported values instead of silently falling through to scrypt. +- `WebCryptoAdapter` scrypt derivation now throws a descriptive error when `node:crypto` is unavailable (e.g. in browsers). 
+ ## [1.6.2] — OIDC publishing + JSR docs coverage (2026-02-07) ### Added diff --git a/src/infrastructure/adapters/WebCryptoAdapter.js b/src/infrastructure/adapters/WebCryptoAdapter.js index e816fd6..02b338f 100644 --- a/src/infrastructure/adapters/WebCryptoAdapter.js +++ b/src/infrastructure/adapters/WebCryptoAdapter.js @@ -158,7 +158,8 @@ export default class WebCryptoAdapter extends CryptoPort { } async #deriveScrypt({ passphrase, saltBuf, cost, blockSize, parallelization, keyLength, params }) { - let scryptCb, promisifyFn; + let scryptCb; + let promisifyFn; try { ({ scrypt: scryptCb } = await import('node:crypto')); ({ promisify: promisifyFn } = await import('node:util')); From 73e9c47e4aff3d2485a59f41d37149bf662d8695 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sat, 7 Feb 2026 23:04:29 -0800 Subject: [PATCH 09/12] docs: fix orphaned JSDoc blocks and stale cross-references - Move restore() JSDoc directly above its method (was orphaned by inserted helpers) - Move verifyIntegrity() JSDoc directly above its method (was orphaned by deriveKey) - Add missing passphrase param to restore() JSDoc - Update storeFile/restoreFile signatures in API.md to include v2 params - Fix stale "Section 10" cross-reference to "Section 13" in GUIDE.md --- GUIDE.md | 2 +- docs/API.md | 4 ++-- src/domain/services/CasService.js | 37 ++++++++++++++++--------------- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/GUIDE.md b/GUIDE.md index 78c088d..1428845 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -1380,7 +1380,7 @@ Every Git plumbing command is wrapped in a policy from `@git-stunts/alfred`. The default policy applies a 30-second timeout and retries up to 2 times with exponential backoff (100ms, then up to 2s). This handles transient filesystem errors and lock contention gracefully. You can override the policy at -construction time (see Section 10). +construction time (see Section 13). 
--- diff --git a/docs/API.md b/docs/API.md index b7774eb..4114675 100644 --- a/docs/API.md +++ b/docs/API.md @@ -147,7 +147,7 @@ const manifest = await cas.store({ #### storeFile ```javascript -await cas.storeFile({ filePath, slug, filename, encryptionKey }) +await cas.storeFile({ filePath, slug, filename, encryptionKey, passphrase, kdfOptions, compression }) ``` Convenience method that opens a file and stores it. @@ -208,7 +208,7 @@ const { buffer, bytesWritten } = await cas.restore({ manifest }); #### restoreFile ```javascript -await cas.restoreFile({ manifest, encryptionKey, outputPath }) +await cas.restoreFile({ manifest, encryptionKey, passphrase, outputPath }) ``` Restores content from a manifest and writes it to a file. diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index 1773ae5..f6d300a 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -346,19 +346,6 @@ export default class CasService extends EventEmitter { return buffers; } - /** - * Restores a file from its manifest by reading and reassembling chunks. - * - * If the manifest has encryption metadata, decrypts the reassembled - * ciphertext using the provided key. - * - * @param {Object} options - * @param {import('../value-objects/Manifest.js').default} options.manifest - The file manifest. - * @param {Buffer} [options.encryptionKey] - 32-byte key, required if manifest is encrypted. - * @returns {Promise<{ buffer: Buffer, bytesWritten: number }>} - * @throws {CasError} MISSING_KEY if manifest is encrypted but no key is provided. - * @throws {CasError} INTEGRITY_ERROR if chunk verification or decryption fails. - */ /** * Resolves the encryption key from a passphrase using KDF params from the manifest. * @private @@ -396,6 +383,20 @@ export default class CasService extends EventEmitter { return Promise.resolve(encryptionKey); } + /** + * Restores a file from its manifest by reading and reassembling chunks. 
+ * + * If the manifest has encryption metadata, decrypts the reassembled + * ciphertext using the provided key. + * + * @param {Object} options + * @param {import('../value-objects/Manifest.js').default} options.manifest - The file manifest. + * @param {Buffer} [options.encryptionKey] - 32-byte key, required if manifest is encrypted. + * @param {string} [options.passphrase] - Passphrase for KDF-based decryption. + * @returns {Promise<{ buffer: Buffer, bytesWritten: number }>} + * @throws {CasError} MISSING_KEY if manifest is encrypted but no key is provided. + * @throws {CasError} INTEGRITY_ERROR if chunk verification or decryption fails. + */ async restore({ manifest, encryptionKey, passphrase }) { const key = await this._resolveEncryptionKey(manifest, encryptionKey, passphrase); @@ -548,11 +549,6 @@ export default class CasService extends EventEmitter { return { referenced, total }; } - /** - * Verifies the integrity of a stored file by re-hashing its chunks. - * @param {import('../value-objects/Manifest.js').default} manifest - * @returns {Promise} - */ /** * Derives an encryption key from a passphrase using PBKDF2 or scrypt. * @param {Object} options @@ -570,6 +566,11 @@ export default class CasService extends EventEmitter { return await this.crypto.deriveKey(options); } + /** + * Verifies the integrity of a stored file by re-hashing its chunks. + * @param {import('../value-objects/Manifest.js').default} manifest + * @returns {Promise} + */ async verifyIntegrity(manifest) { for (const chunk of manifest.chunks) { const blob = await this.persistence.readBlob(chunk.blob); From 1cc94af8b0cf21482a98459a66b2baf26bcc5c01 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sun, 8 Feb 2026 09:02:40 -0800 Subject: [PATCH 10/12] fix: include sub-manifest blobs as tree entries to survive git gc Sub-manifest blobs were written to Git's ODB but not added as tree entries, making them unreachable and subject to garbage collection. 
Add them as `sub-manifest-N.json` entries in the tree. Also: use static imports for node:zlib/node:stream, validate merkleThreshold, and update CasService docs in API.md. --- docs/API.md | 7 +- src/domain/services/CasService.js | 13 +++- .../domain/services/CasService.merkle.test.js | 67 ++++++++++++++++++- 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/docs/API.md b/docs/API.md index 4114675..780efe9 100644 --- a/docs/API.md +++ b/docs/API.md @@ -485,7 +485,7 @@ Core domain service implementing CAS operations. Usually accessed via ContentAdd ### Constructor ```javascript -new CasService({ persistence, codec, crypto, chunkSize }) +new CasService({ persistence, codec, crypto, chunkSize, merkleThreshold }) ``` **Parameters:** @@ -518,8 +518,8 @@ const service = new CasService({ All methods from ContentAddressableStore delegate to CasService. See ContentAddressableStore documentation above for: -- `store({ source, slug, filename, encryptionKey })` -- `restore({ manifest, encryptionKey })` +- `store({ source, slug, filename, encryptionKey, passphrase, kdfOptions, compression })` +- `restore({ manifest, encryptionKey, passphrase })` - `createTree({ manifest })` - `verifyIntegrity(manifest)` - `readManifest({ treeOid })` @@ -527,6 +527,7 @@ All methods from ContentAddressableStore delegate to CasService. 
See ContentAddr - `findOrphanedChunks({ treeOids })` - `encrypt({ buffer, key })` - `decrypt({ buffer, key, meta })` +- `deriveKey(options)` ### EventEmitter diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index f6d300a..b7c9840 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -4,7 +4,8 @@ * @module */ import { EventEmitter } from 'node:events'; -import { gunzip } from 'node:zlib'; +import { gunzip, createGzip } from 'node:zlib'; +import { Readable } from 'node:stream'; import { promisify } from 'node:util'; import Manifest from '../value-objects/Manifest.js'; import CasError from '../errors/CasError.js'; @@ -44,6 +45,9 @@ export default class CasService extends EventEmitter { this.codec = codec; this.crypto = crypto; this.chunkSize = chunkSize; + if (!Number.isInteger(merkleThreshold) || merkleThreshold < 1) { + throw new Error('Merkle threshold must be a positive integer'); + } this.merkleThreshold = merkleThreshold; } @@ -172,8 +176,6 @@ export default class CasService extends EventEmitter { * @returns {AsyncIterable} */ async *_compressStream(source) { - const { createGzip } = await import('node:zlib'); - const { Readable } = await import('node:stream'); const gz = createGzip(); const input = Readable.from(source); const compressed = input.pipe(gz); @@ -309,8 +311,13 @@ export default class CasService extends EventEmitter { const serializedRoot = this.codec.encode(rootManifestData); const rootOid = await this.persistence.writeBlob(serializedRoot); + const subManifestEntries = subManifestRefs.map( + (ref, idx) => `100644 blob ${ref.oid}\tsub-manifest-${idx}.${this.codec.extension}`, + ); + const treeEntries = [ `100644 blob ${rootOid}\tmanifest.${this.codec.extension}`, + ...subManifestEntries, ...chunkBlobEntries, ]; diff --git a/test/unit/domain/services/CasService.merkle.test.js b/test/unit/domain/services/CasService.merkle.test.js index fc33ff3..7cd9eb1 100644 --- 
a/test/unit/domain/services/CasService.merkle.test.js +++ b/test/unit/domain/services/CasService.merkle.test.js @@ -277,7 +277,55 @@ describe('CasService Merkle – sub-manifest references have correct startIndex }); // --------------------------------------------------------------------------- -// 7. Exactly at threshold boundary uses v1 +// 7. Sub-manifest blobs are included as tree entries (survive git gc) +// --------------------------------------------------------------------------- +describe('CasService Merkle – sub-manifest blobs are included as tree entries', () => { + it('includes sub-manifest entries in the Git tree so they survive gc', async () => { + const { service, trees, codec } = setup(5); + + // 12 chunks -> 3 sub-manifests + const data = generateBuffer(12 * 1024); + const manifest = await service.store({ + source: bufferSource(data), + slug: 'tree-entry-test', + filename: 'tree-entry.bin', + }); + + const treeOid = await service.createTree({ manifest }); + const treeEntries = trees.get(treeOid); + + // Find sub-manifest entries by name pattern + const subManifestEntries = treeEntries.filter((e) => + e.includes(`sub-manifest-`), + ); + expect(subManifestEntries).toHaveLength(3); + expect(subManifestEntries[0]).toContain(`sub-manifest-0.${codec.extension}`); + expect(subManifestEntries[1]).toContain(`sub-manifest-1.${codec.extension}`); + expect(subManifestEntries[2]).toContain(`sub-manifest-2.${codec.extension}`); + }); + + it('does not include sub-manifest entries for v1 manifests', async () => { + const { service, trees } = setup(5); + + const data = generateBuffer(3 * 1024); // 3 chunks, below threshold + const manifest = await service.store({ + source: bufferSource(data), + slug: 'v1-no-sub', + filename: 'v1.bin', + }); + + const treeOid = await service.createTree({ manifest }); + const treeEntries = trees.get(treeOid); + + const subManifestEntries = treeEntries.filter((e) => + e.includes('sub-manifest-'), + ); + 
expect(subManifestEntries).toHaveLength(0); + }); +}); + +// --------------------------------------------------------------------------- +// 8. Exactly at threshold boundary uses v1 // --------------------------------------------------------------------------- describe('CasService Merkle – exactly at threshold boundary uses v1', () => { it('stores exactly 5 chunks (= threshold) and produces a v1 manifest', async () => { @@ -418,6 +466,23 @@ describe('CasService Merkle – fuzz round-trip across various chunk counts', () } }); +// --------------------------------------------------------------------------- +// 11. merkleThreshold validation +// --------------------------------------------------------------------------- +describe('CasService Merkle – merkleThreshold validation', () => { + it('rejects merkleThreshold of 0', () => { + expect(() => setup(0)).toThrow('Merkle threshold must be a positive integer'); + }); + + it('rejects negative merkleThreshold', () => { + expect(() => setup(-1)).toThrow('Merkle threshold must be a positive integer'); + }); + + it('rejects non-integer merkleThreshold', () => { + expect(() => setup(1.5)).toThrow('Merkle threshold must be a positive integer'); + }); +}); + // --------------------------------------------------------------------------- // Utility: find manifest blobs in the in-memory store // --------------------------------------------------------------------------- From 42dd58a305c62b0c2c31d1f1f9d43fe0d7f46e93 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sun, 8 Feb 2026 09:11:32 -0800 Subject: [PATCH 11/12] fix: guard against ambiguous options and validate compression algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reject providing both passphrase and encryptionKey in store/restore - Reject unsupported compression algorithms (only gzip supported) - Better error when passphrase given but manifest lacks KDF metadata - Make _resolveEncryptionKey async for consistent error 
propagation - Fix test section numbering in merkle tests (7→12) - Document merkleThreshold validation error in API.md --- docs/API.md | 5 +- src/domain/services/CasService.js | 57 ++++++++++--- .../domain/services/CasService.errors.test.js | 80 +++++++++++++++++++ .../domain/services/CasService.merkle.test.js | 8 +- 4 files changed, 132 insertions(+), 18 deletions(-) diff --git a/docs/API.md b/docs/API.md index 780efe9..ac6b5d1 100644 --- a/docs/API.md +++ b/docs/API.md @@ -496,7 +496,10 @@ new CasService({ persistence, codec, crypto, chunkSize, merkleThreshold }) - `chunkSize` (optional): `number` - Chunk size in bytes (default: 262144, minimum: 1024) - `merkleThreshold` (optional): `number` - Chunk count threshold for Merkle manifests (default: 1000) -**Throws:** `Error` if chunkSize is less than 1024 bytes +**Throws:** + +- `Error` if chunkSize is less than 1024 bytes +- `Error` if merkleThreshold is not a positive integer **Example:** diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index b7c9840..b967426 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -200,7 +200,36 @@ export default class CasService extends EventEmitter { * @param {{ algorithm: 'gzip' }} [options.compression] - Enable compression. * @returns {Promise} */ + /** + * Validates that passphrase and encryptionKey are not both provided. + * @private + */ + _validateKeySourceExclusive(encryptionKey, passphrase) { + if (passphrase && encryptionKey) { + throw new CasError( + 'Provide either encryptionKey or passphrase, not both', + 'INVALID_OPTIONS', + ); + } + } + + /** + * Validates and normalizes compression options. 
+ * @private + */ + _validateCompression(compression) { + if (compression?.algorithm && compression.algorithm !== 'gzip') { + throw new CasError( + `Unsupported compression algorithm: ${compression.algorithm}`, + 'INVALID_OPTIONS', + ); + } + } + async store({ source, slug, filename, encryptionKey, passphrase, kdfOptions, compression }) { + this._validateKeySourceExclusive(encryptionKey, passphrase); + this._validateCompression(compression); + let kdfParams; if (passphrase) { const derived = await this.deriveKey({ passphrase, ...kdfOptions }); @@ -212,17 +241,12 @@ export default class CasService extends EventEmitter { this._validateKey(encryptionKey); } - const manifestData = { - slug, - filename, - size: 0, - chunks: [], - }; + const manifestData = { slug, filename, size: 0, chunks: [] }; let processedSource = source; if (compression) { processedSource = this._compressStream(processedSource); - manifestData.compression = { algorithm: compression.algorithm || 'gzip' }; + manifestData.compression = { algorithm: 'gzip' }; } if (encryptionKey) { @@ -377,17 +401,24 @@ export default class CasService extends EventEmitter { * Resolves the encryption key from passphrase or validates the provided key. 
* @private */ - _resolveEncryptionKey(manifest, encryptionKey, passphrase) { - if (passphrase && manifest.encryption?.kdf) { - return this._resolveKeyFromPassphrase(passphrase, manifest.encryption.kdf); + async _resolveEncryptionKey(manifest, encryptionKey, passphrase) { + this._validateKeySourceExclusive(encryptionKey, passphrase); + + if (passphrase) { + if (manifest.encryption?.kdf) { + return this._resolveKeyFromPassphrase(passphrase, manifest.encryption.kdf); + } + throw new CasError( + 'Manifest was not stored with passphrase-based encryption; provide encryptionKey instead', + 'MISSING_KEY', + ); } if (encryptionKey) { this._validateKey(encryptionKey); - } - if (manifest.encryption?.encrypted && !encryptionKey) { + } else if (manifest.encryption?.encrypted) { throw new CasError('Encryption key required to restore encrypted content', 'MISSING_KEY'); } - return Promise.resolve(encryptionKey); + return encryptionKey; } /** diff --git a/test/unit/domain/services/CasService.errors.test.js b/test/unit/domain/services/CasService.errors.test.js index e0c4cff..19affae 100644 --- a/test/unit/domain/services/CasService.errors.test.js +++ b/test/unit/domain/services/CasService.errors.test.js @@ -43,6 +43,86 @@ describe('CasService – constructor – chunkSize validation', () => { }); }); +describe('CasService – store – mutual exclusion and validation', () => { + let service; + + beforeEach(() => { + service = new CasService({ + persistence: { + writeBlob: vi.fn().mockResolvedValue('mock-blob-oid'), + writeTree: vi.fn().mockResolvedValue('mock-tree-oid'), + readBlob: vi.fn().mockResolvedValue(Buffer.from('data')), + }, + crypto: new NodeCryptoAdapter(), + codec: new JsonCodec(), + chunkSize: 1024, + }); + }); + + it('rejects when both passphrase and encryptionKey are provided', async () => { + await expect( + service.store({ + source: (async function* () { yield Buffer.from('x'); })(), + slug: 'both', + filename: 'both.bin', + encryptionKey: Buffer.alloc(32), + passphrase: 
'secret', + }), + ).rejects.toThrow('Provide either encryptionKey or passphrase, not both'); + }); + + it('rejects unsupported compression algorithm', async () => { + await expect( + service.store({ + source: (async function* () { yield Buffer.from('x'); })(), + slug: 'brotli', + filename: 'brotli.bin', + compression: { algorithm: 'brotli' }, + }), + ).rejects.toThrow('Unsupported compression algorithm: brotli'); + }); +}); + +describe('CasService – restore – mutual exclusion', () => { + let service; + + beforeEach(() => { + service = new CasService({ + persistence: { + writeBlob: vi.fn().mockResolvedValue('mock-blob-oid'), + writeTree: vi.fn().mockResolvedValue('mock-tree-oid'), + readBlob: vi.fn().mockResolvedValue(Buffer.from('data')), + }, + crypto: new NodeCryptoAdapter(), + codec: new JsonCodec(), + chunkSize: 1024, + }); + }); + + it('rejects when both passphrase and encryptionKey are provided', async () => { + const manifest = new Manifest({ + slug: 'test', filename: 'test.bin', size: 0, chunks: [], + encryption: { + algorithm: 'aes-256-gcm', nonce: 'abc', tag: 'def', encrypted: true, + kdf: { algorithm: 'pbkdf2', salt: 'c2FsdA==', iterations: 1000, keyLength: 32 }, + }, + }); + await expect( + service.restore({ manifest, encryptionKey: Buffer.alloc(32), passphrase: 'secret' }), + ).rejects.toThrow('Provide either encryptionKey or passphrase, not both'); + }); + + it('rejects passphrase when manifest has no KDF metadata', async () => { + const manifest = new Manifest({ + slug: 'test', filename: 'test.bin', size: 0, chunks: [], + encryption: { algorithm: 'aes-256-gcm', nonce: 'abc', tag: 'def', encrypted: true }, + }); + await expect( + service.restore({ manifest, passphrase: 'secret' }), + ).rejects.toThrow('Manifest was not stored with passphrase-based encryption'); + }); +}); + describe('CasService – store', () => { let mockPersistence; diff --git a/test/unit/domain/services/CasService.merkle.test.js b/test/unit/domain/services/CasService.merkle.test.js 
index 7cd9eb1..22ef71a 100644 --- a/test/unit/domain/services/CasService.merkle.test.js +++ b/test/unit/domain/services/CasService.merkle.test.js @@ -350,7 +350,7 @@ describe('CasService Merkle – exactly at threshold boundary uses v1', () => { }); // --------------------------------------------------------------------------- -// 8. One above threshold uses v2 +// 9. One above threshold uses v2 // --------------------------------------------------------------------------- describe('CasService Merkle – one above threshold uses v2', () => { it('stores 6 chunks (threshold + 1) and produces a v2 manifest', async () => { @@ -381,7 +381,7 @@ describe('CasService Merkle – one above threshold uses v2', () => { }); // --------------------------------------------------------------------------- -// 9. v2 with encryption round-trip +// 10. v2 with encryption round-trip // --------------------------------------------------------------------------- describe('CasService Merkle – v2 with encryption round-trip', () => { it('stores encrypted data exceeding threshold, then restores byte-identical data', async () => { @@ -419,7 +419,7 @@ describe('CasService Merkle – v2 with encryption round-trip', () => { }); // --------------------------------------------------------------------------- -// 10. Fuzz: round-trip across various chunk counts +// 11. Fuzz: round-trip across various chunk counts // --------------------------------------------------------------------------- describe('CasService Merkle – fuzz round-trip across various chunk counts', () => { const chunkCounts = [1, 5, 6, 10, 25]; @@ -467,7 +467,7 @@ describe('CasService Merkle – fuzz round-trip across various chunk counts', () }); // --------------------------------------------------------------------------- -// 11. merkleThreshold validation +// 12. 
merkleThreshold validation // --------------------------------------------------------------------------- describe('CasService Merkle – merkleThreshold validation', () => { it('rejects merkleThreshold of 0', () => { From 2db4bbd678e6f1457e429bf602cb2759469b3b26 Mon Sep 17 00:00:00 2001 From: James Ross Date: Sun, 8 Feb 2026 09:22:29 -0800 Subject: [PATCH 12/12] fix: wrap decompression errors as CasError and fix orphaned store() JSDoc - Wrap gunzipAsync in try/catch, throw CasError INTEGRITY_ERROR on failure - Move _validateKeySourceExclusive and _validateCompression above store() JSDoc so the JSDoc attaches to the correct method - Document INVALID_OPTIONS error code in API.md (store/restore throws, error codes table) - Document INTEGRITY_ERROR for decompression failure in restore() throws --- docs/API.md | 5 +++ src/domain/services/CasService.js | 39 +++++++++++-------- .../services/CasService.compression.test.js | 24 +++++++++++- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/docs/API.md b/docs/API.md index ac6b5d1..63b2153 100644 --- a/docs/API.md +++ b/docs/API.md @@ -130,6 +130,8 @@ Stores content from an async iterable source. - `CasError` with code `INVALID_KEY_TYPE` if encryptionKey is not a Buffer - `CasError` with code `INVALID_KEY_LENGTH` if encryptionKey is not 32 bytes - `CasError` with code `STREAM_ERROR` if the source stream fails +- `CasError` with code `INVALID_OPTIONS` if both `passphrase` and `encryptionKey` are provided +- `CasError` with code `INVALID_OPTIONS` if an unsupported compression algorithm is specified **Example:** @@ -198,6 +200,8 @@ Restores content from a manifest and returns the buffer. 
- `CasError` with code `INVALID_KEY_LENGTH` if encryptionKey is not 32 bytes - `CasError` with code `INTEGRITY_ERROR` if chunk digest verification fails - `CasError` with code `INTEGRITY_ERROR` if decryption fails +- `CasError` with code `INTEGRITY_ERROR` if decompression fails +- `CasError` with code `INVALID_OPTIONS` if both `passphrase` and `encryptionKey` are provided **Example:** @@ -1119,6 +1123,7 @@ new CasError(message, code, meta) | `STREAM_ERROR` | Stream error occurred during store operation | `store()` | | `MANIFEST_NOT_FOUND` | No manifest entry found in the Git tree | `readManifest()`, `deleteAsset()`, `findOrphanedChunks()` | | `GIT_ERROR` | Underlying Git plumbing command failed | `readManifest()`, `deleteAsset()`, `findOrphanedChunks()` | +| `INVALID_OPTIONS` | Mutually exclusive options provided or unsupported option value | `store()`, `restore()` | ### Error Handling diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index b967426..3e5e070 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -184,22 +184,6 @@ export default class CasService extends EventEmitter { } } - /** - * Chunks an async iterable source and stores it in Git. - * - * If `encryptionKey` is provided, the content (and manifest) will be encrypted - * using AES-256-GCM, and the `encryption` field in the manifest will be populated. - * - * @param {Object} options - * @param {AsyncIterable} options.source - * @param {string} options.slug - * @param {string} options.filename - * @param {Buffer} [options.encryptionKey] - * @param {string} [options.passphrase] - Derive encryption key from passphrase instead. - * @param {Object} [options.kdfOptions] - KDF options when using passphrase. - * @param {{ algorithm: 'gzip' }} [options.compression] - Enable compression. - * @returns {Promise} - */ /** * Validates that passphrase and encryptionKey are not both provided. 
* @private @@ -226,6 +210,22 @@ export default class CasService extends EventEmitter { } } + /** + * Chunks an async iterable source and stores it in Git. + * + * If `encryptionKey` is provided, the content (and manifest) will be encrypted + * using AES-256-GCM, and the `encryption` field in the manifest will be populated. + * + * @param {Object} options + * @param {AsyncIterable} options.source + * @param {string} options.slug + * @param {string} options.filename + * @param {Buffer} [options.encryptionKey] + * @param {string} [options.passphrase] - Derive encryption key from passphrase instead. + * @param {Object} [options.kdfOptions] - KDF options when using passphrase. + * @param {{ algorithm: 'gzip' }} [options.compression] - Enable compression. + * @returns {Promise} + */ async store({ source, slug, filename, encryptionKey, passphrase, kdfOptions, compression }) { this._validateKeySourceExclusive(encryptionKey, passphrase); this._validateCompression(compression); @@ -449,7 +449,12 @@ export default class CasService extends EventEmitter { } if (manifest.compression) { - buffer = await gunzipAsync(buffer); + try { + buffer = await gunzipAsync(buffer); + } catch (err) { + if (err instanceof CasError) { throw err; } + throw new CasError(`Decompression failed: ${err.message}`, 'INTEGRITY_ERROR', { originalError: err }); + } } this.emit('file:restored', { diff --git a/test/unit/domain/services/CasService.compression.test.js b/test/unit/domain/services/CasService.compression.test.js index 0937df3..b860f4f 100644 --- a/test/unit/domain/services/CasService.compression.test.js +++ b/test/unit/domain/services/CasService.compression.test.js @@ -261,7 +261,29 @@ describe('CasService compression – backward compatibility', () => { }); // --------------------------------------------------------------------------- -// 8. Fuzz: round-trip across multiple sizes +// 8. 
Decompression failure wraps zlib error as CasError +// --------------------------------------------------------------------------- +describe('CasService compression – decompression failure wraps as CasError', () => { + it('throws CasError with INTEGRITY_ERROR when decompression fails on corrupt data', async () => { + const { service } = setup(); + + // Store WITHOUT compression so the raw bytes are not gzip-encoded + const original = Buffer.from('This is not gzip data'); + const manifest = await storeBuffer(service, original); + + // Build a new Manifest from the JSON with compression flag injected, + // so restore will attempt gunzip on the non-gzip chunk data. + const Manifest = (await import('../../../../src/domain/value-objects/Manifest.js')).default; + const tweaked = new Manifest({ ...manifest.toJSON(), compression: { algorithm: 'gzip' } }); + + const err = await service.restore({ manifest: tweaked }).catch((e) => e); + expect(err.code).toBe('INTEGRITY_ERROR'); + expect(err.message).toMatch(/Decompression failed/); + }); +}); + +// --------------------------------------------------------------------------- +// 9. Fuzz: round-trip across multiple sizes // --------------------------------------------------------------------------- describe('CasService compression – fuzz round-trip across sizes', () => { let service;