
feat: download tars from @helia/verified-fetch #442

Merged
merged 10 commits on Feb 22, 2024
9 changes: 8 additions & 1 deletion packages/verified-fetch/package.json
@@ -141,8 +141,8 @@
"release": "aegir release"
},
"dependencies": {
"@helia/car": "^3.0.0",
"@helia/block-brokers": "^2.0.1",
"@helia/car": "^3.0.0",
"@helia/http": "^1.0.1",
"@helia/interface": "^4.0.0",
"@helia/ipns": "^6.0.0",
@@ -155,7 +155,11 @@
"@libp2p/peer-id": "^4.0.5",
"cborg": "^4.0.9",
"hashlru": "^2.3.0",
"interface-blockstore": "^5.2.10",
"ipfs-unixfs-exporter": "^13.5.0",
"it-map": "^3.0.5",
"it-pipe": "^3.0.1",
"it-tar": "^6.0.4",
"it-to-browser-readablestream": "^2.0.6",
"multiformats": "^13.1.0",
"progress-events": "^1.0.0"
@@ -173,9 +177,12 @@
"@types/sinon": "^17.0.3",
"aegir": "^42.2.2",
"blockstore-core": "^4.4.0",
"browser-readablestream-to-it": "^2.0.5",
"datastore-core": "^9.2.8",
"helia": "^4.0.1",
"ipfs-unixfs-importer": "^15.2.4",
"ipns": "^9.0.0",
"it-all": "^3.0.4",
"it-last": "^3.0.4",
"it-to-buffer": "^4.0.5",
"magic-bytes.js": "^1.8.0",
68 changes: 68 additions & 0 deletions packages/verified-fetch/src/utils/get-tar-stream.ts
@@ -0,0 +1,68 @@
import { CodeError } from '@libp2p/interface'
import { exporter, recursive, type UnixFSEntry } from 'ipfs-unixfs-exporter'
import map from 'it-map'
import { pipe } from 'it-pipe'
import { pack, type TarEntryHeader, type TarImportCandidate } from 'it-tar'
import type { AbortOptions } from '@libp2p/interface'
import type { Blockstore } from 'interface-blockstore'

const EXPORTABLE = ['file', 'raw', 'directory']

function toHeader (file: UnixFSEntry): Partial<TarEntryHeader> & { name: string } {
let mode: number | undefined
let mtime: Date | undefined

if (file.type === 'file' || file.type === 'directory') {
mode = file.unixfs.mode
mtime = file.unixfs.mtime != null ? new Date(Number(file.unixfs.mtime.secs * 1000n)) : undefined
}

return {
name: file.path,
mode,
mtime,
size: Number(file.size),
Review comment (Member):
do we want to squash the bigint from Exportable.size to a Number here?

Reply (@achingbrain, Member, Author, Feb 22, 2024):
We have to convert it because it-tar expects the field to be a number.

We may lose some precision, but Number.MAX_SAFE_INTEGER is about 9PB, so (famous last words) I think files of that size may be uncommon.

type: file.type === 'directory' ? 'directory' : 'file'
}
}

function toTarImportCandidate (entry: UnixFSEntry): TarImportCandidate {
if (!EXPORTABLE.includes(entry.type)) {
throw new CodeError('Not a UnixFS node', 'ERR_NOT_UNIXFS')
}

(Codecov: added lines #L31-L32 of get-tar-stream.ts were not covered by tests)

const candidate: TarImportCandidate = {
header: toHeader(entry)
}

if (entry.type === 'file' || entry.type === 'raw') {
candidate.body = entry.content()
}

return candidate
}

export async function * tarStream (ipfsPath: string, blockstore: Blockstore, options?: AbortOptions): AsyncGenerator<Uint8Array> {
const file = await exporter(ipfsPath, blockstore, options)

if (file.type === 'file' || file.type === 'raw') {
yield * pipe(
[toTarImportCandidate(file)],
pack()
)

return
}

if (file.type === 'directory') {
yield * pipe(
recursive(ipfsPath, blockstore, options),
(source) => map(source, (entry) => toTarImportCandidate(entry)),
pack()
)

return
}

throw new CodeError('Not a UnixFS node', 'ERR_NOT_UNIXFS')
}

(Codecov: added lines #L66-L68 of get-tar-stream.ts were not covered by tests)
3 changes: 2 additions & 1 deletion packages/verified-fetch/src/utils/select-output-type.ts
@@ -55,7 +55,8 @@ const CID_TYPE_MAP: Record<number, string[]> = {
'application/octet-stream',
'application/vnd.ipld.raw',
'application/vnd.ipfs.ipns-record',
'application/vnd.ipld.car'
'application/vnd.ipld.car',
'application/x-tar'
]
}

14 changes: 11 additions & 3 deletions packages/verified-fetch/src/verified-fetch.ts
@@ -14,6 +14,7 @@
import { getContentDispositionFilename } from './utils/get-content-disposition-filename.js'
import { getETag } from './utils/get-e-tag.js'
import { getStreamFromAsyncIterable } from './utils/get-stream-from-async-iterable.js'
import { tarStream } from './utils/get-tar-stream.js'
import { parseResource } from './utils/parse-resource.js'
import { notAcceptableResponse, notSupportedResponse, okResponse } from './utils/responses.js'
import { selectOutputType, queryFormatToAcceptHeader } from './utils/select-output-type.js'
@@ -151,11 +152,16 @@
* directory structure referenced by the `CID`.
*/
private async handleTar ({ cid, path, options }: FetchHandlerFunctionArg): Promise<Response> {
if (cid.code !== dagPbCode) {
return notAcceptableResponse('only dag-pb CIDs can be returned in TAR files')
if (cid.code !== dagPbCode && cid.code !== rawCode) {
return notAcceptableResponse('only UnixFS data can be returned in a TAR file')

(Codecov: added line #L156 of verified-fetch.ts was not covered by tests)
}

return notSupportedResponse('application/tar support is not implemented')
const stream = toBrowserReadableStream<Uint8Array>(tarStream(`/ipfs/${cid}/${path}`, this.helia.blockstore, options))

const response = okResponse(stream)
response.headers.set('content-type', 'application/x-tar')

return response
}

private async handleJson ({ cid, path, accept, options }: FetchHandlerFunctionArg): Promise<Response> {
@@ -397,6 +403,8 @@
} else if (accept === 'application/x-tar') {
// the user requested a TAR file
reqFormat = 'tar'
query.download = true
query.filename = query.filename ?? `${cid.toString()}.tar`
response = await this.handleTar({ cid, path, options })
} else {
// derive the handler from the CID type
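
For context, a usage sketch of the new behaviour from the public API. It assumes the createVerifiedFetch factory and gateways option from @helia/verified-fetch's docs, plus a placeholder CID; it is illustrative only, not code from this PR.

import { createVerifiedFetch } from '@helia/verified-fetch'

// placeholder CID of a UnixFS file or directory - substitute a real one
const cid = 'bafyExampleCid'

const fetch = await createVerifiedFetch({
  gateways: ['https://trustless-gateway.link']
})

// either `?format=tar` or an `accept: application/x-tar` header selects the TAR handler
const res = await fetch(`ipfs://${cid}?format=tar`)

// content-type is application/x-tar and content-disposition defaults to
// `attachment; filename="<cid>.tar"` unless a ?filename=... query param overrides it
const tarBytes = new Uint8Array(await res.arrayBuffer())

await fetch.stop()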
149 changes: 149 additions & 0 deletions packages/verified-fetch/test/tar.spec.ts
@@ -0,0 +1,149 @@
import { unixfs } from '@helia/unixfs'
import { stop } from '@libp2p/interface'
import { expect } from 'aegir/chai'
import browserReadableStreamToIt from 'browser-readablestream-to-it'
import all from 'it-all'
import last from 'it-last'
import { pipe } from 'it-pipe'
import { extract } from 'it-tar'
import toBuffer from 'it-to-buffer'
import { VerifiedFetch } from '../src/verified-fetch.js'
import { createHelia } from './fixtures/create-offline-helia.js'
import type { Helia } from '@helia/interface'
import type { FileCandidate } from 'ipfs-unixfs-importer'

describe('tar files', () => {
let helia: Helia
let verifiedFetch: VerifiedFetch

beforeEach(async () => {
helia = await createHelia()
verifiedFetch = new VerifiedFetch({
helia
})
})

afterEach(async () => {
await stop(helia, verifiedFetch)
})

it('should support fetching a TAR file', async () => {
const file = Uint8Array.from([0, 1, 2, 3, 4])
const fs = unixfs(helia)
const cid = await fs.addBytes(file)

const resp = await verifiedFetch.fetch(cid, {
headers: {
accept: 'application/x-tar'
}
})
expect(resp.status).to.equal(200)
expect(resp.headers.get('content-type')).to.equal('application/x-tar')
expect(resp.headers.get('content-disposition')).to.equal(`attachment; filename="${cid.toString()}.tar"`)

if (resp.body == null) {
throw new Error('Download failed')
}

(Codecov: added lines #L45-L46 of tar.spec.ts were not covered by tests)

const entries = await pipe(
browserReadableStreamToIt(resp.body),
extract(),
async source => all(source)
)

expect(entries).to.have.lengthOf(1)
await expect(toBuffer(entries[0].body)).to.eventually.deep.equal(file)
})

it('should support fetching a TAR file containing a directory', async () => {
const directory: FileCandidate[] = [{
path: 'foo.txt',
content: Uint8Array.from([0, 1, 2, 3, 4])
}, {
path: 'bar.txt',
content: Uint8Array.from([5, 6, 7, 8, 9])
}, {
path: 'baz/qux.txt',
content: Uint8Array.from([1, 2, 3, 4, 5])
}]

const fs = unixfs(helia)
const importResult = await last(fs.addAll(directory, {
wrapWithDirectory: true
}))

if (importResult == null) {
throw new Error('Import failed')
}

(Codecov: added lines #L76-L77 of tar.spec.ts were not covered by tests)

const resp = await verifiedFetch.fetch(importResult.cid, {
headers: {
accept: 'application/x-tar'
}
})
expect(resp.status).to.equal(200)
expect(resp.headers.get('content-type')).to.equal('application/x-tar')
expect(resp.headers.get('content-disposition')).to.equal(`attachment; filename="${importResult.cid.toString()}.tar"`)

if (resp.body == null) {
throw new Error('Download failed')
}

(Codecov: added lines #L89-L90 of tar.spec.ts were not covered by tests)

const entries = await pipe(
browserReadableStreamToIt(resp.body),
extract(),
async source => all(source)
)

expect(entries).to.have.lengthOf(5)
expect(entries[0]).to.have.nested.property('header.name', importResult.cid.toString())

expect(entries[1]).to.have.nested.property('header.name', `${importResult.cid}/${directory[1].path}`)
await expect(toBuffer(entries[1].body)).to.eventually.deep.equal(directory[1].content)

expect(entries[2]).to.have.nested.property('header.name', `${importResult.cid}/${directory[2].path?.split('/')[0]}`)

expect(entries[3]).to.have.nested.property('header.name', `${importResult.cid}/${directory[2].path}`)
await expect(toBuffer(entries[3].body)).to.eventually.deep.equal(directory[2].content)

expect(entries[4]).to.have.nested.property('header.name', `${importResult.cid}/${directory[0].path}`)
await expect(toBuffer(entries[4].body)).to.eventually.deep.equal(directory[0].content)
})
Review comment on lines +98 to +111 (Member):
the order here seems odd... should ${importResult.cid}/${directory[0].path} be at entries[1]?

Reply (Member):
Alex let me know it-tar handles ordering and it's stable, so it should be fine.

Reply (@achingbrain, Member, Author):
There's some interesting discussion on tar file ordering here: https://unix.stackexchange.com/questions/120143/how-is-the-order-in-which-tar-works-on-files-determined

Either way, it's external to @helia/verified-fetch.


it('should support fetching a TAR file by format', async () => {
const file = Uint8Array.from([0, 1, 2, 3, 4])
const fs = unixfs(helia)
const cid = await fs.addBytes(file)

const resp = await verifiedFetch.fetch(`ipfs://${cid}?format=tar`)
expect(resp.status).to.equal(200)
expect(resp.headers.get('content-type')).to.equal('application/x-tar')
expect(resp.headers.get('content-disposition')).to.equal(`attachment; filename="${cid.toString()}.tar"`)
})

it('should support specifying a filename for a TAR file', async () => {
const file = Uint8Array.from([0, 1, 2, 3, 4])
const fs = unixfs(helia)
const cid = await fs.addBytes(file)

const resp = await verifiedFetch.fetch(`ipfs://${cid}?filename=foo.bar`, {
headers: {
accept: 'application/x-tar'
}
})
expect(resp.status).to.equal(200)
expect(resp.headers.get('content-type')).to.equal('application/x-tar')
expect(resp.headers.get('content-disposition')).to.equal('attachment; filename="foo.bar"')
})

it('should support fetching a TAR file by format with a filename', async () => {
const file = Uint8Array.from([0, 1, 2, 3, 4])
const fs = unixfs(helia)
const cid = await fs.addBytes(file)

const resp = await verifiedFetch.fetch(`ipfs://${cid}?format=tar&filename=foo.bar`)
expect(resp.status).to.equal(200)
expect(resp.headers.get('content-type')).to.equal('application/x-tar')
expect(resp.headers.get('content-disposition')).to.equal('attachment; filename="foo.bar"')
})
})
2 changes: 1 addition & 1 deletion packages/verified-fetch/test/verified-fetch.spec.ts
@@ -80,7 +80,7 @@ describe('@helia/verifed-fetch', () => {
})

const formatsAndAcceptHeaders = [
['tar', 'application/x-tar']
['ipns-record', 'application/vnd.ipfs.ipns-record']
]

for (const [format, acceptHeader] of formatsAndAcceptHeaders) {