diff --git a/src/commands/archive-setup/archive.setup.ts b/src/commands/archive-setup/archive.setup.ts index a9c5b965..396a7851 100644 --- a/src/commands/archive-setup/archive.setup.ts +++ b/src/commands/archive-setup/archive.setup.ts @@ -36,7 +36,7 @@ export const archiveSetup = command({ const archiveLocation = getArchiveLocation(args.path, archiveBucketName); const saveArchiveLocationTo = new URL('archive-location', args.output); - await fsa.write(saveArchiveLocationTo, protocolAwareString(archiveLocation)); + await fsa.write(saveArchiveLocationTo, protocolAwareString(archiveLocation, true)); logger.info( { duration: performance.now() - startTime, archiveLocation: protocolAwareString(archiveLocation) }, diff --git a/src/commands/copy/copy.ts b/src/commands/copy/copy.ts index 616801ce..9433f417 100644 --- a/src/commands/copy/copy.ts +++ b/src/commands/copy/copy.ts @@ -14,8 +14,8 @@ import { mergeStats } from './copy-helpers.ts'; import type { CopyContract, CopyStats } from './copy-rpc.ts'; const CopyValidator = z.object({ - source: z.string().transform((val) => protocolAwareString(fsa.toUrl(val))), - target: z.string().transform((val) => protocolAwareString(fsa.toUrl(val))), + source: z.string().transform((val) => protocolAwareString(fsa.toUrl(val), true)), + target: z.string().transform((val) => protocolAwareString(fsa.toUrl(val), true)), }); const CopyManifest = z.array(CopyValidator); diff --git a/src/commands/create-manifest/create-manifest.ts b/src/commands/create-manifest/create-manifest.ts index 29b91da3..50915be0 100644 --- a/src/commands/create-manifest/create-manifest.ts +++ b/src/commands/create-manifest/create-manifest.ts @@ -65,7 +65,7 @@ export const commandCreateManifest = command({ const targetLocation = new URL(`manifest-${targetHash}.json`, actionLocation); const targetAction: ActionCopy = { action: 'copy', parameters: { manifest: current } }; await fsa.write(targetLocation, JSON.stringify(targetAction)); - outputCopy.push(protocolAwareString(targetLocation)); + outputCopy.push(protocolAwareString(targetLocation, true)); } else { outputCopy.push(gzipSync(outBuf).toString('base64url')); } diff --git a/src/commands/list/list.ts b/src/commands/list/list.ts index 1190bc2f..68546df4 100644 --- a/src/commands/list/list.ts +++ b/src/commands/list/list.ts @@ -40,7 +40,7 @@ export const commandList = command({ } const listLocations = args.location.flat(); const outputFiles = await getFiles(listLocations, args); - const decodedFiles = outputFiles.map((outputFile) => outputFile.map((url) => protocolAwareString(url))); + const decodedFiles = outputFiles.map((outputFile) => outputFile.map((url) => protocolAwareString(url, true))); if (args.output) await fsa.write(args.output, JSON.stringify(decodedFiles)); }, }); diff --git a/src/commands/stac-catalog/__test__/stac.catalog.test.ts b/src/commands/stac-catalog/__test__/stac.catalog.test.ts index aff33d98..bf797564 100644 --- a/src/commands/stac-catalog/__test__/stac.catalog.test.ts +++ b/src/commands/stac-catalog/__test__/stac.catalog.test.ts @@ -2,9 +2,7 @@ import assert from 'node:assert'; import { beforeEach, describe, it } from 'node:test'; import { fsa, FsMemory } from '@chunkd/fs'; -import { pathToFileURL } from 'url'; -import { makeRelative } from '../../../utils/filelist.ts'; import { createLinks } from '../stac.catalog.ts'; describe('stacCatalog', () => { @@ -42,27 +40,3 @@ describe('stacCatalog', () => { ]); }); }); - -describe('makeRelative', () => { - it('should make relative urls', () => { - assert.equal( - makeRelative(fsa.toUrl('s3://linz-imagery/'), fsa.toUrl('s3://linz-imagery/catalog.json')), - './catalog.json', - ); - }); - - it('should make relative from absolute paths', () => { - assert.equal( - makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/blacha/catalog.json')), - './catalog.json', - ); - }); - - it('should make relative relative paths', () => { - assert.equal(makeRelative(pathToFileURL(process.cwd() + '/'), pathToFileURL('./catalog.json')), './catalog.json'); - }); - - it('should not make relative on different paths', () => { - assert.throws(() => makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/test/catalog.json')), Error); - }); -}); diff --git a/src/commands/tileindex-validate/tileindex.validate.ts b/src/commands/tileindex-validate/tileindex.validate.ts index c0d3a9d6..2e9ce4aa 100644 --- a/src/commands/tileindex-validate/tileindex.validate.ts +++ b/src/commands/tileindex-validate/tileindex.validate.ts @@ -347,7 +347,7 @@ async function getTiffsMetadata(tiffs: Tiff[], locations: URL[]): Promise 1) { logger.error({ gsds: [...gsds], roundedGsds: [...roundedGsds] }, 'TileIndex:InconsistentGSDs:Failed'); throw new Error( - `Inconsistent GSDs found: ${[...roundedGsds].join(', ')} ${[...gsds].join(',')}, ${locations.map(protocolAwareString).join(',')}`, + `Inconsistent GSDs found: ${[...roundedGsds].join(', ')} ${[...gsds].join(',')}, ${locations.map((loc) => protocolAwareString(loc)).join(',')}`, ); } else if (gsds.size > 1) { logger.info({ gsds: [...gsds], roundedGsds: [...roundedGsds] }, 'TileIndex:InconsistentGSDs:RoundedToMatch'); diff --git a/src/utils/__test__/filelist.test.ts b/src/utils/__test__/filelist.test.ts index c9c15cb7..5e5fb939 100644 --- a/src/utils/__test__/filelist.test.ts +++ b/src/utils/__test__/filelist.test.ts @@ -1,10 +1,13 @@ import assert from 'node:assert'; import { describe, it } from 'node:test'; +import { pathToFileURL } from 'node:url'; import { fsa } from '@chunkd/fs'; import type { TiffLocation } from '../../commands/tileindex-validate/tileindex.validate.ts'; import type { FileListEntryClass } from '../filelist.ts'; +import { encodePercentSigns } from '../filelist.ts'; +import { makeRelative } from '../filelist.ts'; import { createFileList, protocolAwareString } from '../filelist.ts'; describe('createFileList', () => { @@ -65,6 +68,12 @@ describe('URL handling with special characters', () => { expectedTransformed: 's3://linz-topographic-upload/landpro/Gisborne_2023/Non_Priority_3/VECTOR/EP%23462_Gisborne_LOT_15-16-17.dgn', }, + { + original: + 's3://linz-hydrographic-archive/Authoritative_Surveys/HS72_Taranaki/Processed_Data/3_Processed/1_GSF_PROJECTS/HS72_Block M_GSF Project/SD/HS72_M_95%_C.I_4m_39-110m.sd', + expectedTransformed: + 's3://linz-hydrographic-archive/Authoritative_Surveys/HS72_Taranaki/Processed_Data/3_Processed/1_GSF_PROJECTS/HS72_Block M_GSF Project/SD/HS72_M_95%25_C.I_4m_39-110m.sd', + }, { original: 's3://linz-topographic-archive/landpro/Gisborne_2023/Non_Priority_3/VECTOR/EP#462_Gisborne_LOT_15-16-17.dgn', @@ -100,7 +109,7 @@ describe('URL handling with special characters', () => { for (const testCase of testCases) { // This is what happens in the CopyValidator const url1 = fsa.toUrl(testCase.original); - const transformedPath = protocolAwareString(url1); + const transformedPath = protocolAwareString(url1, true); // This is what happens in the copy worker const url2 = fsa.toUrl(transformedPath); @@ -121,3 +130,70 @@ describe('URL handling with special characters', () => { } }); }); + +describe('makeRelative', () => { + it('should make relative urls', () => { + assert.equal( + makeRelative(fsa.toUrl('s3://linz-imagery/'), fsa.toUrl('s3://linz-imagery/catalog.json')), + './catalog.json', + ); + }); + + it('should make relative from absolute paths', () => { + assert.equal( + makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/blacha/catalog.json')), + './catalog.json', + ); + }); + + it('should make relative relative paths', () => { + assert.equal(makeRelative(pathToFileURL(process.cwd() + '/'), pathToFileURL('./catalog.json')), './catalog.json'); + }); + + it('should not make relative on different paths', () => { + assert.throws(() => makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/test/catalog.json')), Error); + }); + + it('should handle URLs with spaces', () => { + const base = new URL('s3://bucket/path/'); + const fileWithSpace = new URL('s3://bucket/path/with%20space/file.txt'); + assert.equal(makeRelative(base, fileWithSpace), './with space/file.txt'); + }); + + it('should handle URLs with special characters', () => { + const base = new URL('memory://bucket/path/'); + const fileWithHash = new URL('memory://bucket/path/file%23hash.txt'); + const fileWithBracket = new URL('memory://bucket/path/file[bracket].txt'); + assert.equal(makeRelative(base, fileWithHash), './file#hash.txt'); + assert.equal(makeRelative(base, fileWithBracket), './file[bracket].txt'); + }); + + it('should handle file with percent sign (not followed by two hex digits)', () => { + const base = new URL('s3://bucket/path/'); + const tricky = new URL('s3://bucket/path/95%_C.I_4m_40-110m.sd'); + assert.equal(makeRelative(base, tricky), './95%_C.I_4m_40-110m.sd'); + }); + + it('should throw if strict and not a subfolder', () => { + const base = new URL('s3://bucket/path/'); + const outside = new URL('s3://bucket/other/file.txt'); + assert.throws(() => makeRelative(base, outside, true)); + }); +}); + +describe('encodePercentSigns', () => { + it('should encode lone percent signs', () => { + assert.equal(encodePercentSigns('HS72_M_95%_C.I_4m_39-110m.sd'), 'HS72_M_95%25_C.I_4m_39-110m.sd'); + assert.equal(encodePercentSigns('%foo%'), '%25foo%25'); + }); + + it('should not encode percent signs followed by two hex digits', () => { + assert.equal(encodePercentSigns('foo%20bar'), 'foo%20bar'); + assert.equal(encodePercentSigns('%41%42%43'), '%41%42%43'); + }); + + it('should encode percent signs not followed by two hex digits', () => { + assert.equal(encodePercentSigns('foo%2Gbar'), 'foo%252Gbar'); + assert.equal(encodePercentSigns('foo%'), 'foo%25'); + }); +}); diff --git a/src/utils/filelist.ts b/src/utils/filelist.ts index 23556832..5f7dafb4 100644 --- a/src/utils/filelist.ts +++ b/src/utils/filelist.ts @@ -10,23 +10,31 @@ export const HttpProtocols = ['https:', 'http:']; * For http(s):// URLs it returns the full URL (with encoded characters) * For other protocols it decodes the characters and * for file:// URL it creates a relative path from the current working directory - * For non-HTTP URLs, # characters are kept encoded as %23 to prevent - * them from being interpreted as URL fragments. + * For non-HTTP URLs, if encodeSpecialChars is true, # characters and % signs are kept encoded as %23 and %25 respectively to prevent + * them from being interpreted as URL fragments or special characters. * * @param targetLocation URL to convert to string + * @param encodeSpecialChars whether to encode special characters (#, %) for non-HTTP URLs * @returns string representation of the URL */ -export function protocolAwareString(targetLocation: URL): string { +export function protocolAwareString(targetLocation: URL, encodeSpecialChars = false): string { if (HttpProtocols.includes(targetLocation.protocol)) { return targetLocation.href; } if (targetLocation.protocol === 'file:') { return fileURLToPath(targetLocation); } - - // Decode URI components but keep # characters encoded to prevent - // them from being interpreted as URL fragments - return decodeURIComponent(targetLocation.href).replace(/#/g, '%23'); + if (!encodeSpecialChars) { + return decodeURIComponent(targetLocation.href); + } + // Encode % signs to avoid decodeURIComponent errors + const targetLocationWithEncodedPercents = encodePercentSigns(targetLocation.href); + // Decode URI components + const targetLocationDecodedURI = decodeURIComponent(targetLocationWithEncodedPercents); + // Encode % signs again to avoid issues using the decoded URI with fsa + // Encode # characters to prevent them from being interpreted as URL fragments + const targetLocationWithEncodedHashesAndPercents = encodePercentSigns(targetLocationDecodedURI).replace(/#/g, '%23'); + return targetLocationWithEncodedHashesAndPercents; } /** @@ -65,7 +73,19 @@ export function makeRelative(baseLocation: URL, fileLocation: URL, strict = true if (HttpProtocols.includes(fileLocation.protocol)) { return fileLocation.href.replace(baseLocationFolder.href, './'); } - return decodeURIComponent(fileLocation.href.replace(baseLocationFolder.href, './')); + + const fileLocationWithEncodedPercents = encodePercentSigns(fileLocation.href); + return decodeURIComponent(fileLocationWithEncodedPercents.replace(baseLocationFolder.href, './')); +} + +/** + * Encode percent signs (%) not followed by two hex digits as %25 + * + * @param input string to encode + * @returns encoded string + */ +export function encodePercentSigns(input: string): string { + return input.replace(/%(?![0-9A-Fa-f]{2})/g, '%25'); } export interface FileListEntry {