From 26a3dd382f45058309cf95538f1a33474e280e3e Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Fri, 9 Jan 2026 12:16:29 +1300 Subject: [PATCH 1/6] fix: percent sign was not properly encoded TDE-1767 --- .../__test__/stac.catalog.test.ts | 26 ---------- src/utils/__test__/filelist.test.ts | 52 +++++++++++++++++++ src/utils/filelist.ts | 5 +- 3 files changed, 56 insertions(+), 27 deletions(-) diff --git a/src/commands/stac-catalog/__test__/stac.catalog.test.ts b/src/commands/stac-catalog/__test__/stac.catalog.test.ts index aff33d98..bf797564 100644 --- a/src/commands/stac-catalog/__test__/stac.catalog.test.ts +++ b/src/commands/stac-catalog/__test__/stac.catalog.test.ts @@ -2,9 +2,7 @@ import assert from 'node:assert'; import { beforeEach, describe, it } from 'node:test'; import { fsa, FsMemory } from '@chunkd/fs'; -import { pathToFileURL } from 'url'; -import { makeRelative } from '../../../utils/filelist.ts'; import { createLinks } from '../stac.catalog.ts'; describe('stacCatalog', () => { @@ -42,27 +40,3 @@ describe('stacCatalog', () => { ]); }); }); - -describe('makeRelative', () => { - it('should make relative urls', () => { - assert.equal( - makeRelative(fsa.toUrl('s3://linz-imagery/'), fsa.toUrl('s3://linz-imagery/catalog.json')), - './catalog.json', - ); - }); - - it('should make relative from absolute paths', () => { - assert.equal( - makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/blacha/catalog.json')), - './catalog.json', - ); - }); - - it('should make relative relative paths', () => { - assert.equal(makeRelative(pathToFileURL(process.cwd() + '/'), pathToFileURL('./catalog.json')), './catalog.json'); - }); - - it('should not make relative on different paths', () => { - assert.throws(() => makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/test/catalog.json')), Error); - }); -}); diff --git a/src/utils/__test__/filelist.test.ts b/src/utils/__test__/filelist.test.ts index c9c15cb7..71715801 100644 --- a/src/utils/__test__/filelist.test.ts +++ b/src/utils/__test__/filelist.test.ts @@ -1,10 +1,12 @@ import assert from 'node:assert'; import { describe, it } from 'node:test'; +import { pathToFileURL } from 'node:url'; import { fsa } from '@chunkd/fs'; import type { TiffLocation } from '../../commands/tileindex-validate/tileindex.validate.ts'; import type { FileListEntryClass } from '../filelist.ts'; +import { makeRelative } from '../filelist.ts'; import { createFileList, protocolAwareString } from '../filelist.ts'; describe('createFileList', () => { @@ -121,3 +123,53 @@ describe('URL handling with special characters', () => { } }); }); + +describe('makeRelative', () => { + it('should make relative urls', () => { + assert.equal( + makeRelative(fsa.toUrl('s3://linz-imagery/'), fsa.toUrl('s3://linz-imagery/catalog.json')), + './catalog.json', + ); + }); + + it('should make relative from absolute paths', () => { + assert.equal( + makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/blacha/catalog.json')), + './catalog.json', + ); + }); + + it('should make relative relative paths', () => { + assert.equal(makeRelative(pathToFileURL(process.cwd() + '/'), pathToFileURL('./catalog.json')), './catalog.json'); + }); + + it('should not make relative on different paths', () => { + assert.throws(() => makeRelative(pathToFileURL('/home/blacha/'), pathToFileURL('/home/test/catalog.json')), Error); + }); + + it('should handle URLs with spaces', () => { + const base = new URL('s3://bucket/path/'); + const fileWithSpace = new URL('s3://bucket/path/with%20space/file.txt'); + assert.equal(makeRelative(base, fileWithSpace), './with space/file.txt'); + }); + + it('should handle URLs with special characters', () => { + const base = new URL('memory://bucket/path/'); + const fileWithHash = new URL('memory://bucket/path/file%23hash.txt'); + const fileWithBracket = new URL('memory://bucket/path/file[bracket].txt'); + assert.equal(makeRelative(base, fileWithHash), './file#hash.txt'); + assert.equal(makeRelative(base, fileWithBracket), './file[bracket].txt'); + }); + + it('should handle file with percent sign (not followed by two hex digits)', () => { + const base = new URL('s3://bucket/path/'); + const tricky = new URL('s3://bucket/path/95%_C.I_4m_40-110m.sd'); + assert.equal(makeRelative(base, tricky), './95%_C.I_4m_40-110m.sd'); + }); + + it('should throw if strict and not a subfolder', () => { + const base = new URL('s3://bucket/path/'); + const outside = new URL('s3://bucket/other/file.txt'); + assert.throws(() => makeRelative(base, outside, true)); + }); +}); diff --git a/src/utils/filelist.ts b/src/utils/filelist.ts index 23556832..336c30ab 100644 --- a/src/utils/filelist.ts +++ b/src/utils/filelist.ts @@ -43,6 +43,7 @@ export function protocolAwareString(targetLocation: URL): string { * @returns relative path to file */ export function makeRelative(baseLocation: URL, fileLocation: URL, strict = true): string { + console.log({ baseLocation: baseLocation.href, fileLocation: fileLocation.href, strict }); const baseLocationFolder = new URL('./', baseLocation); // Ensure baseLocation ends with "/" (cuts off anything after the final "/", i.e. a file name) // If the fileLocation starts with baseLocationFolder, we can return the relative path of fileLocation if (strict && !fileLocation.href.startsWith(baseLocationFolder.href)) { @@ -65,7 +66,9 @@ export function makeRelative(baseLocation: URL, fileLocation: URL, strict = true if (HttpProtocols.includes(fileLocation.protocol)) { return fileLocation.href.replace(baseLocationFolder.href, './'); } - return decodeURIComponent(fileLocation.href.replace(baseLocationFolder.href, './')); + // Before decoding, we need to ensure that any percent signs (%) not followed by two hex digits are encoded as %25 + const encodedSignsFileLocation = fileLocation.href.replace(/%(?![0-9A-Fa-f]{2})/g, '%25'); + return decodeURIComponent(encodedSignsFileLocation.replace(baseLocationFolder.href, './')); } export interface FileListEntry { From afca21993ef7efeab3cf1b75ca3630eb59127ed6 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Fri, 9 Jan 2026 12:26:40 +1300 Subject: [PATCH 2/6] fix: remove debug --- src/utils/filelist.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/filelist.ts b/src/utils/filelist.ts index 336c30ab..673ab30a 100644 --- a/src/utils/filelist.ts +++ b/src/utils/filelist.ts @@ -43,7 +43,6 @@ export function protocolAwareString(targetLocation: URL): string { * @returns relative path to file */ export function makeRelative(baseLocation: URL, fileLocation: URL, strict = true): string { - console.log({ baseLocation: baseLocation.href, fileLocation: fileLocation.href, strict }); const baseLocationFolder = new URL('./', baseLocation); // Ensure baseLocation ends with "/" (cuts off anything after the final "/", i.e. a file name) // If the fileLocation starts with baseLocationFolder, we can return the relative path of fileLocation if (strict && !fileLocation.href.startsWith(baseLocationFolder.href)) { From b3f381f603e50c6267cd6534aeed7a142a9c102f Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Fri, 9 Jan 2026 14:44:55 +1300 Subject: [PATCH 3/6] fix: `protocolAwareString` needs to encode percent signs --- src/utils/__test__/filelist.test.ts | 24 ++++++++++++++++++++++++ src/utils/filelist.ts | 19 +++++++++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/src/utils/__test__/filelist.test.ts b/src/utils/__test__/filelist.test.ts index 71715801..aae1675c 100644 --- a/src/utils/__test__/filelist.test.ts +++ b/src/utils/__test__/filelist.test.ts @@ -6,6 +6,7 @@ import { fsa } from '@chunkd/fs'; import type { TiffLocation } from '../../commands/tileindex-validate/tileindex.validate.ts'; import type { FileListEntryClass } from '../filelist.ts'; +import { encodePercentSigns } from '../filelist.ts'; import { makeRelative } from '../filelist.ts'; import { createFileList, protocolAwareString } from '../filelist.ts'; @@ -67,6 +68,12 @@ describe('URL handling with special characters', () => { expectedTransformed: 's3://linz-topographic-upload/landpro/Gisborne_2023/Non_Priority_3/VECTOR/EP%23462_Gisborne_LOT_15-16-17.dgn', }, + { + original: + 's3://linz-hydrographic-archive/Authoritative_Surveys/HS72_Taranaki/Processed_Data/3_Processed/1_GSF_PROJECTS/HS72_Block M_GSF Project/SD/HS72_M_95%_C.I_4m_39-110m.sd', + expectedTransformed: + 's3://linz-hydrographic-archive/Authoritative_Surveys/HS72_Taranaki/Processed_Data/3_Processed/1_GSF_PROJECTS/HS72_Block M_GSF Project/SD/HS72_M_95%_C.I_4m_39-110m.sd', + }, { original: 's3://linz-topographic-archive/landpro/Gisborne_2023/Non_Priority_3/VECTOR/EP#462_Gisborne_LOT_15-16-17.dgn', @@ -173,3 +180,20 @@ describe('makeRelative', () => { assert.throws(() => makeRelative(base, outside, true)); }); }); + +describe('encodePercentSigns', () => { + it('should encode lone percent signs', () => { + assert.equal(encodePercentSigns('HS72_M_95%_C.I_4m_39-110m.sd'), 'HS72_M_95%25_C.I_4m_39-110m.sd'); + assert.equal(encodePercentSigns('%foo%'), '%25foo%25'); + }); + + it('should not encode percent signs followed by two hex digits', () => { + assert.equal(encodePercentSigns('foo%20bar'), 'foo%20bar'); + assert.equal(encodePercentSigns('%41%42%43'), '%41%42%43'); + }); + + it('should encode percent signs not followed by two hex digits', () => { + assert.equal(encodePercentSigns('foo%2Gbar'), 'foo%252Gbar'); + assert.equal(encodePercentSigns('foo%'), 'foo%25'); + }); +}); diff --git a/src/utils/filelist.ts b/src/utils/filelist.ts index 673ab30a..4a00384b 100644 --- a/src/utils/filelist.ts +++ b/src/utils/filelist.ts @@ -24,9 +24,10 @@ export function protocolAwareString(targetLocation: URL): string { return fileURLToPath(targetLocation); } + const targetLocationWithEncodedPercents = encodePercentSigns(targetLocation.href); // Decode URI components but keep # characters encoded to prevent // them from being interpreted as URL fragments - return decodeURIComponent(targetLocation.href).replace(/#/g, '%23'); + return decodeURIComponent(targetLocationWithEncodedPercents).replace(/#/g, '%23'); } /** @@ -65,9 +66,19 @@ export function makeRelative(baseLocation: URL, fileLocation: URL, strict = true if (HttpProtocols.includes(fileLocation.protocol)) { return fileLocation.href.replace(baseLocationFolder.href, './'); } - // Before decoding, we need to ensure that any percent signs (%) not followed by two hex digits are encoded as %25 - const encodedSignsFileLocation = fileLocation.href.replace(/%(?![0-9A-Fa-f]{2})/g, '%25'); - return decodeURIComponent(encodedSignsFileLocation.replace(baseLocationFolder.href, './')); + + const fileLocationWithEncodedPercents = encodePercentSigns(fileLocation.href); + return decodeURIComponent(fileLocationWithEncodedPercents.replace(baseLocationFolder.href, './')); +} + +/** + * Encode percent signs (%) not followed by two hex digits as %25 + * + * @param input string to encode + * @returns encoded string + */ +export function encodePercentSigns(input: string): string { + return input.replace(/%(?![0-9A-Fa-f]{2})/g, '%25'); } export interface FileListEntry { From 05d975d6ecc28a234c03e5118e2c9ac97c004fb0 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Mon, 12 Jan 2026 15:01:12 +1300 Subject: [PATCH 4/6] fix: re-encode % to avoid issue using --- src/utils/__test__/filelist.test.ts | 2 +- src/utils/filelist.ts | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/utils/__test__/filelist.test.ts b/src/utils/__test__/filelist.test.ts index aae1675c..63bc412f 100644 --- a/src/utils/__test__/filelist.test.ts +++ b/src/utils/__test__/filelist.test.ts @@ -72,7 +72,7 @@ describe('URL handling with special characters', () => { original: 's3://linz-hydrographic-archive/Authoritative_Surveys/HS72_Taranaki/Processed_Data/3_Processed/1_GSF_PROJECTS/HS72_Block M_GSF Project/SD/HS72_M_95%_C.I_4m_39-110m.sd', expectedTransformed: - 's3://linz-hydrographic-archive/Authoritative_Surveys/HS72_Taranaki/Processed_Data/3_Processed/1_GSF_PROJECTS/HS72_Block M_GSF Project/SD/HS72_M_95%_C.I_4m_39-110m.sd', + 's3://linz-hydrographic-archive/Authoritative_Surveys/HS72_Taranaki/Processed_Data/3_Processed/1_GSF_PROJECTS/HS72_Block M_GSF Project/SD/HS72_M_95%25_C.I_4m_39-110m.sd', }, { original: diff --git a/src/utils/filelist.ts b/src/utils/filelist.ts index 4a00384b..531432a6 100644 --- a/src/utils/filelist.ts +++ b/src/utils/filelist.ts @@ -24,10 +24,14 @@ export function protocolAwareString(targetLocation: URL): string { return fileURLToPath(targetLocation); } + // Encode % signs to avoid decodeURIComponent errors const targetLocationWithEncodedPercents = encodePercentSigns(targetLocation.href); - // Decode URI components but keep # characters encoded to prevent - // them from being interpreted as URL fragments - return decodeURIComponent(targetLocationWithEncodedPercents).replace(/#/g, '%23'); + // Decode URI components + const targetLocationDecodedURI = decodeURIComponent(targetLocationWithEncodedPercents); + // Encode % signs again to avoid issues using the decoded URI with fsa + // Encode # characters to prevent them from being interpreted as URL fragments + const targetLocationWithEncodedHashesAndPercents = encodePercentSigns(targetLocationDecodedURI).replace(/#/g, '%23'); + return targetLocationWithEncodedHashesAndPercents; } /** From 427e9ab07bb9dcb423fddafa4c4ea76f87a2d225 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Tue, 13 Jan 2026 09:31:14 +1300 Subject: [PATCH 5/6] refactor: do not output encoded URIs for logging --- src/commands/archive-setup/archive.setup.ts | 2 +- src/commands/copy/copy.ts | 4 ++-- src/commands/create-manifest/create-manifest.ts | 2 +- src/commands/list/list.ts | 2 +- src/utils/__test__/filelist.test.ts | 2 +- src/utils/filelist.ts | 11 +++++++---- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/commands/archive-setup/archive.setup.ts b/src/commands/archive-setup/archive.setup.ts index a9c5b965..396a7851 100644 --- a/src/commands/archive-setup/archive.setup.ts +++ b/src/commands/archive-setup/archive.setup.ts @@ -36,7 +36,7 @@ export const archiveSetup = command({ const archiveLocation = getArchiveLocation(args.path, archiveBucketName); const saveArchiveLocationTo = new URL('archive-location', args.output); - await fsa.write(saveArchiveLocationTo, protocolAwareString(archiveLocation)); + await fsa.write(saveArchiveLocationTo, protocolAwareString(archiveLocation, true)); logger.info( { duration: performance.now() - startTime, archiveLocation: protocolAwareString(archiveLocation) }, diff --git a/src/commands/copy/copy.ts b/src/commands/copy/copy.ts index 616801ce..9433f417 100644 --- a/src/commands/copy/copy.ts +++ b/src/commands/copy/copy.ts @@ -14,8 +14,8 @@ import { mergeStats } from './copy-helpers.ts'; import type { CopyContract, CopyStats } from './copy-rpc.ts'; const CopyValidator = z.object({ - source: z.string().transform((val) => protocolAwareString(fsa.toUrl(val))), - target: z.string().transform((val) => protocolAwareString(fsa.toUrl(val))), + source: z.string().transform((val) => protocolAwareString(fsa.toUrl(val), true)), + target: z.string().transform((val) => protocolAwareString(fsa.toUrl(val), true)), }); const CopyManifest = z.array(CopyValidator); diff --git a/src/commands/create-manifest/create-manifest.ts b/src/commands/create-manifest/create-manifest.ts index 29b91da3..50915be0 100644 --- a/src/commands/create-manifest/create-manifest.ts +++ b/src/commands/create-manifest/create-manifest.ts @@ -65,7 +65,7 @@ export const commandCreateManifest = command({ const targetLocation = new URL(`manifest-${targetHash}.json`, actionLocation); const targetAction: ActionCopy = { action: 'copy', parameters: { manifest: current } }; await fsa.write(targetLocation, JSON.stringify(targetAction)); - outputCopy.push(protocolAwareString(targetLocation)); + outputCopy.push(protocolAwareString(targetLocation, true)); } else { outputCopy.push(gzipSync(outBuf).toString('base64url')); } diff --git a/src/commands/list/list.ts b/src/commands/list/list.ts index 1190bc2f..68546df4 100644 --- a/src/commands/list/list.ts +++ b/src/commands/list/list.ts @@ -40,7 +40,7 @@ export const commandList = command({ } const listLocations = args.location.flat(); const outputFiles = await getFiles(listLocations, args); - const decodedFiles = outputFiles.map((outputFile) => outputFile.map((url) => protocolAwareString(url))); + const decodedFiles = outputFiles.map((outputFile) => outputFile.map((url) => protocolAwareString(url, true))); if (args.output) await fsa.write(args.output, JSON.stringify(decodedFiles)); }, }); diff --git a/src/utils/__test__/filelist.test.ts b/src/utils/__test__/filelist.test.ts index 63bc412f..5e5fb939 100644 --- a/src/utils/__test__/filelist.test.ts +++ b/src/utils/__test__/filelist.test.ts @@ -109,7 +109,7 @@ describe('URL handling with special characters', () => { for (const testCase of testCases) { // This is what happens in the CopyValidator const url1 = fsa.toUrl(testCase.original); - const transformedPath = protocolAwareString(url1); + const transformedPath = protocolAwareString(url1, true); // This is what happens in the copy worker const url2 = fsa.toUrl(transformedPath); diff --git a/src/utils/filelist.ts b/src/utils/filelist.ts index 531432a6..5f7dafb4 100644 --- a/src/utils/filelist.ts +++ b/src/utils/filelist.ts @@ -10,20 +10,23 @@ export const HttpProtocols = ['https:', 'http:']; * For http(s):// URLs it returns the full URL (with encoded characters) * For other protocols it decodes the characters and * for file:// URL it creates a relative path from the current working directory - * For non-HTTP URLs, # characters are kept encoded as %23 to prevent - * them from being interpreted as URL fragments. + * For non-HTTP URLs, if encodeSpecialChars is true, # characters and % signs are kept encoded as %23 and %25 respectively to prevent + * them from being interpreted as URL fragments or special characters. * * @param targetLocation URL to convert to string + * @param encodeSpecialChars whether to encode special characters (#, %) for non-HTTP URLs * @returns string representation of the URL */ -export function protocolAwareString(targetLocation: URL): string { +export function protocolAwareString(targetLocation: URL, encodeSpecialChars = false): string { if (HttpProtocols.includes(targetLocation.protocol)) { return targetLocation.href; } if (targetLocation.protocol === 'file:') { return fileURLToPath(targetLocation); } - + if (!encodeSpecialChars) { + return decodeURIComponent(targetLocation.href); + } // Encode % signs to avoid decodeURIComponent errors const targetLocationWithEncodedPercents = encodePercentSigns(targetLocation.href); // Decode URI components From 36b73698434143073be8407d279da738833c3a70 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Tue, 13 Jan 2026 09:52:00 +1300 Subject: [PATCH 6/6] fix: protocolAwareString parameters --- src/commands/tileindex-validate/tileindex.validate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/tileindex-validate/tileindex.validate.ts b/src/commands/tileindex-validate/tileindex.validate.ts index c0d3a9d6..2e9ce4aa 100644 --- a/src/commands/tileindex-validate/tileindex.validate.ts +++ b/src/commands/tileindex-validate/tileindex.validate.ts @@ -347,7 +347,7 @@ async function getTiffsMetadata(tiffs: Tiff[], locations: URL[]): Promise 1) { logger.error({ gsds: [...gsds], roundedGsds: [...roundedGsds] }, 'TileIndex:InconsistentGSDs:Failed'); throw new Error( - `Inconsistent GSDs found: ${[...roundedGsds].join(', ')} ${[...gsds].join(',')}, ${locations.map(protocolAwareString).join(',')}`, + `Inconsistent GSDs found: ${[...roundedGsds].join(', ')} ${[...gsds].join(',')}, ${locations.map((loc) => protocolAwareString(loc)).join(',')}`, ); } else if (gsds.size > 1) { logger.info({ gsds: [...gsds], roundedGsds: [...roundedGsds] }, 'TileIndex:InconsistentGSDs:RoundedToMatch');