From 7f1fd40ffdb10313b2ae0b7ec0e4d8e2197ba70f Mon Sep 17 00:00:00 2001 From: Andrew Polk Date: Tue, 2 Apr 2024 08:56:11 -0700 Subject: [PATCH] fix: fallback to getting image file extension from url --- src/MakeImagePersistencePlan.ts | 61 ++++++++++++++++------------ src/makeImagePersistencePlan.spec.ts | 12 ++++++ 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/MakeImagePersistencePlan.ts b/src/MakeImagePersistencePlan.ts index 00cae39..0edde9b 100644 --- a/src/MakeImagePersistencePlan.ts +++ b/src/MakeImagePersistencePlan.ts @@ -8,44 +8,51 @@ export function makeImagePersistencePlan( imageOutputRootPath: string, imagePrefix: string ): void { - if (imageSet.fileType?.ext) { - // Since most images come from pasting screenshots, there isn't normally a filename. That's fine, we just make a hash of the url - // Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example: - // https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject - // But around Sept 2023, they changed the url to be something like: - // https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject - // The thing we want is the last UUID before the ? + const urlBeforeQuery = imageSet.primaryUrl.split("?")[0]; - const urlBeforeQuery = imageSet.primaryUrl.split("?")[0]; - const thingToHash = findLastUuid(urlBeforeQuery) ?? urlBeforeQuery; + let imageFileExtension: string | undefined = imageSet.fileType?.ext; + if (!imageFileExtension) { + // Try to get the extension from the url + imageFileExtension = urlBeforeQuery.split(".").pop(); - const hash = hashOfString(thingToHash); - imageSet.outputFileName = `${hash}.${imageSet.fileType.ext}`; - - imageSet.primaryFileOutputPath = Path.posix.join( - imageOutputRootPath?.length > 0 - ? imageOutputRootPath - : imageSet.pathToParentDocument!, - imageSet.outputFileName - ); - - if (imageOutputRootPath && imageSet.localizedUrls.length) { + if (!imageFileExtension) { error( - "imageOutputPath was declared, but one or more localizedUrls were found too. If you are going to localize screenshots, then you can't declare an imageOutputPath." + `Something wrong with the filetype extension on the blob we got from ${imageSet.primaryUrl}` ); exit(1); } + } + + // Since most images come from pasting screenshots, there isn't normally a filename. That's fine, we just make a hash of the url + // Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example: + // https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject + // But around Sept 2023, they changed the url to be something like: + // https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject + // The thing we want is the last UUID before the ? - imageSet.filePathToUseInMarkdown = - (imagePrefix?.length > 0 ? imagePrefix : ".") + - "/" + - imageSet.outputFileName; - } else { + const thingToHash = findLastUuid(urlBeforeQuery) ?? urlBeforeQuery; + + const hash = hashOfString(thingToHash); + imageSet.outputFileName = `${hash}.${imageFileExtension}`; + + imageSet.primaryFileOutputPath = Path.posix.join( + imageOutputRootPath?.length > 0 + ? imageOutputRootPath + : imageSet.pathToParentDocument!, + imageSet.outputFileName + ); + + if (imageOutputRootPath && imageSet.localizedUrls.length) { error( - `Something wrong with the filetype extension on the blob we got from ${imageSet.primaryUrl}` + "imageOutputPath was declared, but one or more localizedUrls were found too. If you are going to localize screenshots, then you can't declare an imageOutputPath." ); exit(1); } + + imageSet.filePathToUseInMarkdown = + (imagePrefix?.length > 0 ? imagePrefix : ".") + + "/" + + imageSet.outputFileName; } function findLastUuid(url: string): string | null { diff --git a/src/makeImagePersistencePlan.spec.ts b/src/makeImagePersistencePlan.spec.ts index b585865..7333580 100644 --- a/src/makeImagePersistencePlan.spec.ts +++ b/src/makeImagePersistencePlan.spec.ts @@ -42,6 +42,18 @@ test("primary file with defaults for image output path and prefix", () => { ); expect(imageSet.filePathToUseInMarkdown).toBe(`./${expectedHash}.png`); }); +test("falls back to getting file extension from url if not in fileType", () => { + const imageSet: ImageSet = { + primaryUrl: "https://s3.us-west-2.amazonaws.com/primaryImage.png", + localizedUrls: [], + pathToParentDocument: "/pathToParentSomewhere/", + }; + makeImagePersistencePlan(imageSet, "", ""); + const expectedHash = hashOfString( + "https://s3.us-west-2.amazonaws.com/primaryImage.png" + ); + expect(imageSet.outputFileName).toBe(`${expectedHash}.png`); +}); test("properly extract UUID from old-style notion image url", () => { const imageSet: ImageSet = {