Skip to content

Commit

Permalink
fix: fallback to getting image file extension from url
Browse files Browse the repository at this point in the history
  • Loading branch information
andrew-polk committed Apr 2, 2024
1 parent a73e040 commit 7f1fd40
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 27 deletions.
61 changes: 34 additions & 27 deletions src/MakeImagePersistencePlan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,44 +8,51 @@ export function makeImagePersistencePlan(
imageOutputRootPath: string,
imagePrefix: string
): void {
if (imageSet.fileType?.ext) {
// Since most images come from pasting screenshots, there isn't normally a filename. That's fine, we just make a hash of the url
// Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example:
// https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject
// But around Sept 2023, they changed the url to be something like:
// https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject
// The thing we want is the last UUID before the ?
const urlBeforeQuery = imageSet.primaryUrl.split("?")[0];

const urlBeforeQuery = imageSet.primaryUrl.split("?")[0];
const thingToHash = findLastUuid(urlBeforeQuery) ?? urlBeforeQuery;
let imageFileExtension: string | undefined = imageSet.fileType?.ext;
if (!imageFileExtension) {
// Try to get the extension from the url
imageFileExtension = urlBeforeQuery.split(".").pop();

const hash = hashOfString(thingToHash);
imageSet.outputFileName = `${hash}.${imageSet.fileType.ext}`;

imageSet.primaryFileOutputPath = Path.posix.join(
imageOutputRootPath?.length > 0
? imageOutputRootPath
: imageSet.pathToParentDocument!,
imageSet.outputFileName
);

if (imageOutputRootPath && imageSet.localizedUrls.length) {
if (!imageFileExtension) {
error(
"imageOutputPath was declared, but one or more localizedUrls were found too. If you are going to localize screenshots, then you can't declare an imageOutputPath."
`Something wrong with the filetype extension on the blob we got from ${imageSet.primaryUrl}`
);
exit(1);
}
}

// Since most images come from pasting screenshots, there normally isn't a filename. That's fine; we just make a hash of the URL.
// Images that are stored by Notion come to us with a complex URL that changes over time, so we pick out the UUID that doesn't change. Example:
// https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject
// But around Sept 2023, they changed the url to be something like:
// https://prod-files-secure.s3.us-west-2.amazonaws.com/d9a2b712-cf69-4bd6-9d65-87a4ceeacca2/d1bcdc8c-b065-4e40-9a11-392aabeb220e/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20230915%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230915T161258Z&X-Amz-Expires=3600&X-Amz-Signature=28fca48e65fba86d539c3c4b7676fce1fa0857aa194f7b33dd4a468ecca6ab24&X-Amz-SignedHeaders=host&x-id=GetObject
// The thing we want is the last UUID before the ?

imageSet.filePathToUseInMarkdown =
(imagePrefix?.length > 0 ? imagePrefix : ".") +
"/" +
imageSet.outputFileName;
} else {
const thingToHash = findLastUuid(urlBeforeQuery) ?? urlBeforeQuery;

const hash = hashOfString(thingToHash);
imageSet.outputFileName = `${hash}.${imageFileExtension}`;

imageSet.primaryFileOutputPath = Path.posix.join(
imageOutputRootPath?.length > 0
? imageOutputRootPath
: imageSet.pathToParentDocument!,
imageSet.outputFileName
);

if (imageOutputRootPath && imageSet.localizedUrls.length) {
error(
`Something wrong with the filetype extension on the blob we got from ${imageSet.primaryUrl}`
"imageOutputPath was declared, but one or more localizedUrls were found too. If you are going to localize screenshots, then you can't declare an imageOutputPath."
);
exit(1);
}

imageSet.filePathToUseInMarkdown =
(imagePrefix?.length > 0 ? imagePrefix : ".") +
"/" +
imageSet.outputFileName;
}

function findLastUuid(url: string): string | null {
Expand Down
12 changes: 12 additions & 0 deletions src/makeImagePersistencePlan.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ test("primary file with defaults for image output path and prefix", () => {
);
expect(imageSet.filePathToUseInMarkdown).toBe(`./${expectedHash}.png`);
});
// Covers the case where the ImageSet has no `fileType`: the output file name
// is still expected to end in ".png", the extension visible in the primary URL.
test("falls back to getting file extension from url if not in fileType", () => {
  const url = "https://s3.us-west-2.amazonaws.com/primaryImage.png";
  const input: ImageSet = {
    primaryUrl: url,
    localizedUrls: [],
    pathToParentDocument: "/pathToParentSomewhere/",
  };

  makeImagePersistencePlan(input, "", "");

  // The expected name hashes the full URL (it has no query string and,
  // presumably, no UUID for the planner to pick out instead).
  const hashedUrl = hashOfString(url);
  expect(input.outputFileName).toBe(`${hashedUrl}.png`);
});

test("properly extract UUID from old-style notion image url", () => {
const imageSet: ImageSet = {
Expand Down

0 comments on commit 7f1fd40

Please sign in to comment.