Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install poetry==2.1.3
poetry install --with test -E pg && poetry run pytest papermerge/
poetry install --with test -E cloud -E pg && poetry run pytest papermerge/
env:
PAPERMERGE__DATABASE__URL: 'postgresql://pmguser:pmgpass@localhost:5432/test_pmgdb'
PAPERMERGE__MAIN__API_PREFIX: ''
6 changes: 6 additions & 0 deletions papermerge/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ class Settings(BaseSettings):
papermerge__ocr__automatic: bool = False
papermerge__search__url: str | None = None

aws_access_key_id: str | None = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned in the comments, so far Papermerge does not serve files itself when S3 storage is used, so there has been no need for aws_access_key_id and the related settings. Just keep this in mind: I assume that in your S3 setup you want Papermerge to serve files as well, which means you will also need to add code for downloading from S3?

Copy link
Contributor Author

@bl1nkker bl1nkker Aug 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you are right. In the default Papermerge + CloudFront + S3 setup, those variables are not needed.

However, in my use case (and in this PR) I’m implementing a setup without a CDN.

In this scenario, Papermerge uses boto3 to generate signed URLs for direct access to files in the object storage, so aws_access_key_id and the other AWS settings (secret access key, region name, endpoint URL and bucket name) are required.

aws_secret_access_key: str | None = None
aws_region_name: str | None = None
aws_endpoint_url: str | None = None
papermerge__s3__bucket_name: str | None = None

settings = Settings()

def get_settings():
Expand Down
49 changes: 34 additions & 15 deletions papermerge/core/features/document/s3.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,49 @@
from uuid import UUID
from urllib.parse import quote
from pathlib import Path
from urllib.parse import quote
from uuid import UUID

import boto3
from botocore.config import Config

from papermerge.core.types import ImagePreviewSize
from papermerge.core import pathlib as plib
from papermerge.core import config
from papermerge.core import pathlib as plib
from papermerge.core.types import ImagePreviewSize

settings = config.get_settings()

VALID_FOR_SECONDS = 600


def resource_sign_url(prefix, resource_path: Path):
from papermerge.core.cloudfront import sign_url
def generate_s3_signed_url(path: str):
client = boto3.client(
"s3",
aws_access_key_id=settings.aws_access_key_id,
aws_secret_access_key=settings.aws_secret_access_key,
region_name=settings.aws_region_name,
endpoint_url=settings.aws_endpoint_url,
config=Config(signature_version="s3v4"),
)
return client.generate_presigned_url(
ClientMethod="get_object",
Params={"Bucket": settings.papermerge__s3__bucket_name, "Key": path},
ExpiresIn=VALID_FOR_SECONDS,
)

encoded_path = quote(str(resource_path))

if prefix:
url = f"https://{settings.papermerge__main__cf_domain}/{prefix}/{encoded_path}"
def resource_sign_url(prefix, resource_path: Path):
encoded_path = quote(str(resource_path))
path = encoded_path if not prefix else f"{prefix}/{encoded_path}"
# if a cloudFront domain is configured -> generate a signed url via cloudFront
# else -> generate a direct signed url to object storage
if settings.papermerge__main__cf_domain is not None:
from papermerge.core.cloudfront import sign_url

return sign_url(
f"https://{settings.papermerge__main__cf_domain}/{path}",
valid_for=VALID_FOR_SECONDS,
)
else:
url = f"https://{settings.papermerge__main__cf_domain}/{encoded_path}"

return sign_url(
url,
valid_for=VALID_FOR_SECONDS,
)
return generate_s3_signed_url(path=path)


def doc_thumbnail_signed_url(uid: UUID) -> str:
Expand Down
1 change: 1 addition & 0 deletions papermerge/core/features/document/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def thumbnail_url_validator(cls, value, info):
return f"/api/thumbnails/{info.data['id']}"

# if it is not local, then it is s3 + CDN/cloudfront
# or just plain s3 without CDN
if (
"preview_status" in info.data
and info.data["preview_status"] == ImagePreviewStatus.ready
Expand Down