def modified(path: str, s3_client: Optional[S3Client] = None) -> datetime:
    """Get the modified datetime of an S3 object or a local file.

    Args:
        path: an S3 URL or a local file path
        s3_client: client handed to the S3 implementation when `path` is an S3 URL; not used for local paths

    Returns:
        the modified datetime returned by the S3 or local implementation
    """
    if not is_s3(path):
        return fs_local.modified(Path(path))

    bucket_name = fs_s3.bucket_name_from_path(path)
    key = fs_s3.prefix_from_path(path)
    return fs_s3.modified(bucket_name, key, s3_client)
# scripts/files/fs_local.py
def modified(path: Path) -> datetime:
    """Get the modified datetime of a local path as UTC.

    Args:
        path: local path to inspect

    Returns:
        the path's modification time as a timezone-aware UTC datetime
    """
    modified_timestamp = os.path.getmtime(path)
    return datetime.fromtimestamp(modified_timestamp, tz=timezone.utc)


# scripts/files/fs_s3.py
def modified(bucket_name: str, key: str, s3_client: Optional[S3Client] = None) -> datetime:
    """Get the modified datetime of an S3 object.

    Args:
        bucket_name: name of the bucket holding the object
        key: key of the object in the bucket
        s3_client: client used for the request; `client("s3")` is created when none is given

    Returns:
        the `LastModified` value of the object
    """
    # `s3_client` defaults to None so the signature matches `scripts.files.fs.modified`,
    # which already treats the client as optional.
    s3_client = s3_client or client("s3")
    # NOTE(review): only `LastModified` is read from the result. If `_get_object` issues a GetObject request,
    # a HeadObject request would expose the same field without opening a body stream - confirm before changing.
    return _get_object(bucket_name, key, s3_client)["LastModified"]
@mock_aws
def test_should_get_modified_datetime() -> None:
    """`modified` returns the last-modified datetime held for the S3 object."""
    bucket = "any-bucket-name"
    object_key = "any-key"
    expected = any_epoch_datetime()

    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3_client.create_bucket(Bucket=bucket)
    s3_client.put_object(Bucket=bucket, Key=object_key, Body=b"any body")

    # Overwrite the timestamp on the object held by the moto S3 backend so the expected value is known.
    stored_object = s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket].keys[object_key]
    stored_object.last_modified = expected

    actual = modified(bucket, object_key, s3_client)
    assert actual == expected
@mock_aws
def test_should_get_s3_object_modified_datetime() -> None:
    """`modified` resolves an `s3://` URL to the last-modified datetime held for the object."""
    bucket = "any-bucket-name"
    object_key = "any-key"
    expected = any_epoch_datetime()

    s3_client: S3Client = client("s3", region_name=DEFAULT_REGION_NAME)
    s3_client.create_bucket(Bucket=bucket)
    s3_client.put_object(Bucket=bucket, Key=object_key, Body=b"any body")

    # Overwrite the timestamp on the object held by the moto S3 backend so the expected value is known.
    stored_object = s3_backends[DEFAULT_ACCOUNT_ID][GLOBAL_REGION].buckets[bucket].keys[object_key]
    stored_object.last_modified = expected

    actual = modified(f"s3://{bucket}/{object_key}", s3_client)
    assert actual == expected


def test_should_get_local_file_modified_datetime(setup: str) -> None:
    """`modified` returns the modification time set on a local file."""
    file_path = os.path.join(setup, "modified.file")
    Path(file_path).touch()

    expected = any_epoch_datetime()
    # `os.utime` takes (access time, modification time); only the second value is asserted on.
    access_timestamp = any_epoch_datetime().timestamp()
    os.utime(file_path, times=(access_timestamp, expected.timestamp()))

    actual = modified(file_path)
    assert actual == expected
a/scripts/stac/imagery/item.py +++ b/scripts/stac/imagery/item.py @@ -1,7 +1,9 @@ import os from typing import Any, Dict, Tuple +from scripts.datetimes import format_rfc_3339_datetime_string from scripts.files import fs +from scripts.files.fs import modified from scripts.stac.util import checksum from scripts.stac.util.STAC_VERSION import STAC_VERSION from scripts.stac.util.stac_extensions import StacExtensions @@ -12,6 +14,7 @@ class ImageryItem: def __init__(self, id_: str, file: str) -> None: file_content = fs.read(file) + file_modified_datetime = format_rfc_3339_datetime_string(modified(file)) self.stac = { "type": "Feature", "stac_version": STAC_VERSION, @@ -24,6 +27,8 @@ def __init__(self, id_: str, file: str) -> None: "href": os.path.join(".", os.path.basename(file)), "type": "image/tiff; application=geotiff; profile=cloud-optimized", "file:checksum": checksum.multihash_as_hex(file_content), + "created": file_modified_datetime, + "updated": file_modified_datetime, } }, "stac_extensions": [StacExtensions.file.value], diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py index 453015f2b..9ad0d8125 100644 --- a/scripts/stac/imagery/tests/collection_test.py +++ b/scripts/stac/imagery/tests/collection_test.py @@ -1,14 +1,13 @@ import json import os import tempfile -from datetime import datetime +from datetime import datetime, timezone from shutil import rmtree from tempfile import mkdtemp from typing import Generator import pytest import shapely.geometry -from pytest_mock import MockerFixture from pytest_subtests import SubTests from scripts.files.fs import read @@ -17,6 +16,7 @@ from scripts.stac.imagery.metadata_constants import CollectionMetadata from scripts.stac.imagery.provider import Provider, ProviderRole from scripts.stac.util.stac_extensions import StacExtensions +from scripts.tests.datetimes_test import any_epoch_datetime # pylint: disable=duplicate-code @@ -113,10 +113,12 @@ def 
test_interval_updated_from_existing(metadata: CollectionMetadata) -> None: assert collection.stac["extent"]["temporal"]["interval"] == [["2021-01-27T00:00:00Z", "2021-02-20T00:00:00Z"]] -def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: SubTests) -> None: +def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None: collection = ImageryCollection(metadata) - mocker.patch("scripts.files.fs.read", return_value=b"") - item = ImageryItem("BR34_5000_0304", "./test/BR34_5000_0304.tiff") + item_file_path = "./scripts/tests/data/empty.tiff" + modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc) + os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp())) + item = ImageryItem("BR34_5000_0304", item_file_path) geometry = { "type": "Polygon", "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]], @@ -131,7 +133,7 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: with subtests.test(): assert { - "file:checksum": "1220a049888b3971d9ed3fd52b830cfeb379d7069d6b7a927456bcf1fabab0ec4f46", + "file:checksum": "122097b5d2b049c6ffdf608af28c4ba2744fad7f03046d1f58b2523402f30577f618", "rel": "item", "href": "./BR34_5000_0304.json", "type": "application/json", @@ -143,6 +145,10 @@ def test_add_item(mocker: MockerFixture, metadata: CollectionMetadata, subtests: with subtests.test(): assert collection.stac["extent"]["spatial"]["bbox"] == [bbox] + for property_name in ["created", "updated"]: + with subtests.test(msg=f"{property_name} property"): + assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z" + def test_write_collection(metadata: CollectionMetadata) -> None: target = mkdtemp() diff --git a/scripts/stac/imagery/tests/item_test.py b/scripts/stac/imagery/tests/item_test.py index 575551be0..4b3be4858 100644 --- a/scripts/stac/imagery/tests/item_test.py 
+++ b/scripts/stac/imagery/tests/item_test.py @@ -18,7 +18,7 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None: bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0) mocker.patch("scripts.files.fs.read", return_value=b"") - path = "./test/BR34_5000_0302.tiff" + path = "./scripts/tests/data/empty.tiff" id_ = get_file_name_from_path(path) start_datetime = "2021-01-27T00:00:00Z" end_datetime = "2021-01-27T00:00:00Z" @@ -74,7 +74,7 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No ulid = "fake_ulid" collection = ImageryCollection(metadata=metadata, collection_id=ulid) - path = "./test/BR34_5000_0302.tiff" + path = "./scripts/tests/data/empty.tiff" id_ = get_file_name_from_path(path) mocker.patch("scripts.files.fs.read", return_value=b"") item = ImageryItem(id_, path)