From 2f2abc585bc0d536dd51841345d7dcbf3c84eb8d Mon Sep 17 00:00:00 2001 From: scott Date: Tue, 28 May 2024 22:45:00 -0400 Subject: [PATCH 1/3] try pydantic serializers --- src/dolphin/workflows/config/_displacement.py | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/dolphin/workflows/config/_displacement.py b/src/dolphin/workflows/config/_displacement.py index 494cf6e4a..3a7ccf224 100644 --- a/src/dolphin/workflows/config/_displacement.py +++ b/src/dolphin/workflows/config/_displacement.py @@ -14,7 +14,7 @@ ) from dolphin._log import get_log -from dolphin._types import TropoModel, TropoType +from dolphin._types import GeneralPath, TropoModel, TropoType from ._common import ( InputOptions, @@ -98,18 +98,40 @@ def _to_empty_list(cls, v): return v if v is not None else [] +import json + +from pydantic import ( + PlainSerializer, + WithJsonSchema, + field_serializer, +) +from typing_extensions import Annotated + +CslcFileList = Annotated[ + list[GeneralPath], + # PlainSerializer(lambda x: f"{x:.1e}", return_type=str), + PlainSerializer(lambda x: json.dumps([str(f) for f in x]), return_type=str), + WithJsonSchema({"type": "string"}, mode="serialization"), +] + + class DisplacementWorkflow(WorkflowBase): """Configuration for the workflow.""" # Paths to input/output files input_options: InputOptions = Field(default_factory=InputOptions) - cslc_file_list: list[Path] = Field( + cslc_file_list: CslcFileList = Field( default_factory=list, description=( "list of CSLC files, or newline-delimited file " "containing list of CSLC files." ), ) + + @field_serializer("cslc_file_list") + def _serialize_cslc_file_list(self, cslc_file_list: list[GeneralPath]) -> str: + return json.dumps([str(f) for f in cslc_file_list]) + output_options: OutputOptions = Field(default_factory=OutputOptions) # Options for each step in the workflow @@ -140,7 +162,9 @@ class DisplacementWorkflow(WorkflowBase): # internal helpers # Stores the list of directories to be created by the workflow model_config = ConfigDict( - extra="allow", json_schema_extra={"required": ["cslc_file_list"]} + extra="allow", + json_schema_extra={"required": ["cslc_file_list"]}, + arbitrary_types_allowed=True, ) # validators From 96b42f0e7d14f5e5405ea3f5e1272e7ed013598f Mon Sep 17 00:00:00 2001 From: scott Date: Tue, 28 May 2024 23:07:01 -0400 Subject: [PATCH 2/3] still not working --- src/dolphin/io/_paths.py | 7 +++-- src/dolphin/workflows/config/_common.py | 10 +++++-- src/dolphin/workflows/config/_displacement.py | 29 ++++++++----------- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/dolphin/io/_paths.py b/src/dolphin/io/_paths.py index e9f9c0645..ad0027510 100644 --- a/src/dolphin/io/_paths.py +++ b/src/dolphin/io/_paths.py @@ -67,12 +67,13 @@ def __init__(self, s3_url: Union[str, "S3Path"], unsigned: bool = False): self.path: Path = s3_url.path self._trailing_slash: str = s3_url._trailing_slash else: - parsed: ParseResult = urlparse(s3_url) + s = str(s3_url).strip() + parsed: ParseResult = urlparse(str(s)) self._scheme = parsed.scheme self._netloc = self.bucket = parsed.netloc self._parsed = parsed self.path = Path(parsed.path) - self._trailing_slash = "/" if s3_url.endswith("/") else "" + self._trailing_slash = "/" if s.endswith("/") else "" if self._scheme != "s3": raise ValueError(f"{s3_url} is not an S3 url") @@ -119,7 +120,7 @@ def parent(self): def suffix(self): return self.path.suffix - def resolve(self) -> S3Path: + def resolve(self, strict: bool = False) -> S3Path: """Resolve the path to an absolute path- S3 paths are always absolute.""" return self diff --git a/src/dolphin/workflows/config/_common.py b/src/dolphin/workflows/config/_common.py index d0b72d451..960987a40 100644 --- a/src/dolphin/workflows/config/_common.py +++ b/src/dolphin/workflows/config/_common.py @@ -17,7 +17,7 @@ from dolphin import __version__ as _dolphin_version from dolphin._log import get_log from dolphin._types import Bbox -from dolphin.io import DEFAULT_HDF5_OPTIONS, DEFAULT_TIFF_OPTIONS +from dolphin.io import DEFAULT_HDF5_OPTIONS, DEFAULT_TIFF_OPTIONS, S3Path from ._enums import ShpMethod, UnwrapMethod from ._yaml_model import YamlModel @@ -518,4 +518,10 @@ def _read_file_list_or_glob(cls, value): # noqa: ARG001: msg = f"Input file list {v_path} does not exist or is not a file." raise ValueError(msg) - return list(value) + out = [] + for v in list(value): + try: + out.append(S3Path(v)) + except ValueError: + out.append(Path(v)) + return out diff --git a/src/dolphin/workflows/config/_displacement.py b/src/dolphin/workflows/config/_displacement.py index 3a7ccf224..bd986f60a 100644 --- a/src/dolphin/workflows/config/_displacement.py +++ b/src/dolphin/workflows/config/_displacement.py @@ -14,7 +14,7 @@ ) from dolphin._log import get_log -from dolphin._types import GeneralPath, TropoModel, TropoType +from dolphin._types import TropoModel, TropoType from ._common import ( InputOptions, @@ -98,20 +98,15 @@ def _to_empty_list(cls, v): return v if v is not None else [] -import json - from pydantic import ( PlainSerializer, - WithJsonSchema, - field_serializer, ) from typing_extensions import Annotated +from dolphin.io import S3Path + CslcFileList = Annotated[ - list[GeneralPath], - # PlainSerializer(lambda x: f"{x:.1e}", return_type=str), - PlainSerializer(lambda x: json.dumps([str(f) for f in x]), return_type=str), - WithJsonSchema({"type": "string"}, mode="serialization"), + list[S3Path | Path], PlainSerializer(lambda x: [str(f) for f in x]) ] @@ -128,10 +123,6 @@ class DisplacementWorkflow(WorkflowBase): ), ) - @field_serializer("cslc_file_list") - def _serialize_cslc_file_list(self, cslc_file_list: list[GeneralPath]) -> str: - return json.dumps([str(f) for f in cslc_file_list]) - output_options: OutputOptions = Field(default_factory=OutputOptions) # Options for each step in the workflow @@ -183,7 +174,7 @@ def _check_input_files_exist(self) -> DisplacementWorkflow: input_options = self.input_options date_fmt = input_options.cslc_date_fmt # Filter out files that don't have dates in the filename - files_matching_date = [Path(f) for f in file_list if get_dates(f, fmt=date_fmt)] + files_matching_date = [f for f in file_list if get_dates(f, fmt=date_fmt)] if len(files_matching_date) < len(file_list): msg = ( f"Found {len(files_matching_date)} files with dates like {date_fmt} in" @@ -200,9 +191,13 @@ def _check_input_files_exist(self) -> DisplacementWorkflow: raise ValueError(msg) # Coerce the file_list to a sorted list of Path objects - self.cslc_file_list = [ - Path(f) for f in sort_files_by_date(file_list, file_date_fmt=date_fmt)[0] - ] + out: list[S3Path | Path] = [] + for f in sort_files_by_date(file_list, file_date_fmt=date_fmt)[0]: + try: + out.append(S3Path(f)) + except ValueError: + out.append(Path(f)) + self.cslc_file_list = out return self From 13299b694899f8700cce39b3e5e5b45d89d01035 Mon Sep 17 00:00:00 2001 From: scott Date: Wed, 29 May 2024 10:57:16 -0400 Subject: [PATCH 3/3] Make the `GeneralPath` work as Pydantic type --- src/dolphin/workflows/config/_displacement.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/dolphin/workflows/config/_displacement.py b/src/dolphin/workflows/config/_displacement.py index bd986f60a..35f22e287 100644 --- a/src/dolphin/workflows/config/_displacement.py +++ b/src/dolphin/workflows/config/_displacement.py @@ -8,13 +8,16 @@ BaseModel, ConfigDict, Field, + PlainSerializer, StringConstraints, + WithJsonSchema, field_validator, model_validator, ) from dolphin._log import get_log -from dolphin._types import TropoModel, TropoType +from dolphin._types import GeneralPath, TropoModel, TropoType +from dolphin.io import S3Path from ._common import ( InputOptions, @@ -98,15 +101,14 @@ def _to_empty_list(cls, v): return v if v is not None else [] -from pydantic import ( - PlainSerializer, -) -from typing_extensions import Annotated - -from dolphin.io import S3Path - CslcFileList = Annotated[ - list[S3Path | Path], PlainSerializer(lambda x: [str(f) for f in x]) + # Any Path-like object is acceptable + list[GeneralPath], + # All Paths will be serialized to strings: + PlainSerializer(lambda x: [str(f) for f in x]), + # Let Pydantic know what the JSON Schema should be for this custom protocol: + # https://docs.pydantic.dev/latest/concepts/json_schema/#withjsonschema-annotation + WithJsonSchema({"type": "array", "items": {"type": "string", "format": "uri"}}), ]