diff --git a/src/opera_utils/_dates.py b/src/opera_utils/_dates.py
index 74816d9..bca3be8 100644
--- a/src/opera_utils/_dates.py
+++ b/src/opera_utils/_dates.py
@@ -144,17 +144,24 @@ def _date_format_to_regex(date_format: str) -> re.Pattern:
 
 
 def group_by_date(
-    files: Iterable[PathLikeT], file_date_fmt: str = DATE_FORMAT
+    files: Iterable[PathLikeT],
+    file_date_fmt: str = DATE_FORMAT,
+    date_idx: int | None = None,
 ) -> dict[tuple[datetime.datetime, ...], list[PathLikeT]]:
     """Combine files by date into a dict.
 
     Parameters
     ----------
-    files: Iterable[Filename]
+    files : Iterable[Filename]
         Path to folder containing files with dates in the filename.
-    file_date_fmt: str
+    file_date_fmt : str
         Format of the date in the filename.
         Default is [dolphin.DEFAULT_DATETIME_FORMAT][]
+    date_idx : int, optional
+        If provided, uses only this index of the dates found in each filename.
+        For example, if `file_date_fmt='%Y%m%d'`, and the files have pairs of
+        these date strings but you only wish to group by the first, use
+        `date_idx=0`.
 
     Returns
     -------
@@ -182,7 +189,11 @@ def group_by_date(
     for dates, g in itertools.groupby(
         files, key=lambda x: tuple(get_dates(x, fmt=file_date_fmt))
     ):
-        grouped_images[dates].extend(list(g))
+        if date_idx is None:
+            key = dates
+        else:
+            key = (dates[date_idx],)
+        grouped_images[key].extend(list(g))
     return grouped_images
 
 
diff --git a/tests/test_dates.py b/tests/test_dates.py
index 82d2cdc..98cacde 100644
--- a/tests/test_dates.py
+++ b/tests/test_dates.py
@@ -252,3 +252,76 @@ def test_sort_by_date_different_fmt():
     )
     assert sorted_files == expected_files
     assert sorted_dates == expected_dates
+
+
+def test_group_by_date():
+    files = [
+        "slc_20180101.tif",
+        "slc_1_20190101.tif",
+        "slc_2_20190101.tif",
+        "slc_20210101.tif",
+    ]
+    expected = {
+        (datetime.datetime(2018, 1, 1),): [
+            "slc_20180101.tif",
+        ],
+        (datetime.datetime(2019, 1, 1),): [
+            "slc_1_20190101.tif",
+            "slc_2_20190101.tif",
+        ],
+        (datetime.datetime(2021, 1, 1),): [
+            "slc_20210101.tif",
+        ],
+    }
+    assert expected == _dates.group_by_date(files)
+
+
+def test_group_by_date_with_idx():
+    files = [
+        "slc_20170101_20180101.tif",
+        "slc_20170101_20190101.tif",
+        "slc_20170101_20210101.tif",
+    ]
+    expected = {
+        (
+            datetime.datetime(2017, 1, 1),
+            datetime.datetime(2018, 1, 1),
+        ): [
+            "slc_20170101_20180101.tif",
+        ],
+        (
+            datetime.datetime(2017, 1, 1),
+            datetime.datetime(2019, 1, 1),
+        ): [
+            "slc_20170101_20190101.tif",
+        ],
+        (
+            datetime.datetime(2017, 1, 1),
+            datetime.datetime(2021, 1, 1),
+        ): [
+            "slc_20170101_20210101.tif",
+        ],
+    }
+    assert expected == _dates.group_by_date(files)
+
+    expected_idx1 = {
+        (datetime.datetime(2018, 1, 1),): [
+            "slc_20170101_20180101.tif",
+        ],
+        (datetime.datetime(2019, 1, 1),): [
+            "slc_20170101_20190101.tif",
+        ],
+        (datetime.datetime(2021, 1, 1),): [
+            "slc_20170101_20210101.tif",
+        ],
+    }
+    assert expected_idx1 == _dates.group_by_date(files, date_idx=1)
+
+    expected_idx0 = {
+        (datetime.datetime(2017, 1, 1),): [
+            "slc_20170101_20180101.tif",
+            "slc_20170101_20190101.tif",
+            "slc_20170101_20210101.tif",
+        ]
+    }
+    assert expected_idx0 == _dates.group_by_date(files, date_idx=0)