Skip to content

Commit 9520ec3

Browse files
committed
Adding guards in HSCDataSet for the manifest representation of un-downloaded files
- Should fix issue #127.
- Moved removal of incomplete downloads from the prune stage to the filesystem read stage.
- Added a clearer error for the case where HSCDataSet arrives at an absurdly small image size to crop to.
1 parent 0467d4a commit 9520ec3

File tree

1 file changed

+17
-8
lines changed

1 file changed

+17
-8
lines changed

src/fibad/data_sets/hsc_data_set.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -520,17 +520,24 @@ def _read_filter_catalog(
520520
object_id = row["object_id"]
521521
filter = row["filter"]
522522
filename = row["filename"]
523+
if "dim" in colnames:
524+
dim = tuple(row["dim"])
525+
526+
# Skip over any files that are marked as having failed to download,
527+
# or have a dimension listed less than 1px x 1px
528+
if filename == "Attempted" or min(dim) < 1:
529+
continue
523530

531+
# Insert into the filter catalog.
524532
if object_id not in filter_catalog:
525533
filter_catalog[object_id] = {}
526-
527534
filter_catalog[object_id][filter] = filename
528535

529-
# Dimension is optional
536+
# Dimension is optional, insert into dimension catalog.
530537
if "dim" in colnames:
531538
if object_id not in dim_catalog:
532539
dim_catalog[object_id] = []
533-
dim_catalog[object_id].append(tuple(row["dim"]))
540+
dim_catalog[object_id].append(dim)
534541

535542
return (filter_catalog, dim_catalog) if "dim" in colnames else filter_catalog
536543

@@ -632,11 +639,6 @@ def _prune_objects(self, filters_ref: list[str]):
632639
filters_ref = sorted(filters_ref)
633640
self.prune_count = 0
634641
for index, (object_id, filters) in enumerate(self.files.items()):
635-
# Drop objects that failed to download
636-
if any("Attempted" in v for v in filters.items()):
637-
msg = f"Attempted to download {object_id} but failed. Pruning."
638-
self._mark_for_prune(object_id, msg)
639-
640642
# Drop objects with missing filters
641643
filters = sorted(list(filters))
642644
if filters != filters_ref:
@@ -726,6 +728,13 @@ def _check_file_dimensions(self) -> tuple[int, int]:
726728
finally:
727729
logger.warning(msg)
728730

731+
if min(cutout_height, cutout_width) < 1:
732+
msg = "Automatic determination found an absurd dimension of "
733+
msg += f"({cutout_width}px, {cutout_height}px)\n"
734+
msg += "Please either correct the data source or set a static cutout side with the \n"
735+
msg += "crop_to configuration in the dataset section of the fibad config.\n"
736+
raise RuntimeError(msg)
737+
729738
return cutout_width, cutout_height
730739

731740
def _rebuild_manifest(self, config):

0 commit comments

Comments
 (0)