Fix intermediate issues with non-resampled outlier methods (#8853)

spacetelescope · Oct 2, 2024 · commit 18d165f
2 parents: c8ea68c + fb3c2cd
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 61 deletions.
diff --git a/changes/8853.outlier_detection.rst b/changes/8853.outlier_detection.rst
@@ -0,0 +1 @@
+Avoid modifying input and saving duplicate files when resample_data=False.
diff --git a/jwst/outlier_detection/tests/test_outlier_detection.py b/jwst/outlier_detection/tests/test_outlier_detection.py
@@ -203,6 +203,7 @@ def we_three_sci():
 def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd):
     """Test whole step, no outliers"""
     container = ModelContainer(list(we_three_sci))
+    container[0].var_rnoise[10, 10] = 1E9
     pristine = ModelContainer([m.copy() for m in container])
     OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample)
 
@@ -261,7 +262,9 @@ def test_outlier_step_base(we_three_sci, tmp_cwd):
     assert len(median_files) != 0
 
 
-def test_outlier_step_spec(tmp_cwd, tmp_path):
+@pytest.mark.parametrize('resample', [True, False])
+@pytest.mark.parametrize('save_intermediate', [True, False])
+def test_outlier_step_spec(tmp_cwd, tmp_path, resample, save_intermediate):
     """Test outlier step for spec data including saving intermediate results."""
     output_dir = tmp_path / 'output'
     output_dir.mkdir(exist_ok=True)
@@ -275,50 +278,33 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
     miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT"
 
     # Make a couple copies, give them unique exposure numbers and filename
-    container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()])
+    container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
     for i, model in enumerate(container):
         model.meta.filename = f'test_{i}_cal.fits'
 
     # Drop a CR on the science array in the first image
     container[0].data[209, 37] += 1
 
-    # Verify that intermediate files are removed when not saved
-    # (s2d files are expected, i2d files are not, but we'll check
-    # for them to make sure the imaging extension didn't creep back in)
-    OutlierDetectionStep.call(container, output_dir=output_dir, save_results=True)
-    for dirname in [output_dir, tmp_cwd]:
-        result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
-        i2d_files = glob(os.path.join(dirname, '*i2d*.fits'))
-        s2d_files = glob(os.path.join(dirname, '*outlier_s2d.fits'))
-        median_files = glob(os.path.join(dirname, '*median.fits'))
-        blot_files = glob(os.path.join(dirname, '*blot.fits'))
-
-        # intermediate files are removed
-        assert len(i2d_files) == 0
-        assert len(s2d_files) == 0
-        assert len(median_files) == 0
-        assert len(blot_files) == 0
-
-        # result files are written to the output directory
-        if dirname == output_dir:
-            assert len(result_files) == len(container)
-        else:
-            assert len(result_files) == 0
-
-    # Call again, but save intermediate to the output path
+    # Call outlier detection
     result = OutlierDetectionStep.call(
-        container, save_results=True, save_intermediate_results=True,
-        output_dir=output_dir
-    )
+        container, resample_data=resample,
+        output_dir=output_dir, save_results=True,
+        save_intermediate_results=save_intermediate)
 
     # Make sure nothing changed in SCI array
-    for image, corrected in zip(container, result):
-        np.testing.assert_allclose(image.data, corrected.data)
+    for image in result:
+        nn = ~np.isnan(image.data)
+        np.testing.assert_allclose(image.data[nn], miri_cal.data[nn])
 
     # Verify CR is flagged
+    assert np.isnan(result[0].data[209, 37])
     assert result[0].dq[209, 37] == OUTLIER_DO_NOT_USE
 
     # Verify that intermediate files are saved at the specified location
+    if save_intermediate:
+        expected_intermediate = len(container)
+    else:
+        expected_intermediate = 0
     for dirname in [output_dir, tmp_cwd]:
         all_files = glob(os.path.join(dirname, '*.fits'))
         result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
@@ -327,24 +313,35 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
         median_files = glob(os.path.join(dirname, '*median.fits'))
         blot_files = glob(os.path.join(dirname, '*blot.fits'))
         if dirname == output_dir:
-            # result files are written to the output directory
+            # Result files are always written to the output directory
             assert len(result_files) == len(container)
 
-            # s2d, median, and blot files are written to the output directory
-            assert len(s2d_files) == len(container)
-            assert len(blot_files) == len(container)
-            assert len(median_files) == 1
-
-            # i2d files not written
+            # s2d and blot files are written to the output directory
+            # if save_intermediate is True and resampling is set
+            if resample:
+                assert len(s2d_files) == expected_intermediate
+                assert len(blot_files) == expected_intermediate
+            else:
+                assert len(s2d_files) == 0
+                assert len(blot_files) == 0
+
+            # Only one median file is saved if save_intermediate is True,
+            # no matter how many input files there are
+            if save_intermediate:
+                assert len(median_files) == 1
+            else:
+                assert len(median_files) == 0
+
+            # i2d files are never written
             assert len(i2d_files) == 0
 
-            # nothing else was written
-            assert len(all_files) == len(s2d_files) + \
-                                     len(median_files) + \
-                                     len(result_files) + \
-                                     len(blot_files)
+            # Nothing else was written
+            assert len(all_files) == (len(s2d_files)
+                                      + len(median_files)
+                                      + len(result_files)
+                                      + len(blot_files))
         else:
-            # nothing should be written to the current directory
+            # Nothing should be written to the current directory
             assert len(result_files) == 0
             assert len(s2d_files) == 0
             assert len(median_files) == 0
@@ -674,4 +671,4 @@ def make_resamp(input_models):
         asn_id="test",
         allowed_memory=None,
     )
-    return resamp
+    return resamp
diff --git a/jwst/outlier_detection/utils.py b/jwst/outlier_detection/utils.py
@@ -79,24 +79,21 @@ def median_without_resampling(input_models,
         for i in range(len(input_models)):
 
             drizzled_model = input_models.borrow(i)
-            drizzled_model.wht = build_driz_weight(drizzled_model,
-                                                    weight_type=weight_type,
-                                                    good_bits=good_bits)
-            median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
-            input_models.shelve(drizzled_model, i, modify=True)
-
-            if save_intermediate_results:
-                # write the drizzled model to file
-                _fileio.save_drizzled(drizzled_model, make_output_path)
-
+            drizzled_data = drizzled_model.data.copy()
+            weight = build_driz_weight(drizzled_model,
+                                       weight_type=weight_type,
+                                       good_bits=good_bits)
             if i == 0:
-                input_shape = (ngroups,)+drizzled_model.data.shape
-                dtype = drizzled_model.data.dtype
+                median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
+                input_shape = (ngroups,) + drizzled_data.shape
+                dtype = drizzled_data.dtype
                 computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)
 
-            weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt)
-            drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan
-            computer.append(drizzled_model.data, i)
+            weight_threshold = compute_weight_threshold(weight, maskpt)
+            drizzled_data[weight < weight_threshold] = np.nan
+            computer.append(drizzled_data, i)
+
+            input_models.shelve(drizzled_model, i, modify=False)
 
     # Perform median combination on set of drizzled mosaics
     median_data = computer.evaluate()
@@ -154,14 +151,14 @@ def median_with_resampling(input_models,
     with input_models:
         for i, indices in enumerate(indices_by_group):
 
-            median_wcs = resamp.output_wcs
             drizzled_model = resamp.resample_group(input_models, indices)
 
             if save_intermediate_results:
                 # write the drizzled model to file
                 _fileio.save_drizzled(drizzled_model, make_output_path)
 
             if i == 0:
+                median_wcs = resamp.output_wcs
                 input_shape = (ngroups,)+drizzled_model.data.shape
                 dtype = drizzled_model.data.dtype
                 computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)