Skip to content

Commit

Permalink
Fix intermediate issues with non-resampled outlier methods (#8853)
Browse files Browse the repository at this point in the history
  • Loading branch information
melanieclarke authored Oct 2, 2024
2 parents c8ea68c + fb3c2cd commit 18d165f
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 61 deletions.
1 change: 1 addition & 0 deletions changes/8853.outlier_detection.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Avoid modifying the input models and saving duplicate intermediate files when ``resample_data=False``.
87 changes: 42 additions & 45 deletions jwst/outlier_detection/tests/test_outlier_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def we_three_sci():
def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd):
"""Test whole step, no outliers"""
container = ModelContainer(list(we_three_sci))
container[0].var_rnoise[10, 10] = 1E9
pristine = ModelContainer([m.copy() for m in container])
OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample)

Expand Down Expand Up @@ -261,7 +262,9 @@ def test_outlier_step_base(we_three_sci, tmp_cwd):
assert len(median_files) != 0


def test_outlier_step_spec(tmp_cwd, tmp_path):
@pytest.mark.parametrize('resample', [True, False])
@pytest.mark.parametrize('save_intermediate', [True, False])
def test_outlier_step_spec(tmp_cwd, tmp_path, resample, save_intermediate):
"""Test outlier step for spec data including saving intermediate results."""
output_dir = tmp_path / 'output'
output_dir.mkdir(exist_ok=True)
Expand All @@ -275,50 +278,33 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT"

# Make a couple copies, give them unique exposure numbers and filename
container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()])
container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
for i, model in enumerate(container):
model.meta.filename = f'test_{i}_cal.fits'

# Drop a CR on the science array in the first image
container[0].data[209, 37] += 1

# Verify that intermediate files are removed when not saved
# (s2d files are expected, i2d files are not, but we'll check
# for them to make sure the imaging extension didn't creep back in)
OutlierDetectionStep.call(container, output_dir=output_dir, save_results=True)
for dirname in [output_dir, tmp_cwd]:
result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
i2d_files = glob(os.path.join(dirname, '*i2d*.fits'))
s2d_files = glob(os.path.join(dirname, '*outlier_s2d.fits'))
median_files = glob(os.path.join(dirname, '*median.fits'))
blot_files = glob(os.path.join(dirname, '*blot.fits'))

# intermediate files are removed
assert len(i2d_files) == 0
assert len(s2d_files) == 0
assert len(median_files) == 0
assert len(blot_files) == 0

# result files are written to the output directory
if dirname == output_dir:
assert len(result_files) == len(container)
else:
assert len(result_files) == 0

# Call again, but save intermediate to the output path
# Call outlier detection
result = OutlierDetectionStep.call(
container, save_results=True, save_intermediate_results=True,
output_dir=output_dir
)
container, resample_data=resample,
output_dir=output_dir, save_results=True,
save_intermediate_results=save_intermediate)

# Make sure nothing changed in SCI array
for image, corrected in zip(container, result):
np.testing.assert_allclose(image.data, corrected.data)
for image in result:
nn = ~np.isnan(image.data)
np.testing.assert_allclose(image.data[nn], miri_cal.data[nn])

# Verify CR is flagged
assert np.isnan(result[0].data[209, 37])
assert result[0].dq[209, 37] == OUTLIER_DO_NOT_USE

# Verify that intermediate files are saved at the specified location
if save_intermediate:
expected_intermediate = len(container)
else:
expected_intermediate = 0
for dirname in [output_dir, tmp_cwd]:
all_files = glob(os.path.join(dirname, '*.fits'))
result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
Expand All @@ -327,24 +313,35 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
median_files = glob(os.path.join(dirname, '*median.fits'))
blot_files = glob(os.path.join(dirname, '*blot.fits'))
if dirname == output_dir:
# result files are written to the output directory
# Result files are always written to the output directory
assert len(result_files) == len(container)

# s2d, median, and blot files are written to the output directory
assert len(s2d_files) == len(container)
assert len(blot_files) == len(container)
assert len(median_files) == 1

# i2d files not written
# s2d and blot files are written to the output directory
# if save_intermediate is True and resampling is set
if resample:
assert len(s2d_files) == expected_intermediate
assert len(blot_files) == expected_intermediate
else:
assert len(s2d_files) == 0
assert len(blot_files) == 0

# Only one median file is saved if save_intermediate is True,
# no matter how many input files there are
if save_intermediate:
assert len(median_files) == 1
else:
assert len(median_files) == 0

# i2d files are never written
assert len(i2d_files) == 0

# nothing else was written
assert len(all_files) == len(s2d_files) + \
len(median_files) + \
len(result_files) + \
len(blot_files)
# Nothing else was written
assert len(all_files) == (len(s2d_files)
+ len(median_files)
+ len(result_files)
+ len(blot_files))
else:
# nothing should be written to the current directory
# Nothing should be written to the current directory
assert len(result_files) == 0
assert len(s2d_files) == 0
assert len(median_files) == 0
Expand Down Expand Up @@ -674,4 +671,4 @@ def make_resamp(input_models):
asn_id="test",
allowed_memory=None,
)
return resamp
return resamp
29 changes: 13 additions & 16 deletions jwst/outlier_detection/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,21 @@ def median_without_resampling(input_models,
for i in range(len(input_models)):

drizzled_model = input_models.borrow(i)
drizzled_model.wht = build_driz_weight(drizzled_model,
weight_type=weight_type,
good_bits=good_bits)
median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
input_models.shelve(drizzled_model, i, modify=True)

if save_intermediate_results:
# write the drizzled model to file
_fileio.save_drizzled(drizzled_model, make_output_path)

drizzled_data = drizzled_model.data.copy()
weight = build_driz_weight(drizzled_model,
weight_type=weight_type,
good_bits=good_bits)
if i == 0:
input_shape = (ngroups,)+drizzled_model.data.shape
dtype = drizzled_model.data.dtype
median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
input_shape = (ngroups,) + drizzled_data.shape
dtype = drizzled_data.dtype
computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)

weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt)
drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan
computer.append(drizzled_model.data, i)
weight_threshold = compute_weight_threshold(weight, maskpt)
drizzled_data[weight < weight_threshold] = np.nan
computer.append(drizzled_data, i)

input_models.shelve(drizzled_model, i, modify=False)

# Perform median combination on set of drizzled mosaics
median_data = computer.evaluate()
Expand Down Expand Up @@ -154,14 +151,14 @@ def median_with_resampling(input_models,
with input_models:
for i, indices in enumerate(indices_by_group):

median_wcs = resamp.output_wcs
drizzled_model = resamp.resample_group(input_models, indices)

if save_intermediate_results:
# write the drizzled model to file
_fileio.save_drizzled(drizzled_model, make_output_path)

if i == 0:
median_wcs = resamp.output_wcs
input_shape = (ngroups,)+drizzled_model.data.shape
dtype = drizzled_model.data.dtype
computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)
Expand Down

0 comments on commit 18d165f

Please sign in to comment.