diff --git a/CHANGES.rst b/CHANGES.rst
index 6b1162d6..f45add44 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -20,6 +20,14 @@ Bug Fixes
 Other
 -----
 
+jump
+~~~~
+
+- Enable the use of multiple integrations to find outliers. Also,
+  when the number of usable group differences is above a threshold,
+  use single-pass outlier flagging rather than iterative flagging. [#242]
+
+
 1.6.1 (2024-02-29)
 ==================
 
diff --git a/src/stcal/jump/jump.py b/src/stcal/jump/jump.py
index 03405c08..80017306 100644
--- a/src/stcal/jump/jump.py
+++ b/src/stcal/jump/jump.py
@@ -1,6 +1,7 @@
 import logging
 import multiprocessing
 import time
+
 import numpy as np
 import cv2 as cv
 import astropy.stats as stats
@@ -8,7 +9,6 @@
 from astropy.convolution import Ring2DKernel
 from astropy.convolution import convolve
 
-
 from . import constants
 from . import twopoint_difference as twopt
 
@@ -18,10 +18,10 @@
 def detect_jumps(
     frames_per_group,
-    data,
+    indata,
     gdq,
     pdq,
-    err,
+    inerr,
     gain_2d,
     readnoise_2d,
     rejection_thresh,
@@ -56,6 +56,7 @@
     minimum_groups=3,
     minimum_sigclip_groups=100,
     only_use_ints=True,
+    min_diffs_single_pass=10,
     mask_persist_grps_next_int=True,
     persist_grps_flagged=25,
 ):
@@ -81,7 +82,7 @@
     frames_per_group : int
         number of frames per group
 
-    data : float, 4D array
+    indata : float, 4D array
         science array
 
     gdq : int, 4D array
@@ -90,7 +91,7 @@
     pdq : int, 2D array
         pixel dq array
 
-    err : float, 4D array
+    inerr : float, 4D array
         error array
 
     gain_2d : float, 2D array
@@ -109,7 +110,7 @@
     four_grp_thresh : float
         cosmic ray sigma rejection threshold for ramps having 4 groups
 
-    max_cores: str
+    max_cores : str
         Maximum number of cores to use for multiprocessing. Available choices
         are 'none' (which will create one process), 'quarter', 'half', 'all'
         (of available cpu cores).
@@ -124,11 +125,11 @@
         neighbors (marginal detections). Any primary jump below this value will
         not have its neighbors flagged.
 
-    flag_4_neighbors: bool
+    flag_4_neighbors : bool
         if set to True (default is True), it will cause the four perpendicular
         neighbors of all detected jumps to also be flagged as a jump.
 
-    dqflags: dict
+    dqflags : dict
         A dictionary with at least the following keywords:
         DO_NOT_USE, SATURATED, JUMP_DET, NO_GAIN_VALUE, GOOD
@@ -209,6 +210,15 @@
     min_sat_radius_extend : float
         The minimum radius of the saturated core of a snowball for the core to
         be extended
+    minimum_groups : int
+        The minimum number of groups for jump detection
+    minimum_sigclip_groups : int
+        The minimum number of groups required to use sigma clipping to find outliers.
+    only_use_ints : boolean
+        In sigma clipping, if True only differences between integrations are compared.
+        If False, then all differences are processed at once.
+    min_diffs_single_pass : int
+        The minimum number of usable group differences required to switch from iterative
+        flagging to flagging all outliers in a single pass.
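As context for reviewers, here is a minimal sketch (not part of this diff) of driving `detect_jumps` with the new `min_diffs_single_pass` knob. The `DQFLAGS` values mirror the dict used by the tests below; the shapes and thresholds are illustrative only.

```python
import numpy as np
from stcal.jump.jump import detect_jumps

# Flag values as used by the test modules in this PR (assumed here).
DQFLAGS = {"GOOD": 0, "DO_NOT_USE": 1, "SATURATED": 2, "JUMP_DET": 4, "NO_GAIN_VALUE": 8}

nints, ngroups, nrows, ncols = 2, 12, 4, 4
rng = np.random.default_rng(0)
data = rng.normal(0.0, 10.0, (nints, ngroups, nrows, ncols)).astype(np.float32)
gdq = np.zeros(data.shape, dtype=np.uint32)       # group dq
pdq = np.zeros((nrows, ncols), dtype=np.uint32)   # pixel dq
err = np.zeros(data.shape, dtype=np.float32)
gain = np.ones((nrows, ncols), dtype=np.float32)
read_noise = np.full((nrows, ncols), 10.0, dtype=np.float32)

# 2 ints * 11 diffs = 22 usable differences >= min_diffs_single_pass, so with
# too few integrations for sigma clipping, outliers are flagged in one pass.
gdq, pdq, n_crs, n_extended, stddev = detect_jumps(
    1, data, gdq, pdq, err, gain, read_noise,
    rejection_thresh=4.0, three_grp_thresh=5.0, four_grp_thresh=6.0,
    max_cores="none", max_jump_to_flag_neighbors=200,
    min_jump_to_flag_neighbors=10, flag_4_neighbors=True, dqflags=DQFLAGS,
    min_diffs_single_pass=10,
)
```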
    Returns
    -------
@@ -235,13 +245,12 @@
 
     # Apply gain to the SCI, ERR, and readnoise arrays so they're in units
     # of electrons
-    data *= gain_2d
-    err *= gain_2d
+    data = indata * gain_2d
+    err = inerr * gain_2d
     readnoise_2d *= gain_2d
     # also apply to the after_jump thresholds
     after_jump_flag_e1 = after_jump_flag_dn1 * np.nanmedian(gain_2d)
     after_jump_flag_e2 = after_jump_flag_dn2 * np.nanmedian(gain_2d)
-
     # Apply the 2-point difference method as a first pass
     log.info("Executing two-point difference method")
     start = time.time()
@@ -280,6 +289,7 @@
             minimum_groups=3,
             minimum_sigclip_groups=minimum_sigclip_groups,
             only_use_ints=only_use_ints,
+            min_diffs_single_pass=min_diffs_single_pass,
         )
         # remove redundant bits in pixels that have jump flagged but were
         # already flagged as do_not_use or saturated.
@@ -362,6 +372,7 @@
                     minimum_groups,
                     minimum_sigclip_groups,
                     only_use_ints,
+                    min_diffs_single_pass,
                 ),
             )
@@ -388,6 +399,7 @@
                     minimum_groups,
                     minimum_sigclip_groups,
                     only_use_ints,
+                    min_diffs_single_pass,
                 ),
             )
         log.info("Creating %d processes for jump detection ", n_slices)
@@ -560,7 +572,7 @@
     Nothing, gdq array is modified.
 
     """
-    log.info("Flagging large Snowballs")
+    log.info("Flagging Snowballs")
 
     n_showers_grp = []
     total_snowballs = 0
@@ -745,10 +757,10 @@ def find_last_grp(grp, ngrps, num_grps_masked):
     ----------
     grp : int
         The location of the shower
-
+
     ngrps : int
         The number of groups in the integration
-
+
     num_grps_masked : int
         The requested number of groups to be flagged after the shower
@@ -895,7 +907,7 @@
         emission.
     min_shower_area : int
         The minimum area for a group of pixels to be flagged as a shower.
-    inner: int
+    inner : int
         The inner radius of the ring_2D_kernel used for the convolution.
     outer : int
         The outer radius of the ring_2D_kernel used for the convolution.
@@ -903,13 +915,16 @@
         The integer value of the saturation flag.
     jump_flag : int
         The integer value of the jump flag
-    ellipse_expand: float
+    ellipse_expand : float
         The relative increase in the size of the fitted ellipse to be
         applied to the shower.
-    num_grps_masked: int
-        The number of groups after the detected shower to be flagged as jump.
-    max_extended_radius: int
-        The upper limit for the extension of saturation and jump
+    num_grps_masked : int
+        The number of groups after the detected shower to be flagged as jump.
+    max_extended_radius : int
+        The upper limit for the extension of saturation and jump
+    minimum_sigclip_groups : int
+        The minimum number of groups required to use sigma clipping.
+
 
     Returns
     -------
diff --git a/src/stcal/jump/twopoint_difference.py b/src/stcal/jump/twopoint_difference.py
index b9111870..5d3e4705 100644
--- a/src/stcal/jump/twopoint_difference.py
+++ b/src/stcal/jump/twopoint_difference.py
@@ -28,6 +29,7 @@ def find_crs(
     minimum_groups=3,
     minimum_sigclip_groups=100,
     only_use_ints=True,
+    min_diffs_single_pass=10,
 ):
     """
     Find CRs/Jumps in each integration within the input data array. The input
@@ -111,6 +113,9 @@
         same group in other integrations. If False all groups across all integrations
         will be used to detect outliers.
 
+    min_diffs_single_pass : int
+        The minimum number of usable group differences required to switch from the
+        iterative flagging of cosmic rays to finding all the outliers in a single pass.
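The branch selection that the reworked `find_crs` performs can be summarized in a small standalone sketch. The helper name `choose_flagging_mode` is hypothetical, and the too-few-groups skip case is omitted, but the arithmetic mirrors the `total_groups` / `total_usable_diffs` bookkeeping added below.

```python
def choose_flagging_mode(nints, ngroups, num_flagged_grps=0,
                         only_use_ints=True, minimum_sigclip_groups=100,
                         min_diffs_single_pass=10):
    # Same bookkeeping as the new code in find_crs.
    total_groups = nints * (ngroups - num_flagged_grps)
    total_diffs = nints * (ngroups - 1 - num_flagged_grps)
    total_usable_diffs = total_diffs - num_flagged_grps
    if (only_use_ints and nints >= minimum_sigclip_groups) or \
            (not only_use_ints and total_groups >= minimum_sigclip_groups):
        return "sigma-clip"      # enough samples for astropy sigma clipping
    if total_usable_diffs >= min_diffs_single_pass:
        return "single-pass"     # flag every outlier at once
    return "iterative"           # classic one-CR-at-a-time search

assert choose_flagging_mode(101, 6) == "sigma-clip"   # like the TSO tests below
assert choose_flagging_mode(2, 12) == "single-pass"   # 22 usable diffs >= 10
assert choose_flagging_mode(1, 5) == "iterative"      # 4 usable diffs < 10
```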
Returns ------- gdq : int, 4D array @@ -132,7 +137,7 @@ def find_crs( gdq = group_dq # Get data characteristics nints, ngroups, nrows, ncols = dataa.shape - ndiffs = ngroups - 1 + ndiffs = (ngroups - 1) * nints # get readnoise, squared read_noise_2 = read_noise**2 # create arrays for output @@ -152,186 +157,201 @@ def find_crs( for grp in range(dat.shape[1]): if np.all(np.bitwise_and(gdq[integ, grp, :, :], dnu_flag)): num_flagged_grps += 1 - total_groups = nints if only_use_ints and nints else nints * ngrps - num_flagged_grps - if (ngrps < minimum_groups and only_use_ints and nints < minimum_sigclip_groups) or ( - not only_use_ints and nints * ngrps < minimum_sigclip_groups and ngrps < minimum_groups - ): + if only_use_ints: + total_sigclip_groups = nints + else: + total_sigclip_groups = nints * (ngrps - num_flagged_grps) + total_groups = nints * (ngrps - num_flagged_grps) + total_diffs = nints * (ngrps - 1 - num_flagged_grps) + total_usable_diffs = total_diffs - num_flagged_grps + if ((ngrps < minimum_groups and only_use_ints and nints < minimum_sigclip_groups) or + (not only_use_ints and nints * ngrps < minimum_sigclip_groups and + total_groups < minimum_groups)): log.info("Jump Step was skipped because exposure has less than the minimum number of usable groups") - log.info("Data shape %s", dat.shape) - dummy = np.zeros((dataa.shape[1] - 1, dataa.shape[2], dataa.shape[3]), dtype=np.float32) - + dummy = np.zeros((dataa.shape[1] - 1, dataa.shape[2], dataa.shape[3]), + dtype=np.float32) return gdq, row_below_gdq, row_above_gdq, 0, dummy - - # set 'saturated' or 'do not use' pixels to nan in data - dat[np.where(np.bitwise_and(gdq, sat_flag))] = np.nan - dat[np.where(np.bitwise_and(gdq, dnu_flag))] = np.nan - dat[np.where(np.bitwise_and(gdq, dnu_flag + sat_flag))] = np.nan - - # calculate the differences between adjacent groups (first diffs) - # use mask on data, so the results will have sat/donotuse groups masked - first_diffs = np.diff(dat, axis=1) - - # calc. the median of first_diffs for each pixel along the group axis - first_diffs_masked = np.ma.masked_array(first_diffs, mask=np.isnan(first_diffs)) - median_diffs = np.ma.median(first_diffs_masked, axis=(0, 1)) - # calculate sigma for each pixel - sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes) - - # reset sigma so pxels with 0 readnoise are not flagged as jumps - sigma[np.where(sigma == 0.0)] = np.nan - - # compute 'ratio' for each group. this is the value that will be - # compared to 'threshold' to classify jumps. subtract the median of - # first_diffs from first_diffs, take the abs. value and divide by sigma. 
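The removed comment above (re-added below at a deeper indentation level) describes the core statistic of the two-point difference method. A quick numeric illustration with made-up values:

```python
import numpy as np

diffs = np.array([10.0, 11.0, 9.0, 300.0, 10.5])  # one pixel, five group diffs
median_diff = np.median(diffs)                    # 10.5
read_noise_2 = 10.0 ** 2                          # squared read noise
nframes = 1

# sigma = sqrt(|median_diff| + readnoise**2 / nframes), as in the code
sigma = np.sqrt(np.abs(median_diff) + read_noise_2 / nframes)
ratio = np.abs(diffs - median_diff) / sigma
print(ratio > 4.0)  # only the 300.0 diff exceeds a 4-sigma threshold
```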
- e_jump_4d = first_diffs - median_diffs[np.newaxis, :, :] - ratio_all = ( - np.abs(first_diffs - median_diffs[np.newaxis, np.newaxis, :, :]) / sigma[np.newaxis, np.newaxis, :, :] - ) - if (only_use_ints and nints >= minimum_sigclip_groups) or ( - not only_use_ints and total_groups >= minimum_sigclip_groups - ): - log.info( - " Jump Step using sigma clip %s greater than %s, rejection threshold %s", - total_groups, - minimum_sigclip_groups, - normal_rej_thresh, - ) - warnings.filterwarnings("ignore", ".*All-NaN slice encountered.*", RuntimeWarning) - warnings.filterwarnings("ignore", ".*Mean of empty slice.*", RuntimeWarning) - warnings.filterwarnings("ignore", ".*Degrees of freedom <= 0.*", RuntimeWarning) - - if only_use_ints: - mean, median, stddev = stats.sigma_clipped_stats( - first_diffs_masked, sigma=normal_rej_thresh, axis=0 - ) - clipped_diffs = stats.sigma_clip(first_diffs_masked, sigma=normal_rej_thresh, axis=0, masked=True) - else: - mean, median, stddev = stats.sigma_clipped_stats( - first_diffs_masked, sigma=normal_rej_thresh, axis=(0, 1) - ) - clipped_diffs = stats.sigma_clip( - first_diffs_masked, sigma=normal_rej_thresh, axis=(0, 1), masked=True - ) - jump_mask = np.logical_and(clipped_diffs.mask, np.logical_not(first_diffs_masked.mask)) - jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == sat_flag)] = False - jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == dnu_flag)] = False - jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == (dnu_flag + sat_flag))] = False - gdq[:, 1:, :, :] = np.bitwise_or(gdq[:, 1:, :, :], jump_mask * np.uint8(dqflags["JUMP_DET"])) - # if grp is all jump set to do not use - for integ in range(nints): - for grp in range(ngrps): - if np.all( - np.bitwise_or( - np.bitwise_and(gdq[integ, grp, :, :], jump_flag), - np.bitwise_and(gdq[integ, grp, :, :], dnu_flag), - ) - ): - jumpy, jumpx = np.where(gdq[integ, grp, :, :] == jump_flag) - gdq[integ, grp, jumpy, jumpx] = 0 - warnings.resetwarnings() else: - for integ in range(nints): - # get data, gdq for this integration - dat = dataa[integ] - gdq_integ = gdq[integ] - - # set 'saturated' or 'do not use' pixels to nan in data - dat[np.where(np.bitwise_and(gdq_integ, sat_flag))] = np.nan - dat[np.where(np.bitwise_and(gdq_integ, dnu_flag))] = np.nan - - # calculate the differences between adjacent groups (first diffs) - # use mask on data, so the results will have sat/donotuse groups masked - first_diffs = np.diff(dat, axis=0) - - # calc. the median of first_diffs for each pixel along the group axis - median_diffs = calc_med_first_diffs(first_diffs) - - # calculate sigma for each pixel - sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes) - # reset sigma so pxels with 0 readnoise are not flagged as jumps - sigma[np.where(sigma == 0.0)] = np.nan - - # compute 'ratio' for each group. this is the value that will be - # compared to 'threshold' to classify jumps. subtract the median of - # first_diffs from first_diffs, take the abs. value and divide by sigma. 
-            e_jump = first_diffs - median_diffs[np.newaxis, :, :]
-            ratio = np.abs(e_jump) / sigma[np.newaxis, :, :]
-
-            # create a 2d array containing the value of the largest 'ratio' for each group
+        # set 'saturated' or 'do not use' pixels to nan in data
+        dat[np.where(np.bitwise_and(gdq, sat_flag))] = np.nan
+        dat[np.where(np.bitwise_and(gdq, dnu_flag))] = np.nan
+        dat[np.where(np.bitwise_and(gdq, dnu_flag + sat_flag))] = np.nan
+
+        # calculate the differences between adjacent groups (first diffs)
+        # use mask on data, so the results will have sat/donotuse groups masked
+        first_diffs = np.diff(dat, axis=1)
+
+        # calc. the median of first_diffs for each pixel along the group axis
+        first_diffs_masked = np.ma.masked_array(first_diffs, mask=np.isnan(first_diffs))
+        median_diffs = np.ma.median(first_diffs_masked, axis=(0, 1))
+        # calculate sigma for each pixel
+        sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes)
+
+        # reset sigma so pixels with 0 readnoise are not flagged as jumps
+        sigma[np.where(sigma == 0.)] = np.nan
+
+        # compute 'ratio' for each group. this is the value that will be
+        # compared to 'threshold' to classify jumps. subtract the median of
+        # first_diffs from first_diffs, take the absolute value and divide by sigma.
+        e_jump_4d = first_diffs - median_diffs[np.newaxis, :, :]
+        ratio_all = np.abs(first_diffs - median_diffs[np.newaxis, np.newaxis, :, :]) / \
+            sigma[np.newaxis, np.newaxis, :, :]
+        # Test to see if there are enough groups to use sigma clipping
+        if (only_use_ints and nints >= minimum_sigclip_groups) or \
+                (not only_use_ints and total_groups >= minimum_sigclip_groups):
+            log.info(" Jump Step using sigma clip {} greater than {}, rejection threshold {}".format(
+                str(total_groups), str(minimum_sigclip_groups), str(normal_rej_thresh)))
             warnings.filterwarnings("ignore", ".*All-NaN slice encountered.*", RuntimeWarning)
-            max_ratio = np.nanmax(ratio, axis=0)
+            warnings.filterwarnings("ignore", ".*Mean of empty slice.*", RuntimeWarning)
+            warnings.filterwarnings("ignore", ".*Degrees of freedom <= 0.*", RuntimeWarning)
+
+            if only_use_ints:
+                mean, median, stddev = stats.sigma_clipped_stats(first_diffs_masked, sigma=normal_rej_thresh,
+                                                                 axis=0)
+                clipped_diffs = stats.sigma_clip(first_diffs_masked, sigma=normal_rej_thresh,
+                                                 axis=0, masked=True)
+            else:
+                mean, median, stddev = stats.sigma_clipped_stats(first_diffs_masked, sigma=normal_rej_thresh,
+                                                                 axis=(0, 1))
+                clipped_diffs = stats.sigma_clip(first_diffs_masked, sigma=normal_rej_thresh,
+                                                 axis=(0, 1), masked=True)
+            jump_mask = np.logical_and(clipped_diffs.mask, np.logical_not(first_diffs_masked.mask))
+            jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == sat_flag)] = False
+            jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == dnu_flag)] = False
+            jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == (dnu_flag + sat_flag))] = False
+            gdq[:, 1:, :, :] = np.bitwise_or(gdq[:, 1:, :, :], jump_mask *
+                                             np.uint8(dqflags["JUMP_DET"]))
+            # if grp is all jump set to do not use
+            for integ in range(nints):
+                for grp in range(ngrps):
+                    if np.all(np.bitwise_or(np.bitwise_and(gdq[integ, grp, :, :], jump_flag),
+                                            np.bitwise_and(gdq[integ, grp, :, :], dnu_flag))):
+                        jumpy, jumpx = np.where(gdq[integ, grp, :, :] == jump_flag)
+                        gdq[integ, grp, jumpy, jumpx] = 0
             warnings.resetwarnings()
-            # now see if the largest ratio of all groups for each pixel exceeds the threshold.
- # there are different threshold for 4+, 3, and 2 usable groups - num_unusable_groups = np.sum(np.isnan(first_diffs), axis=0) - row4cr, col4cr = np.where( - np.logical_and(ndiffs - num_unusable_groups >= 4, max_ratio > normal_rej_thresh) - ) - row3cr, col3cr = np.where( - np.logical_and(ndiffs - num_unusable_groups == 3, max_ratio > three_diff_rej_thresh) - ) - row2cr, col2cr = np.where( - np.logical_and(ndiffs - num_unusable_groups == 2, max_ratio > two_diff_rej_thresh) - ) - - # get the rows, col pairs for all pixels with at least one CR - all_crs_row = np.concatenate((row4cr, row3cr, row2cr)) - all_crs_col = np.concatenate((col4cr, col3cr, col2cr)) - - # iterate over all groups of the pix w/ an initial CR to look for subsequent CRs - # flag and clip the first CR found. recompute median/sigma/ratio - # and repeat the above steps of comparing the max 'ratio' for each pixel - # to the threshold to determine if another CR can be flagged and clipped. - # repeat this process until no more CRs are found. - for j in range(len(all_crs_row)): - # get arrays of abs(diffs), ratio, readnoise for this pixel - pix_first_diffs = first_diffs[:, all_crs_row[j], all_crs_col[j]] - pix_ratio = ratio[:, all_crs_row[j], all_crs_col[j]] - pix_rn2 = read_noise_2[all_crs_row[j], all_crs_col[j]] - - # Create a mask to flag CRs. pix_cr_mask = 0 denotes a CR - pix_cr_mask = np.ones(pix_first_diffs.shape, dtype=bool) - - # set the largest ratio as a CR - pix_cr_mask[np.nanargmax(pix_ratio)] = 0 - new_CR_found = True - - # loop and check for more CRs, setting the mask as you go and - # clipping the group with the CR. stop when no more CRs are found - # or there is only one two diffs left (which means there is - # actually one left, since the next CR will be masked after - # checking that condition) - while new_CR_found and (ndiffs - np.sum(np.isnan(pix_first_diffs)) > 2): - new_CR_found = False - - # set CRs to nans in first diffs to clip them - pix_first_diffs[~pix_cr_mask] = np.nan - - # recalculate median, sigma, and ratio - new_pix_median_diffs = calc_med_first_diffs(pix_first_diffs) - - new_pix_sigma = np.sqrt(np.abs(new_pix_median_diffs) + pix_rn2 / nframes) - new_pix_ratio = np.abs(pix_first_diffs - new_pix_median_diffs) / new_pix_sigma - - # check if largest ratio exceeds threshold appropriate for num remaining groups - - # select appropriate thresh. based on number of remaining groups - rej_thresh = normal_rej_thresh - if ndiffs - np.sum(np.isnan(pix_first_diffs)) == 3: - rej_thresh = three_diff_rej_thresh - if ndiffs - np.sum(np.isnan(pix_first_diffs)) == 2: - rej_thresh = two_diff_rej_thresh - new_pix_max_ratio_idx = np.nanargmax(new_pix_ratio) # index of largest ratio - if new_pix_ratio[new_pix_max_ratio_idx] > rej_thresh: + else: # There are not enough groups for sigma clipping + + # set 'saturated' or 'do not use' pixels to nan in data + dat[np.where(np.bitwise_and(gdq, sat_flag))] = np.nan + dat[np.where(np.bitwise_and(gdq, dnu_flag))] = np.nan + + # calculate the differences between adjacent groups (first diffs) + # use mask on data, so the results will have sat/donotuse groups masked + first_diffs = np.diff(dat, axis=1) + + if total_usable_diffs >= min_diffs_single_pass: + median_diffs = np.nanmedian(first_diffs, axis=(0, 1)) + # calculate sigma for each pixel + sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes) + # reset sigma so pixels with 0 read noise are not flagged as jumps + sigma[np.where(sigma == 0.)] = np.nan + + # compute 'ratio' for each group. 
this is the value that will be
+                # compared to 'threshold' to classify jumps. subtract the median of
+                # first_diffs from first_diffs, take the abs. value and divide by sigma.
+                e_jump = first_diffs - median_diffs[np.newaxis, np.newaxis, :, :]
+
+                ratio = np.abs(e_jump) / sigma[np.newaxis, np.newaxis, :, :]
+                masked_ratio = np.ma.masked_greater(ratio, normal_rej_thresh)
+                # The jump mask is the ratio greater than the threshold and the difference is usable
+                jump_mask = np.logical_and(masked_ratio.mask, np.logical_not(first_diffs_masked.mask))
+                gdq[:, 1:, :, :] = np.bitwise_or(gdq[:, 1:, :, :], jump_mask *
+                                                 np.uint8(dqflags["JUMP_DET"]))
+            else:  # low number of diffs requires iterative flagging
+                # calculate the differences between adjacent groups (first diffs)
+                # use mask on data, so the results will have sat/donotuse groups masked
+                first_diffs = np.abs(np.diff(dat, axis=1))
+
+                # calc. the median of first_diffs for each pixel along the group axis
+                median_diffs = calc_med_first_diffs(first_diffs)
+
+                # calculate sigma for each pixel
+                sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes)
+                # reset sigma so pixels with 0 readnoise are not flagged as jumps
+                sigma[np.where(sigma == 0.0)] = np.nan
+
+                # compute 'ratio' for each group. this is the value that will be
+                # compared to 'threshold' to classify jumps. subtract the median of
+                # first_diffs from first_diffs, take the abs. value and divide by sigma.
+                e_jump = first_diffs - median_diffs[np.newaxis, :, :]
+                ratio = np.abs(e_jump) / sigma[np.newaxis, :, :]
+
+                # create a 2d array containing the value of the largest 'ratio' for each pixel
+                warnings.filterwarnings("ignore", ".*All-NaN slice encountered.*", RuntimeWarning)
+                max_ratio = np.nanmax(ratio, axis=1)
+                warnings.resetwarnings()
+                # now see if the largest ratio of all groups for each pixel exceeds the threshold.
+                # there are different thresholds for 4+, 3, and 2 usable groups
+                num_unusable_groups = np.sum(np.isnan(first_diffs), axis=(0, 1))
+                int4cr, row4cr, col4cr = np.where(
+                    np.logical_and(ndiffs - num_unusable_groups >= 4, max_ratio > normal_rej_thresh)
+                )
+                int3cr, row3cr, col3cr = np.where(
+                    np.logical_and(ndiffs - num_unusable_groups == 3, max_ratio > three_diff_rej_thresh)
+                )
+                int2cr, row2cr, col2cr = np.where(
+                    np.logical_and(ndiffs - num_unusable_groups == 2, max_ratio > two_diff_rej_thresh)
+                )
+                # get the rows, col pairs for all pixels with at least one CR
+                all_crs_row = np.concatenate((row4cr, row3cr, row2cr))
+                all_crs_col = np.concatenate((col4cr, col3cr, col2cr))
+
+                # iterate over all groups of the pix w/ an initial CR to look for subsequent CRs
+                # flag and clip the first CR found. recompute median/sigma/ratio
+                # and repeat the above steps of comparing the max 'ratio' for each pixel
+                # to the threshold to determine if another CR can be flagged and clipped.
+                # repeat this process until no more CRs are found.
+                for j in range(len(all_crs_row)):
+                    # get arrays of abs(diffs), ratio, readnoise for this pixel
+                    pix_first_diffs = first_diffs[:, :, all_crs_row[j], all_crs_col[j]]
+                    pix_ratio = ratio[:, :, all_crs_row[j], all_crs_col[j]]
+                    pix_rn2 = read_noise_2[all_crs_row[j], all_crs_col[j]]
+
+                    # Create a mask to flag CRs. pix_cr_mask = 0 denotes a CR
+                    pix_cr_mask = np.ones(pix_first_diffs.shape, dtype=bool)
+
+                    # set the largest ratio as a CR
+                    location = np.unravel_index(np.nanargmax(pix_ratio), pix_ratio.shape)
+                    pix_cr_mask[location] = 0
                     new_CR_found = True
+                    # loop and check for more CRs, setting the mask as you go and
+                    # clipping the group with the CR. stop when no more CRs are found
+                    # or there are only two diffs left (which means there is actually
+                    # one usable diff left, since the next CR will be masked after
+                    # checking that condition)
+                    while new_CR_found and (ndiffs - np.sum(np.isnan(pix_first_diffs)) > 2):
+                        new_CR_found = False
+
+                        # set CRs to nans in first diffs to clip them
+                        pix_first_diffs[~pix_cr_mask] = np.nan
+
+                        # recalculate median, sigma, and ratio
+                        new_pix_median_diffs = calc_med_first_diffs(pix_first_diffs)
+
+                        new_pix_sigma = np.sqrt(np.abs(new_pix_median_diffs) + pix_rn2 / nframes)
+                        new_pix_ratio = np.abs(pix_first_diffs - new_pix_median_diffs) / new_pix_sigma
+
+                        # check if largest ratio exceeds threshold appropriate for num remaining groups
+
+                        # select appropriate thresh. based on number of remaining groups
+                        rej_thresh = normal_rej_thresh
+                        if ndiffs - np.sum(np.isnan(pix_first_diffs)) == 3:
+                            rej_thresh = three_diff_rej_thresh
+                        if ndiffs - np.sum(np.isnan(pix_first_diffs)) == 2:
+                            rej_thresh = two_diff_rej_thresh
+                        max_idx = np.nanargmax(new_pix_ratio)
+                        location = np.unravel_index(max_idx, new_pix_ratio.shape)
+                        if new_pix_ratio[location] > rej_thresh:
+                            new_CR_found = True
+                            pix_cr_mask[location] = 0
+                        unusable_diffs = np.sum(np.isnan(pix_first_diffs))
+                    # Found all CRs for this pix - set flags in input DQ array
+                    gdq[:, 1:, all_crs_row[j], all_crs_col[j]] = np.bitwise_or(
+                        gdq[:, 1:, all_crs_row[j], all_crs_col[j]],
+                        dqflags["JUMP_DET"] * np.invert(pix_cr_mask),
+                    )
 
     cr_integ, cr_group, cr_row, cr_col = np.where(np.bitwise_and(gdq, jump_flag))
     num_primary_crs = len(cr_group)
     if flag_4_neighbors:  # iterate over each 'jump' pixel
@@ -402,7 +422,7 @@
         flag_e_threshold = [after_jump_flag_e1, after_jump_flag_e2]
         flag_groups = [after_jump_flag_n1, after_jump_flag_n2]
         for cthres, cgroup in zip(flag_e_threshold, flag_groups):
-            if cgroup > 0 and cthres > 0:
+            if cgroup > 0:
                 cr_intg, cr_group, cr_row, cr_col = np.where(np.bitwise_and(gdq, jump_flag))
                 for j in range(len(cr_group)):
                     intg = cr_intg[j]
@@ -410,8 +430,12 @@
                     row = cr_row[j]
                     col = cr_col[j]
                     if e_jump_4d[intg, group - 1, row, col] >= cthres:
-                        for kk in range(group + 1, min(group + cgroup + 1, ngroups)):
-                            gdq[intg, kk, row, col] = np.bitwise_or(gdq[intg, kk, row, col], jump_flag)
+                        for kk in range(group, min(group + cgroup + 1, ngroups)):
+                            if (gdq[intg, kk, row, col] & sat_flag) == 0 and (
+                                gdq[intg, kk, row, col] & dnu_flag
+                            ) == 0:
+                                gdq[intg, kk, row, col] = np.bitwise_or(gdq[intg, kk, row, col], jump_flag)
+
     if "stddev" in locals():
         return gdq, row_below_gdq, row_above_gdq, num_primary_crs, stddev
 
@@ -423,7 +447,7 @@
     return gdq, row_below_gdq, row_above_gdq, num_primary_crs, dummy
 
 
-def calc_med_first_diffs(first_diffs):
+def calc_med_first_diffs(in_first_diffs):
     """Calculate the median of `first diffs` along the group axis.
If there are 4+ usable groups (e.g not flagged as saturated, donotuse, @@ -433,10 +457,9 @@ def calc_med_first_diffs(first_diffs): those three groups will be returned without any clipping. Finally, if there are two usable groups, the group with the smallest absolute difference will be returned. - Parameters ---------- - first_diffs : array, float + in_first_diffs : array, float array containing the first differences of adjacent groups for a single integration. Can be 3d or 1d (for a single pix) @@ -448,6 +471,7 @@ def calc_med_first_diffs(first_diffs): array of several pixels, a 2d array with the median for each pixel will be returned. """ + first_diffs = in_first_diffs.copy() if first_diffs.ndim == 1: # in the case where input is a single pixel num_usable_groups = len(first_diffs) - np.sum(np.isnan(first_diffs), axis=0) if num_usable_groups >= 4: # if 4+, clip largest and return median @@ -463,39 +487,56 @@ def calc_med_first_diffs(first_diffs): return np.nan - # if input is multi-dimensional - - ngroups, nrows, ncols = first_diffs.shape - num_usable_groups = ngroups - np.sum(np.isnan(first_diffs), axis=0) - median_diffs = np.zeros((nrows, ncols)) # empty array to store median for each pix - - # process groups with >=4 usable groups - row4, col4 = np.where(num_usable_groups >= 4) # locations of >= 4 usable group pixels - if len(row4) > 0: - four_slice = first_diffs[:, row4, col4] - four_slice[ - np.nanargmax(np.abs(four_slice), axis=0), np.arange(four_slice.shape[1]) - ] = np.nan # mask largest group in slice - median_diffs[row4, col4] = np.nanmedian(four_slice, axis=0) # add median to return arr for these pix - - # process groups with 3 usable groups - row3, col3 = np.where(num_usable_groups == 3) # locations of >= 4 usable group pixels - if len(row3) > 0: - three_slice = first_diffs[:, row3, col3] - median_diffs[row3, col3] = np.nanmedian(three_slice, axis=0) # add median to return arr for these pix - - # process groups with 2 usable groups - row2, col2 = np.where(num_usable_groups == 2) # locations of >= 4 usable group pixels - if len(row2) > 0: - two_slice = first_diffs[:, row2, col2] - two_slice[ - np.nanargmax(np.abs(two_slice), axis=0), np.arange(two_slice.shape[1]) - ] = np.nan # mask larger abs. val - median_diffs[row2, col2] = np.nanmin(two_slice, axis=0) # add med. 
to return arr
-
-    # set the medians all groups with less than 2 usable groups to nan to skip further
-    # calculations for these pixels
-    row_none, col_none = np.where(num_usable_groups < 2)
-    median_diffs[row_none, col_none] = np.nan
-
-    return median_diffs
+    if first_diffs.ndim == 2:  # the case where input is a single pixel across ints and diffs
+        num_usable_diffs = first_diffs.size - np.sum(np.isnan(first_diffs), axis=(0, 1))
+        if num_usable_diffs >= 4:  # if 4+, clip largest and return median
+            mask = np.ones_like(first_diffs).astype(bool)
+            location = np.unravel_index(np.nanargmax(np.abs(first_diffs)), first_diffs.shape)
+            mask[location] = False  # clip the diff with the largest abs value
+            return np.nanmedian(first_diffs[mask])
+        elif num_usable_diffs == 3:  # if 3, no clipping just return median
+            return np.nanmedian(first_diffs)
+        elif num_usable_diffs == 2:  # if 2, return diff with minimum abs
+            diff_min_idx = np.nanargmin(np.abs(first_diffs))
+            location = np.unravel_index(diff_min_idx, first_diffs.shape)
+            return first_diffs[location]
+        else:
+            return np.nan
+
+    if first_diffs.ndim == 4:
+        # if input is multi-dimensional
+        nints, ndiffs, nrows, ncols = first_diffs.shape
+        shaped_diffs = np.reshape(first_diffs, ((nints * ndiffs), nrows, ncols))
+        num_usable_diffs = (ndiffs * nints) - np.sum(np.isnan(shaped_diffs), axis=0)
+        median_diffs = np.zeros((nrows, ncols))  # empty array to store median for each pix
+
+        # process pixels with >= 4 usable diffs
+        row4, col4 = np.where(num_usable_diffs >= 4)  # locations of >= 4 usable diff pixels
+        if len(row4) > 0:
+            four_slice = shaped_diffs[:, row4, col4]
+            loc0 = np.nanargmax(np.abs(four_slice), axis=0)  # clip the diff with the largest abs value
+            shaped_diffs[loc0, row4, col4] = np.nan
+            median_diffs[row4, col4] = np.nanmedian(shaped_diffs[:, row4, col4], axis=0)
+
+        # process pixels with 3 usable diffs
+        row3, col3 = np.where(num_usable_diffs == 3)  # locations of == 3 usable diff pixels
+        if len(row3) > 0:
+            three_slice = shaped_diffs[:, row3, col3]
+            median_diffs[row3, col3] = np.nanmedian(three_slice, axis=0)  # add median to return arr for these pix
+
+        # process pixels with 2 usable diffs
+        row2, col2 = np.where(num_usable_diffs == 2)  # locations of == 2 usable diff pixels
+        if len(row2) > 0:
+            two_slice = shaped_diffs[:, row2, col2]
+            two_slice[np.nanargmax(np.abs(two_slice), axis=0),
+                      np.arange(two_slice.shape[1])] = np.nan  # mask larger abs. val
+            median_diffs[row2, col2] = np.nanmin(two_slice, axis=0)  # add med. to return arr
+
+        # set the medians of all pixels with fewer than 2 usable diffs to nan to skip
+        # further calculations for these pixels
+        row_none, col_none = np.where(num_usable_diffs < 2)
+        median_diffs[row_none, col_none] = np.nan
+
+        return median_diffs
diff --git a/tests/test_jump.py b/tests/test_jump.py
index 34575b5f..c6378713 100644
--- a/tests/test_jump.py
+++ b/tests/test_jump.py
@@ -31,6 +31,54 @@ def _cube(ngroups, readnoise=10):
     return _cube
 
 
+def test_nirspec_saturated_pix():
+    """
+    This test is based on an actual NIRSpec exposure that has some pixels
+    flagged as saturated in one or more groups, which the jump step is
+    supposed to ignore, but an old version of the code was setting JUMP flags
+    for some of the saturated groups. This test verifies that the saturated
+    groups are no longer flagged with jumps.
+ """ + ingain = 1.0 + inreadnoise = 10.7 + ngroups = 7 + nrows = 2 + ncols = 2 + nints = 1 + nframes = 1 + data = np.zeros(shape=(nints, ngroups, nrows, ncols), dtype=np.float32) + read_noise = np.full((nrows, ncols), inreadnoise, dtype=np.float32) + gdq = np.zeros(shape=(nints, ngroups, nrows, ncols), dtype=np.uint32) + err = np.zeros(shape=(nrows, ncols), dtype=np.float32) + pdq = np.zeros(shape=(nrows, ncols), dtype=np.uint32) + gain = np.ones_like(read_noise) * ingain + + # Setup the needed input pixel and DQ values + data[0, :, 1, 1] = [639854.75, 4872.451, -17861.791, 14022.15, 22320.176, + 1116.3828, 1936.9746] + gdq[0, :, 1, 1] = [0, 0, 0, 0, 0, 2, 2] + data[0, :, 0, 1] = [8.25666812e+05, -1.10471914e+05, 1.95755371e+02, 1.83118457e+03, + 1.72250879e+03, 1.81733496e+03, 1.65188281e+03] + # 2 non-sat groups means only 1 non-sat diff, so no jumps should be flagged + gdq[0, :, 0, 1] = [0, 0, 2, 2, 2, 2, 2] + data[0, :, 1, 0] = [1228767., 46392.234, -3245.6553, 7762.413, + 37190.76, 266611.62, 5072.4434] + gdq[0, :, 1, 0] = [0, 0, 0, 0, 0, 0, 2] + + # run jump detection + gdq, pdq, total_primary_crs, number_extended_events, stddev = detect_jumps(nframes, data, gdq, pdq, err, + gain, read_noise, rejection_thresh=4.0, + three_grp_thresh=5, + four_grp_thresh=6, + max_cores='none', max_jump_to_flag_neighbors=200, + min_jump_to_flag_neighbors=10, flag_4_neighbors=True, dqflags=DQFLAGS) + + # Check the results. There should not be any pixels with DQ values of 6, which + # is saturated (2) plus jump (4). All the DQ's should be either just 2 or just 4. + np.testing.assert_array_equal(gdq[0, :, 1, 1], [0, 4, 0, 4, 4, 2, 2]) + # assert that no groups are flagged when there's only 1 non-sat. grp + np.testing.assert_array_equal(gdq[0, :, 0, 1], [0, 0, 2, 2, 2, 2, 2]) + np.testing.assert_array_equal(gdq[0, :, 1, 0], [0, 4, 4, 0, 4, 4, 2]) def test_multiprocessing(): nints = 1 @@ -124,7 +172,6 @@ def test_multiprocessing_big(): assert gdq[0, 4, 204, 6] == DQFLAGS['DO_NOT_USE'] #This value would have been 5 without the fix. 
 
-
 def test_find_simple_ellipse():
     plane = np.zeros(shape=(5, 5), dtype=np.uint8)
     plane[2, 2] = DQFLAGS["JUMP_DET"]
@@ -411,6 +458,40 @@ def test_find_faint_extended_sigclip():
     # Check that the flags are not applied in the 3rd group after the event
     assert np.all(gdq[0, 4, 12:22, 14:23]) == 0
 
+# No shower is found because the event is identical in all ints
+def test_find_faint_extended_sigclip_no_shower():
+    nint, ngrps, ncols, nrows = 101, 6, 30, 30
+    data = np.zeros(shape=(nint, ngrps, nrows, ncols), dtype=np.float32)
+    gdq = np.zeros_like(data, dtype=np.uint8)
+    gain = 4
+    readnoise = np.ones(shape=(nrows, ncols), dtype=np.float32) * 6.0 * gain
+    rng = np.random.default_rng(12345)
+    data[0, 1:, 14:20, 15:20] = 6 * gain * 1.7
+    data = data + rng.normal(size=(nint, ngrps, nrows, ncols)) * readnoise
+    gdq, num_showers = find_faint_extended(data, gdq, readnoise, 1, 100,
+                                           snr_threshold=1.3,
+                                           min_shower_area=20, inner=1,
+                                           outer=2, sat_flag=2, jump_flag=4,
+                                           ellipse_expand=1.1, num_grps_masked=3)
+    # Check that no samples in group 2 are flagged as jump, either inside
+    # or outside the event
+    assert np.all(gdq[0, 1, 22, 14:23] == 0)
+    assert np.all(gdq[0, 1, 21, 16:20] == 0)
+    assert np.all(gdq[0, 1, 20, 15:22] == 0)
+    assert np.all(gdq[0, 1, 19, 15:23] == 0)
+    assert np.all(gdq[0, 1, 18, 14:23] == 0)
+    assert np.all(gdq[0, 1, 17, 14:23] == 0)
+    assert np.all(gdq[0, 1, 16, 14:23] == 0)
+    assert np.all(gdq[0, 1, 15, 14:22] == 0)
+    assert np.all(gdq[0, 1, 14, 16:22] == 0)
+    assert np.all(gdq[0, 1, 13, 17:21] == 0)
+    assert np.all(gdq[0, 1, 12, 14:23] == 0)
+    assert np.all(gdq[0, 1, 12:23, 24] == 0)
+    assert np.all(gdq[0, 1, 12:23, 13] == 0)
+
+    # Check that the flags are not applied in the 3rd group after the event
+    assert np.all(gdq[0, 4, 12:22, 14:23] == 0)
+
 
 def test_inside_ellipse5():
     ellipse = ((0, 0), (1, 2), -10)
@@ -451,7 +532,7 @@ def test_flag_persist_groups():
         sat_expand=1.1, mask_persist_grps_next_int=True, persist_grps_flagged=0)
 
-#    fits.writeto("persitflaggedgdq.fits", gdq, overwrite=True)
+
 def test_calc_num_slices():
     n_rows = 20
     max_available_cores = 10
diff --git a/tests/test_ramp_fitting.py b/tests/test_ramp_fitting.py
index aab91a12..a6edbe33 100644
--- a/tests/test_ramp_fitting.py
+++ b/tests/test_ramp_fitting.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 from stcal.ramp_fitting.ramp_fit import ramp_fit_data
 from stcal.ramp_fitting.ramp_fit_class import RampData
 from stcal.ramp_fitting.utils import compute_num_slices
diff --git a/tests/test_twopoint_difference.py b/tests/test_twopoint_difference.py
index f066b4b1..ce6d7b14 100644
--- a/tests/test_twopoint_difference.py
+++ b/tests/test_twopoint_difference.py
@@ -1,4 +1,5 @@
 import numpy as np
+from astropy.io import fits
 import pytest
 
 from stcal.jump.twopoint_difference import calc_med_first_diffs, find_crs
@@ -8,10 +9,8 @@
 @pytest.fixture()
 def setup_cube():
-    def _cube(ngroups, readnoise=10):
-        nints = 1
-        nrows = 204
-        ncols = 204
+
+    def _cube(ngroups, nints=1, nrows=204, ncols=204, readnoise=10):
         rej_threshold = 3
         nframes = 1
         data = np.zeros(shape=(nints, ngroups, nrows, ncols), dtype=np.float32)
@@ -23,6 +22,40 @@ def _cube(ngroups, readnoise=10):
     return _cube
 
 
+def test_varying_groups(setup_cube):
+    ngroups = 5
+    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, nints=1, nrows=2, ncols=2, readnoise=8)
+    data[0, :, 0, 0] = [10, 20, 30, 530, 540]
+    data[0, :, 0, 1] = [10, 20, 30, 530, np.nan]
+    data[0, :, 1, 0] = [10, 20, 530, np.nan, np.nan]
+    data[0, :, 1, 1] = [10, 520, np.nan, np.nan, np.nan]
+    out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
+        data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS
+    )
+    assert np.array_equal(out_gdq[0, :, 0, 0], [0, 0, 0, 4, 0])
+    assert np.array_equal(out_gdq[0, :, 0, 1], [0, 0, 0, 4, 0])
+    assert np.array_equal(out_gdq[0, :, 1, 0], [0, 0, 4, 0, 0])
+    assert np.array_equal(out_gdq[0, :, 1, 1], [0, 0, 0, 0, 0])
+
+
+def test_multint_pixel(setup_cube):
+    ngroups = 4
+    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, nints=7, nrows=2, ncols=2, readnoise=8)
+    data[0, :, 0, 0] = (-24, -15, 0, 13)
+    data[1, :, 0, 0] = (-24, -11, 6, 21)
+    data[2, :, 0, 0] = (-40, -28, -24, -4)
+    data[3, :, 0, 0] = (-11, 3, 11, 24)
+    data[4, :, 0, 0] = (-43, -24, -12, 1)
+    data[5, :, 0, 0] = (-45, 8537, 17380, 17437)
+    data[6, :, 0, 0] = (-178, -156, -139, -125)
+    out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
+        data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS
+    )
+    assert np.max(out_gdq) == 4  # a CR was found
+    assert np.array_equal([0, 4, 4, 4], out_gdq[5, :, 0, 0])
+
+
 def test_nocrs_noflux(setup_cube):
     ngroups = 5
     data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups)
@@ -44,20 +77,25 @@ def test_5grps_cr3_noflux(setup_cube):
     )
     assert np.max(out_gdq) == 4  # a CR was found
     assert np.argmax(out_gdq[0, :, 100, 100]) == 2  # find the CR in the expected group
 
 
-def test_5grps_cr2_noflux(setup_cube):
-    ngroups = 5
-    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups)
-    data[0, 0, 100, 100] = 10.0
-    data[0, 1:6, 100, 100] = 1000
-    out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
-        data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS
-    )
-    assert np.max(out_gdq) == 4  # a CR was found
-    assert np.argmax(out_gdq[0, :, 100, 100]) == 1  # find the CR in the expected group
-
+def test_5grps_2ints_cr2_noflux(setup_cube):
+    ngroups = 5
+    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, nints=2, ncols=2, nrows=2)
+    data[0, 1, 1, 1] = 5
+    data[1, 0, 1, 1] = 10.0
+    data[1, 1:6, 1, 1] = 1000
+    out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
+        data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS
+    )
+    assert np.max(out_gdq) == 4  # a CR was found
+    assert np.argmax(out_gdq[1, :, 1, 1]) == 1  # find the CR in the expected group
 
 def test_6grps_negative_differences_zeromedian(setup_cube):
     ngroups = 6
@@ -93,13 +131,31 @@ def test_3grps_cr2_noflux(setup_cube):
     data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups)
     data[0, 0, 100, 100] = 10.0
     data[0, 1:4, 100, 100] = 1000
+    data[0, 0, 99, 99] = 10.0
+    data[0, 2:4, 99, 99] = 1000
     out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
         data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS
     )
     assert np.max(out_gdq) == 4  # a CR was found
     # assert(1,np.argmax(out_gdq[0, :, 100, 100]))  # find the CR in the expected group
     assert np.array_equal([0, 4, 0], out_gdq[0, :, 100, 100])
+    assert np.array_equal([0, 0, 4], out_gdq[0, :, 99, 99])
+
+
+def test_2ints_2grps_noflux(setup_cube):
+    ngroups = 2
+    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, nints=2,
ncols=2, nrows=2) + data[0, 0, 1, 1] = 10.0 + data[0, 1:3, 1, 1] = 1000 + data[1, 0, 0, 0] = 10.0 + data[1, 1:3, 0, 0] = 1000 + out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(data, gdq, read_noise, rej_threshold, + rej_threshold, rej_threshold, nframes, + False, 200, 10, DQFLAGS, + minimum_groups=2) + # assert(4 == np.max(out_gdq)) # a CR was found + # assert(1,np.argmax(out_gdq[0, :, 100, 100])) # find the CR in the expected group + assert(np.array_equal([0, 4], out_gdq[0, :, 1, 1])) + assert (np.array_equal([0, 4], out_gdq[1, :, 0, 0])) def test_4grps_cr2_noflux(setup_cube): ngroups = 4 @@ -113,20 +169,24 @@ def test_4grps_cr2_noflux(setup_cube): assert np.argmax(out_gdq[0, :, 100, 100]) == 1 # find the CR in the expected group -def test_5grps_cr2_nframe2(setup_cube): - ngroups = 5 - data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups) +def test_6grps_cr2_nframe2(setup_cube): + ngroups = 6 + data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, nrows=2, ncols=2) nframes = 2 - data[0, 0, 100, 100] = 10.0 - data[0, 1, 100, 100] = 500 - data[0, 2, 100, 100] = 1002 - data[0, 3, 100, 100] = 1001 - data[0, 4, 100, 100] = 1005 + data[0, 0, 1, 1] = 10.0 + data[0, 1, 1, 1] = 500 + data[0, 2, 1, 1] = 1002 + data[0, 3, 1, 1] = 1001 + data[0, 4, 1, 1] = 1005 + data[0, 5, 1, 1] = 1015 out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs( data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS ) assert np.max(out_gdq) == 4 # a CR was found - assert np.array_equal([0, 4, 4, 0, 0], out_gdq[0, :, 100, 100]) + assert (np.array_equal([0, 4, 4, 0, 0, 0], out_gdq[0, :, 1, 1])) + assert (np.max(out_gdq[0, :, 0, 0]) == 0) + assert (np.max(out_gdq[0, :, 1, 0]) == 0) + assert (np.max(out_gdq[0, :, 0, 1]) == 0) def test_4grps_twocrs_2nd_4th(setup_cube): @@ -164,9 +224,9 @@ def test_5grps_twocrs_2nd_5thbig(setup_cube): data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups) nframes = 1 data[0, 0, 100, 100] = 10.0 - data[0, 1, 100, 100] = 60 - data[0, 2, 100, 100] = 60 - data[0, 3, 100, 100] = 60 + data[0, 1, 100, 100] = 600 + data[0, 2, 100, 100] = 600 + data[0, 3, 100, 100] = 600 data[0, 4, 100, 100] = 2115 out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs( data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS @@ -295,16 +355,16 @@ def test_5grps_cr2_negslope(setup_cube): ngroups = 5 data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups) nframes = 1 - data[0, 0, 100, 100] = 100.0 - data[0, 1, 100, 100] = 0 - data[0, 2, 100, 100] = -200 - data[0, 3, 100, 100] = -260 - data[0, 4, 100, 100] = -360 + data[0, 0, 1, 1] = 100.0 + data[0, 1, 1, 1] = 0 + data[0, 2, 1, 1] = -200 + data[0, 3, 1, 1] = -260 + data[0, 4, 1, 1] = -360 out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs( data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS ) assert np.max(out_gdq) == 4 # a CR was found - assert np.array_equal([0, 0, 4, 0, 0], out_gdq[0, :, 100, 100]) + assert np.array_equal([0, 0, 4, 0, 0], out_gdq[0, :, 1, 1]) def test_6grps_1cr(setup_cube): @@ -360,22 +420,22 @@ def test_8grps_1cr(setup_cube): def test_9grps_1cr_1sat(setup_cube): ngroups = 9 - data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, readnoise=10) + data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, readnoise=10, nrows=2, ncols=2) nframes = 1 - data[0, 0, 100, 100] = 0 - 
data[0, 1, 100, 100] = 10 - data[0, 2, 100, 100] = 21 - data[0, 3, 100, 100] = 33 - data[0, 4, 100, 100] = 46 - data[0, 5, 100, 100] = 60 - data[0, 6, 100, 100] = 1160 - data[0, 7, 100, 100] = 1175 - data[0, 8, 100, 100] = 6175 - gdq[0, 8, 100, 100] = DQFLAGS["SATURATED"] + data[0, 0, 1, 1] = 0 + data[0, 1, 1, 1] = 10 + data[0, 2, 1, 1] = 21 + data[0, 3, 1, 1] = 33 + data[0, 4, 1, 1] = 46 + data[0, 5, 1, 1] = 60 + data[0, 6, 1, 1] = 1160 + data[0, 7, 1, 1] = 1175 + data[0, 8, 1, 1] = 6175 + gdq[0, 8, 1, 1] = DQFLAGS["SATURATED"] out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs( data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS ) - assert out_gdq[0, 6, 100, 100] == 4 + assert out_gdq[0, 6, 1, 1] == 4 def test_10grps_1cr_2sat(setup_cube): @@ -402,26 +462,26 @@ def test_10grps_1cr_2sat(setup_cube): def test_11grps_1cr_3sat(setup_cube): ngroups = 11 - data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, readnoise=10) + data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, readnoise=10, nrows=2, ncols=2) nframes = 1 - data[0, 0, 100, 100] = 0 - data[0, 1, 100, 100] = 20 - data[0, 2, 100, 100] = 39 - data[0, 3, 100, 100] = 57 - data[0, 4, 100, 100] = 74 - data[0, 5, 100, 100] = 90 - data[0, 6, 100, 100] = 1160 - data[0, 7, 100, 100] = 1175 - data[0, 8, 100, 100] = 6175 - data[0, 9, 100, 100] = 6175 - data[0, 10, 100, 100] = 6175 - gdq[0, 8, 100, 100] = DQFLAGS["SATURATED"] - gdq[0, 9, 100, 100] = DQFLAGS["SATURATED"] - gdq[0, 10, 100, 100] = DQFLAGS["SATURATED"] + data[0, 0, 1, 1] = 0 + data[0, 1, 1, 1] = 20 + data[0, 2, 1, 1] = 39 + data[0, 3, 1, 1] = 57 + data[0, 4, 1, 1] = 74 + data[0, 5, 1, 1] = 90 + data[0, 6, 1, 1] = 1160 + data[0, 7, 1, 1] = 1175 + data[0, 8, 1, 1] = 6175 + data[0, 9, 1, 1] = 6175 + data[0, 10, 1, 1] = 6175 + gdq[0, 8, 1, 1] = DQFLAGS["SATURATED"] + gdq[0, 9, 1, 1] = DQFLAGS["SATURATED"] + gdq[0, 10, 1, 1] = DQFLAGS["SATURATED"] out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs( data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS ) - assert out_gdq[0, 6, 100, 100] == 4 + assert out_gdq[0, 6, 1, 1] == 4 def test_11grps_0cr_3donotuse(setup_cube): @@ -564,23 +624,34 @@ def test_10grps_nocr_2pixels_sigma0(setup_cube): def test_5grps_satat4_crat3(setup_cube): ngroups = 5 - data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, readnoise=5 * np.sqrt(2)) + data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, nrows=2, ncols=2, readnoise=5 * np.sqrt(2)) nframes = 1 - data[0, 0, 100, 100] = 10000 - data[0, 1, 100, 100] = 30000 - data[0, 2, 100, 100] = 60000 - data[0, 3, 100, 100] = 61000 - data[0, 4, 100, 100] = 61000 - gdq[0, 3, 100, 100] = DQFLAGS["SATURATED"] - gdq[0, 4, 100, 100] = DQFLAGS["SATURATED"] - out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs( - data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS - ) + data[0, 0, 1, 1] = 10000 + data[0, 1, 1, 1] = 20000 + data[0, 2, 1, 1] = 60000 + data[0, 3, 1, 1] = 61000 + data[0, 4, 1, 1] = 61000 + gdq[0, 3, 1, 1] = DQFLAGS['SATURATED'] + gdq[0, 4, 1, 1] = DQFLAGS['SATURATED'] + + data[0, 0, 0, 1] = 59800 + data[0, 1, 0, 1] = 59900 + data[0, 2, 0, 1] = 60000 + data[0, 3, 0, 1] = 61000 + data[0, 4, 0, 1] = 61000 + gdq[0, 3, 0, 1] = DQFLAGS['SATURATED'] + gdq[0, 4, 0, 1] = DQFLAGS['SATURATED'] + out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(data, gdq, 
read_noise, rej_threshold,
+                                                                         rej_threshold, rej_threshold, nframes,
+                                                                         False, 200, 10, DQFLAGS)
     # assert(4 == np.max(out_gdq))  # no CR was found
     assert np.array_equal(
-        [0, 0, DQFLAGS["JUMP_DET"], DQFLAGS["SATURATED"], DQFLAGS["SATURATED"]], out_gdq[0, :, 100, 100]
-    )
-
+        [0, 0, DQFLAGS['JUMP_DET'], DQFLAGS['SATURATED'], DQFLAGS['SATURATED']],
+        out_gdq[0, :, 1, 1])
+    assert np.array_equal(
+        [0, 0, 0, DQFLAGS['SATURATED'], DQFLAGS['SATURATED']],
+        out_gdq[0, :, 0, 1])
 
 def test_6grps_satat6_crat1(setup_cube):
     ngroups = 6
@@ -672,43 +743,23 @@ def test_10grps_satat8_crsat3and6(setup_cube):
 
 def test_median_with_saturation(setup_cube):
     ngroups = 10
     # crmag = 1000
-    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, readnoise=5 * np.sqrt(2))
-    nframes = 1
-    data[0, 0, 100, 100] = 0
-    data[0, 1, 100, 100] = 4500
-    data[0, 2, 100, 100] = 9100
-    data[0, 3, 100, 100] = 13800
-    data[0, 4, 100, 100] = 18600
-    data[0, 5, 100, 100] = 40000  # CR
-    data[0, 6, 100, 100] = 44850
-    data[0, 7, 100, 100] = 49900
-    data[0, 8:10, 100, 100] = 60000
-    gdq[0, 7:10, 100, 100] = DQFLAGS["SATURATED"]
-    out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
-        data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS
-    )
-    assert np.array_equal([0, 0, 0, 0, 0, 4, 0, 2, 2, 2], out_gdq[0, :, 100, 100])
-
-
-def test_median_with_saturation_even_num_sat_frames(setup_cube):
-    ngroups = 10
-    # crmag = 1000
-    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, readnoise=5 * np.sqrt(2))
+    data, gdq, nframes, read_noise, rej_threshold = setup_cube(ngroups, nrows=2, ncols=2, readnoise=5 * np.sqrt(2))
     nframes = 1
-    data[0, 0, 100, 100] = 0
-    data[0, 1, 100, 100] = 4500
-    data[0, 2, 100, 100] = 9100
-    data[0, 3, 100, 100] = 13800
-    data[0, 4, 100, 100] = 18600
-    data[0, 5, 100, 100] = 40000  # CR
-    data[0, 6, 100, 100] = 44850
-    data[0, 7, 100, 100] = 49900
-    data[0, 8:10, 100, 100] = 60000
-    gdq[0, 6:10, 100, 100] = DQFLAGS["SATURATED"]
-    out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
-        data, gdq, read_noise, rej_threshold, rej_threshold, rej_threshold, nframes, False, 200, 10, DQFLAGS
-    )
-    assert np.array_equal([0, 0, 0, 0, 0, 4, 2, 2, 2, 2], out_gdq[0, :, 100, 100])
+    data[0, 0, 1, 1] = 0
+    data[0, 1, 1, 1] = 4700
+    data[0, 2, 1, 1] = 9400
+    data[0, 3, 1, 1] = 14300
+    data[0, 4, 1, 1] = 19100
+    data[0, 5, 1, 1] = 40000  # CR
+    data[0, 6, 1, 1] = 44850
+    data[0, 7, 1, 1] = 49900
+    data[0, 8:10, 1, 1] = 60000
+    gdq[0, 7:10, 1, 1] = DQFLAGS['SATURATED']
+    out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(data, gdq, read_noise, rej_threshold,
+                                                                         rej_threshold, rej_threshold, nframes,
+                                                                         False, 200, 10, DQFLAGS)
+    assert np.array_equal([0, 0, 0, 0, 0, 4, 0, 2, 2, 2], out_gdq[0, :, 1, 1])
 
 
 def test_median_with_saturation_odd_number_final_difference(setup_cube):
@@ -855,7 +906,7 @@ def test_10grps_1cr_afterjump(setup_cube):
     data[0, 8, 100, 100] = 1190
     data[0, 9, 100, 100] = 1209
 
-    after_jump_flag_e1 = 1.0
+    after_jump_flag_e1 = 0.0
     out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
         data,
         gdq,
@@ -891,7 +942,7 @@ def test_10grps_1cr_afterjump_2group(setup_cube):
     data[0, 8, 100, 100] = 1190
     data[0, 9, 100, 100] = 1209
 
-    after_jump_flag_e1 = 1.0
+    after_jump_flag_e1 = 0.0
     out_gdq, row_below_gdq, rows_above_gdq, total_crs, stddev = find_crs(
         data,
         gdq,
@@ -1009,15 +1060,17 @@ def test_median_func():
     arr = np.array([1.0, 2.0,
3.0, 4.0, 5])
     assert calc_med_first_diffs(arr) == 2.5
     # 3d array, no nans
-    arr = np.zeros(5 * 2 * 2).reshape(5, 2, 2)
-    arr[:, 0, 0] = np.array([1.0, 2.0, 3.0, 4.0, 5])
-    assert calc_med_first_diffs(arr)[0, 0] == 2.5
+    arr = np.zeros(5 * 2 * 2).reshape(1, 5, 2, 2)
+    arr[0, :, 0, 0] = np.array([1.0, 2.0, 3.0, 5.0, 4])
+    arr[0, 1, 0, 1] = 1.0
+    result = calc_med_first_diffs(arr)
+    assert result[0, 0] == 2.5
     # 1d, with nans
     arr = np.array([1.0, 2.0, 3.0, np.nan, 4.0, 5, np.nan])
     assert calc_med_first_diffs(arr) == 2.5
     # 3d, with nans
-    arr = np.zeros(7 * 2 * 2).reshape(7, 2, 2)
-    arr[:, 0, 0] = np.array([1.0, 2.0, 3.0, np.nan, 4.0, 5, np.nan])
+    arr = np.zeros(7 * 2 * 2).reshape(1, 7, 2, 2)
+    arr[0, :, 0, 0] = np.array([1.0, 2.0, 3.0, np.nan, 4.0, 5, np.nan])
     assert calc_med_first_diffs(arr)[0, 0] == 2.5
 
     # single pix with exactly 4 good diffs, should also clip 1 pix and return median
     arr = np.array([1.0, 2.0, 3.0, 4.0])
     assert calc_med_first_diffs(arr) == 2
     # 3d array, no nans
-    arr = np.zeros(4 * 2 * 2).reshape(4, 2, 2)
-    arr[:, 0, 0] = np.array([1.0, 2.0, 3.0, 4.0])
+    arr = np.zeros(4 * 2 * 2).reshape(1, 4, 2, 2)
+    arr[0, :, 0, 0] = np.array([1.0, 2.0, 3.0, 4.0])
     assert calc_med_first_diffs(arr)[0, 0] == 2
     # 1d, with nans
     arr = np.array([1.0, 2.0, 3.0, np.nan, 4.0, np.nan])
     assert calc_med_first_diffs(arr) == 2
     # 3d, with nans
-    arr = np.zeros(6 * 2 * 2).reshape(6, 2, 2)
-    arr[:, 0, 0] = np.array([1.0, 2.0, 3.0, np.nan, 4.0, np.nan])
+    arr = np.zeros(6 * 2 * 2).reshape(1, 6, 2, 2)
+    arr[0, :, 0, 0] = np.array([1.0, 2.0, 3.0, np.nan, 4.0, np.nan])
     assert calc_med_first_diffs(arr)[0, 0] == 2
 
     # single pix with exactly 3 good diffs, should compute median without clipping
     arr = np.array([1.0, 2.0, 3.0])
     assert calc_med_first_diffs(arr) == 2
     # 3d array, no nans
-    arr = np.zeros(3 * 2 * 2).reshape(3, 2, 2)
-    arr[:, 0, 0] = np.array([1.0, 2.0, 3.0])
+    arr = np.zeros(3 * 2 * 2).reshape(1, 3, 2, 2)
+    arr[0, :, 0, 0] = np.array([1.0, 2.0, 3.0])
     assert calc_med_first_diffs(arr)[0, 0] == 2
     # 1d, with nans
     arr = np.array([1.0, 2.0, 3.0, np.nan, np.nan])
     assert calc_med_first_diffs(arr) == 2
     # 3d, with nans
-    arr = np.zeros(5 * 2 * 2).reshape(5, 2, 2)
-    arr[:, 0, 0] = np.array([1.0, 2.0, 3.0, np.nan, np.nan])
+    arr = np.zeros(5 * 2 * 2).reshape(1, 5, 2, 2)
+    arr[0, :, 0, 0] = np.array([1.0, 2.0, 3.0, np.nan, np.nan])
     assert calc_med_first_diffs(arr)[0, 0] == 2
 
     # # single pix with exactly 2 good diffs, should return the element with the minimum abs val
     arr = np.array([-1.0, -2.0])
     assert calc_med_first_diffs(arr) == -1
     # 3d array, no nans
-    arr = np.zeros(2 * 2 * 2).reshape(2, 2, 2)
-    arr[:, 0, 0] = np.array([-1.0, -2.0])
+    arr = np.zeros(2 * 2 * 2).reshape(1, 2, 2, 2)
+    arr[0, :, 0, 0] = np.array([-1.0, -2.0])
     assert calc_med_first_diffs(arr)[0, 0] == -1
     # 1d, with nans
     arr = np.array([-1.0, -2.0, np.nan, np.nan])
     assert calc_med_first_diffs(arr) == -1
     # 3d, with nans
-    arr = np.zeros(4 * 2 * 2).reshape(4, 2, 2)
-    arr[:, 0, 0] = np.array([-1.0, -2.0, np.nan, np.nan])
+    arr = np.zeros(4 * 2 * 2).reshape(1, 4, 2, 2)
+    arr[0, :, 0, 0] = np.array([-1.0, -2.0, np.nan, np.nan])
     assert calc_med_first_diffs(arr)[0, 0] == -1
+
+
+@pytest.mark.skip("Used for local testing")
+def test_sigma_clip():
+    hdul = fits.open('lrs_TSOjump_sigmaclip5_00_refpix.fits')
+    data = hdul['SCI'].data * 4.0
+    gdq = hdul['GROUPDQ'].data
+    indata = data[:53, :, :, :]
+    ingdq = gdq[:53, :, :, :]
+    read_noise = np.ones(shape=(indata.shape[2], indata.shape[3]), dtype=np.float32) * 5.9 * 4.0
+    hdul.close()
+    gdq, row_below_gdq, row_above_gdq, total_primary_crs, stddev = \
+        find_crs(indata, ingdq, read_noise, 3, 4, 5, 1, False, 1000, 10, DQFLAGS,
+                 after_jump_flag_e1=0.0, after_jump_flag_n1=0,
+                 after_jump_flag_e2=0.0, after_jump_flag_n2=0,
+                 copy_arrs=True, minimum_groups=3, minimum_sigclip_groups=50)
+    fits.writeto("outgdq.fits", gdq, overwrite=True)
+
+
+@pytest.mark.skip("Used for local testing")
+def test_first_grp_flag_issue():
+    nints = 8
+    nrows = 2
+    ncols = 2
+    ngroups = 10
+    readnoise = 2
+    data = np.random.normal(0, readnoise, size=(nints, ngroups, nrows, ncols))
+    read_noise = np.full((nrows, ncols), readnoise, dtype=np.float32)
+    gdq = np.zeros(shape=(nints, ngroups, nrows, ncols), dtype=np.uint32)
+
+    gdq[:, 0, :, :] = DQFLAGS['DO_NOT_USE']
+    gdq[1:, 1, :, :] = DQFLAGS['DO_NOT_USE']
+    gdq[:, -1, :, :] = DQFLAGS['DO_NOT_USE']
+    gdq, row_below_gdq, row_above_gdq, total_primary_crs, stddev = \
+        find_crs(data, gdq, read_noise, 3, 4, 5, 1, False, 1000, 10, DQFLAGS,
+                 after_jump_flag_e1=0.0, after_jump_flag_n1=0,
+                 after_jump_flag_e2=0.0, after_jump_flag_n2=0,
+                 copy_arrs=True, minimum_groups=3, minimum_sigclip_groups=50)
+    fits.writeto("outgdq.fits", gdq, overwrite=True)
+
+
+@pytest.mark.skip("Used for local testing")
+def test_5grp_TSO():
+    nints = 20
+    nrows = 2
+    ncols = 2
+    ngroups = 5
+    readnoise = 25
+    data = np.random.normal(0, 0.1 * readnoise, size=(nints, ngroups, nrows, ncols))
+    read_noise = np.full((nrows, ncols), readnoise, dtype=np.float32)
+    gdq = np.zeros(shape=(nints, ngroups, nrows, ncols), dtype=np.uint32)
+    gdq[:, 0, :, :] = DQFLAGS['DO_NOT_USE']
+    gdq[:, -1, :, :] = DQFLAGS['DO_NOT_USE']
+    data[0, :, 0, 0] = [21500, 37600, 52082, 65068, 58627]
+    data[0, :, 0, 1] = [21500, 37600, 52082, 65068, 58627]
+    gdq, row_below_gdq, row_above_gdq, total_primary_crs, stddev = \
+        find_crs(data, gdq, read_noise, 3, 4, 5, 1, False, 1000, 10, DQFLAGS,
+                 after_jump_flag_e1=0.0, after_jump_flag_n1=0,
+                 after_jump_flag_e2=0.0, after_jump_flag_n2=0,
+                 copy_arrs=True, minimum_groups=3, minimum_sigclip_groups=5000)
+    fits.writeto("new_gdq.fits", gdq, overwrite=True)
+
+
+@pytest.mark.skip("Used for local testing")
+def test_5grp_realTSO():
+    hdul = fits.open("obs2508_cutout_jump.fits")
+    gdq = hdul['groupdq'].data
+    data = hdul['sci'].data
+    readnoise = 25
+    read_noise = np.full((3, 3), readnoise, dtype=np.float32)
+
+    gdq, row_below_gdq, row_above_gdq, total_primary_crs, stddev = \
+        find_crs(data, gdq, read_noise, 3, 4, 5, 1, False, 1000, 10, DQFLAGS,
+                 after_jump_flag_e1=0.0, after_jump_flag_n1=0,
+                 after_jump_flag_e2=0.0, after_jump_flag_n2=0,
+                 copy_arrs=True, minimum_groups=3, minimum_sigclip_groups=15000)
+    fits.writeto("new_gdq_cutout.fits", gdq, overwrite=True)
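As a compact, self-contained illustration of the integration-axis sigma clipping exercised by the (skipped) TSO tests above: with `only_use_ints=True`, each group's first differences are compared across integrations (`axis=0`), so a cosmic ray in one integration stands out against the same group in all the others. Values here are synthetic and illustrative only.

```python
import numpy as np
from astropy import stats

nints, ndiffs = 101, 5
rng = np.random.default_rng(1)
first_diffs = rng.normal(10.0, 1.0, size=(nints, ndiffs, 2, 2)).astype(np.float32)
first_diffs[57, 2, 0, 0] += 500.0  # a jump in one integration only

# Clip along the integration axis, as the new sigma-clip branch does.
clipped = stats.sigma_clip(first_diffs, sigma=5.0, axis=0, masked=True)
print(np.argwhere(clipped.mask))  # the injected jump should be the only masked sample
```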