Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix time filtering transpose issue #18

Merged
merged 2 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions hera_filters/dspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,12 @@ def fourier_filter(x, data, wgts, filter_centers, filter_half_widths, mode, ridg
is multiplied by a value of (1 + ridge_alpha)). Only used in the following linear modes
(dpss_leastsq, dft_leastsq, dpss_solve, dft_solve, dpss_matrix, dft_matrix). Reasonable values
for ridge_alpha when using the DPSS and DFT modes for inpainting wide gaps are between 1e-5 and 1e-2,
but will depend on factors such as the noise level in the data and the flagging mask.
but will depend on factors such as the noise level in the data and the flagging mask. Implementation
differs slightly from the standard ridge regression in that the regularization parameter is
usually added to the diagonal of the XTX matrix before the matrix is inverted. This is done to
attempt to standardize the regularization parameter when different weighting schemes are applied.
This definition of ridge regression is equivalent to the definition used in the scikit-learn
when using the DPSS basis and uniform unity weighting.
fit_intercept: bool, optional
If true, subtracts off average of the data before fitting model to the data.
Default is False. Can be useful if the data is not centered around zero and
Expand Down Expand Up @@ -513,6 +518,7 @@ def fourier_filter(x, data, wgts, filter_centers, filter_half_widths, mode, ridg
# the transposes are undone below after filtering is complete
data = data.T
wgts = wgts.T

if 'cache' not in filter_kwargs:
cache = {}
else:
Expand Down Expand Up @@ -553,7 +559,10 @@ def fourier_filter(x, data, wgts, filter_centers, filter_half_widths, mode, ridg

if fit_intercept:
# subtract off mean of data
mean = np.sum(data * wgts, axis=tuple(filter_dims), keepdims=True) / np.sum(wgts, axis=tuple(filter_dims), keepdims=True)
if 0 in filter_dims and not filter2d:
mean = np.sum(data * wgts, axis=tuple([1]), keepdims=True) / np.sum(wgts, axis=tuple([1]), keepdims=True)
else:
mean = np.sum(data * wgts, axis=tuple(filter_dims), keepdims=True) / np.sum(wgts, axis=tuple(filter_dims), keepdims=True)
data = np.copy(data) # make a copy so we don't modify the original data
data -= mean

Expand Down
17 changes: 17 additions & 0 deletions hera_filters/tests/test_dspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,23 @@ def test_regularized_regression():
assert np.all(np.isclose(mdls[0], mdls[1]))
assert np.all(np.isclose(mdls[0], mdls[2]))

# Transpose data to check filtering the other axis
d = np.asarray([d]).T
w = np.asarray([w]).T

for mode in ['dpss_solve', 'dpss_leastsq', 'dpss_matrix']:
mdl_reg_demean, res_reg_demean, _ = dspec.fourier_filter(freqs, d, w, [0.], [700e-9], suppression_factors=[0.],
mode=mode, ridge_alpha=1e-3, eigenval_cutoff=[1e-12], fit_intercept=True, filter_dims=0)
mdls.append(mdl_reg_demean)
mdl_reg, res_reg, _ = dspec.fourier_filter(freqs, d, w, [0.], [700e-9], suppression_factors=[0.],
mode=mode, ridge_alpha=1e-3, eigenval_cutoff=[1e-12], filter_dims=0)

# Check that the demeaned regularized regression has a smaller residual norm in the flagged region than
# the non-demeaned regularized regression. This is because ridge regression reduces the amplitude of the
# coefficients, leading to a near-zero mean in the flagged region, which can be a poor prediction of the
# inpainted region given non-zero mean data.
assert np.linalg.norm((d - mdl_reg_demean)[~w[:, 0].astype(bool)]) < np.linalg.norm((d - mdl_reg)[~w[:, 0].astype(bool)])


def test_vis_clean():
# validate that fourier_filter in various clean modes gives close values to vis_clean with equivalent parameters!
Expand Down
Loading