diff --git a/bin/picca_cf.py b/bin/picca_cf.py index dbf272727..bacf8278b 100755 --- a/bin/picca_cf.py +++ b/bin/picca_cf.py @@ -231,6 +231,18 @@ def main(cmdargs): help='Rebin factor for deltas. If not None, deltas will ' 'be rebinned by that factor') + parser.add_argument('--varlss-mod-factor', type=float, default=None, + help='Modifies weights with this factor. Requires ' + 'IVAR column to be present in deltas and an input' + ' attributes file.') + parser.add_argument('--attributes', type=str, default=None, + help='Attributes file with VAR_FUNC extension with ' + 'lambda, eta, var_lss columns.') + parser.add_argument('--attributes2', type=str, default=None, required=False, + help='Attributes file for 2nd tracer') + parser.add_argument('--renormalize-deltas', action="store_true", + help="Stacks deltas and renormalizes deltas.") + args = parser.parse_args(cmdargs) if args.nproc is None: @@ -264,18 +276,12 @@ def main(cmdargs): ### Read data 1 print('z_min_sources', args.z_min_sources) - data, num_data, z_min, z_max = io.read_deltas(args.in_dir, - cf.nside, - cf.lambda_abs, - cf.alpha, - cf.z_ref, - cosmo, - max_num_spec=args.nspec, - no_project=args.no_project, - nproc=args.nproc, - rebin_factor=args.rebin_factor, - z_min_qso=args.z_min_sources, - z_max_qso=args.z_max_sources,) + data, num_data, z_min, z_max = io.read_deltas( + args.in_dir, cf.nside, cf.lambda_abs, cf.alpha, cf.z_ref, cosmo, + max_num_spec=args.nspec, no_project=args.no_project, nproc=args.nproc, + rebin_factor=args.rebin_factor, z_min_qso=args.z_min_sources, + z_max_qso=args.z_max_sources, varlss_mod_factor=args.varlss_mod_factor, + attributes=args.attributes, renormalize_deltas=args.renormalize_deltas) del z_max cf.data = data cf.num_data = num_data @@ -290,6 +296,8 @@ def main(cmdargs): cf.alpha2 = args.z_evol2 if args.in_dir2 is None: args.in_dir2 = args.in_dir + if args.attributes2 is None: + args.attributes2 = args.attributes if args.lambda_abs2: cf.lambda_abs2 = 
constants.ABSORBER_IGM[args.lambda_abs2] else: @@ -307,7 +315,10 @@ def main(cmdargs): nproc=args.nproc, rebin_factor=args.rebin_factor, z_min_qso=args.z_min_sources, - z_max_qso=args.z_max_sources,) + z_max_qso=args.z_max_sources, + varlss_mod_factor=args.varlss_mod_factor, + attributes=args.attributes2, + renormalize_deltas=args.renormalize_deltas) del z_max2 cf.data2 = data2 cf.num_data2 = num_data2 diff --git a/bin/picca_dmat.py b/bin/picca_dmat.py index 959d6bf37..8685d9391 100755 --- a/bin/picca_dmat.py +++ b/bin/picca_dmat.py @@ -249,6 +249,18 @@ def main(cmdargs): action='store_true', help='Ignore redshift evolution when computing distortion matrix') + parser.add_argument('--varlss-mod-factor', type=float, default=None, + help='Modifies weights with this factor. Requires ' + 'IVAR column to be present in deltas and an input' + ' attributes file.') + parser.add_argument('--attributes', type=str, default=None, + help='Attributes file with VAR_FUNC extension with ' + 'lambda, eta, var_lss columns.') + parser.add_argument('--attributes2', type=str, default=None, required=False, + help='Attributes file for 2nd tracer') + parser.add_argument('--renormalize-deltas', action="store_true", + help="Stacks deltas and renormalizes deltas.") + args = parser.parse_args(cmdargs) if args.nproc is None: @@ -310,7 +322,10 @@ def main(cmdargs): nproc=args.nproc, rebin_factor=args.rebin_factor, z_min_qso=args.z_min_sources, - z_max_qso=args.z_max_sources) + z_max_qso=args.z_max_sources, + varlss_mod_factor=args.varlss_mod_factor, + attributes=args.attributes, + renormalize_deltas=args.renormalize_deltas) del z_max cf.data = data cf.num_data = num_data @@ -325,6 +340,8 @@ def main(cmdargs): cf.alpha2 = args.z_evol2 if args.in_dir2 is None: args.in_dir2 = args.in_dir + if args.attributes2 is None: + args.attributes2 = args.attributes if args.lambda_abs2: cf.lambda_abs2 = constants.ABSORBER_IGM[args.lambda_abs2] else: @@ -341,7 +358,10 @@ def main(cmdargs): nproc=args.nproc, 
rebin_factor=args.rebin_factor, z_min_qso=args.z_min_sources, - z_max_qso=args.z_max_sources) + z_max_qso=args.z_max_sources, + varlss_mod_factor=args.varlss_mod_factor, + attributes=args.attributes2, + renormalize_deltas=args.renormalize_deltas) del z_max2 cf.data2 = data2 cf.num_data2 = num_data2 diff --git a/bin/picca_xcf.py b/bin/picca_xcf.py index 63c693356..07e17c646 100755 --- a/bin/picca_xcf.py +++ b/bin/picca_xcf.py @@ -252,6 +252,16 @@ def main(cmdargs): help='Rebin factor for deltas. If not None, deltas will ' 'be rebinned by that factor') + parser.add_argument('--varlss-mod-factor', type=float, default=None, + help='Modifies weights with this factor. Requires ' + 'IVAR column to be present in deltas and an input' + ' attributes file.') + parser.add_argument('--attributes', type=str, default=None, + help='Attributes file with VAR_FUNC extension with ' + 'lambda, eta, var_lss columns.') + parser.add_argument('--renormalize-deltas', action="store_true", + help="Stacks deltas and renormalizes deltas.") + args = parser.parse_args(cmdargs) if args.nproc is None: @@ -310,7 +320,10 @@ def main(cmdargs): nproc=args.nproc, rebin_factor=args.rebin_factor, z_min_qso=args.z_min_sources, - z_max_qso=args.z_max_sources) + z_max_qso=args.z_max_sources, + varlss_mod_factor=args.varlss_mod_factor, + attributes=args.attributes, + renormalize_deltas=args.renormalize_deltas) xcf.data = data xcf.num_data = num_data userprint("") diff --git a/bin/picca_xdmat.py b/bin/picca_xdmat.py index 1da22d49b..b1daf5787 100755 --- a/bin/picca_xdmat.py +++ b/bin/picca_xdmat.py @@ -248,6 +248,17 @@ def main(cmdargs): action='store_true', help='Ignore redshift evolution when computing distortion matrix') + + parser.add_argument('--varlss-mod-factor', type=float, default=None, + help='Modifies weights with this factor. 
Requires ' + 'IVAR column to be present in deltas and an input' + ' attributes file.') + parser.add_argument('--attributes', type=str, default=None, + help='Attributes file with VAR_FUNC extension with ' + 'lambda, eta, var_lss columns.') + parser.add_argument('--renormalize-deltas', action="store_true", + help="Stacks deltas and renormalizes deltas.") + args = parser.parse_args(cmdargs) if args.nproc is None: args.nproc = cpu_count() // 2 @@ -303,7 +314,10 @@ def main(cmdargs): nproc=args.nproc, rebin_factor=args.rebin_factor, z_min_qso=args.z_min_sources, - z_max_qso=args.z_max_sources) + z_max_qso=args.z_max_sources, + varlss_mod_factor=args.varlss_mod_factor, + attributes=args.attributes, + renormalize_deltas=args.renormalize_deltas) xcf.data = data xcf.num_data = num_data userprint("\n") diff --git a/py/picca/data.py b/py/picca/data.py index 46fa9e394..c870afd26 100644 --- a/py/picca/data.py +++ b/py/picca/data.py @@ -432,7 +432,10 @@ def from_fitsio(cls, hdu, pk1d_type=False): weights = None cont = None else: - ivar = None + if 'IVAR' in hdu.get_colnames(): + ivar = hdu['IVAR'][:].astype(float) + else: + ivar = None exposures_diff = None mean_snr = None mean_reso = None @@ -556,7 +559,10 @@ def from_image(cls, hdul, pk1d_type=False, z_min_qso=0, z_max_qso=10): else: raise KeyError("Did not find LOGLAM or LAMBDA in delta file") - ivar = Nones + if 'IVAR' in hdul: + ivar = hdul["IVAR"][:].astype(float) + else: + ivar = Nones exposures_diff = Nones mean_snr = Nones mean_reso = Nones @@ -643,16 +649,20 @@ def project(self): self.delta -= mean_delta + res - def rebin(self, factor, dwave=0.8): + def rebin(self, factor): """Rebin deltas by an integer factor Args: factor: int Factor to rebin deltas (new_bin_size = factor * old_bin_size) - dwave: float - Delta lambda of original deltas """ wave = 10**np.array(self.log_lambda) + dwaves = np.round(np.diff(wave), decimals=3) + dwave = np.min(dwaves) + + # This check would not work if there are gaps. 
+ # if not np.allclose(dwave, dwaves): + # raise ValueError('Delta rebinning only implemented for linear lambda bins') start = wave.min() - dwave / 2 num_bins = np.ceil(((wave[-1] - wave[0]) / dwave + 1) / factor) diff --git a/py/picca/io.py b/py/picca/io.py index 8789483d1..1229f7272 100644 --- a/py/picca/io.py +++ b/py/picca/io.py @@ -21,7 +21,8 @@ import warnings from multiprocessing import Pool -from .utils import userprint +from .utils import ( + userprint, modify_weights_with_varlss_factor, stack_flux_and_normalize) from .data import Delta, QSO from .pk1d.prep_pk1d import exp_diff, spectral_resolution from .pk1d.prep_pk1d import spectral_resolution_desi @@ -228,8 +229,8 @@ def read_blinding(in_dir): elif len(in_dir) > 5 and in_dir[-5:] == '.fits': files += glob.glob(in_dir) else: - files += glob.glob(in_dir + '/*.fits') + glob.glob(in_dir - + '/*.fits.gz') + files += glob.glob(in_dir + '/delta*.fits') + glob.glob(in_dir + + '/delta*.fits.gz') filename = files[0] hdul = fitsio.FITS(filename) if "LAMBDA" in hdul: # This is for ImageHDU format @@ -245,7 +246,7 @@ def read_blinding(in_dir): return blinding -def read_delta_file(filename, z_min_qso=0, z_max_qso=10, rebin_factor=None): +def read_delta_file(filename, z_min_qso=0, z_max_qso=10): """Extracts deltas from a single file. Args: filename: str @@ -254,8 +255,6 @@ def read_delta_file(filename, z_min_qso=0, z_max_qso=10, rebin_factor=None): Specifies the minimum redshift for QSOs z_max_qso: float - default: 10 Specifies the maximum redshift for QSOs - rebin_factor: int - default: None - Factor to rebin the lambda grid by. If None, no rebinning is done. Returns: deltas: A dictionary with the data. 
Keys are the healpix numbers of each @@ -269,39 +268,17 @@ def read_delta_file(filename, z_min_qso=0, z_max_qso=10, rebin_factor=None): else: deltas = [Delta.from_fitsio(hdu) for hdu in hdul[1:] if z_min_qso 5 and in_dir[-5:] == '.fits': files += sorted(glob.glob(in_dir)) else: - files += sorted(glob.glob(in_dir + '/*.fits') + glob.glob(in_dir + - '/*.fits.gz')) + files += sorted( + glob.glob(in_dir + '/delta*.fits') + glob.glob(in_dir + '/delta*.fits.gz')) files = sorted(files) - if rebin_factor is not None: - userprint(f"Rebinning deltas by a factor of {rebin_factor}\n") - - arguments = [(f, z_min_qso, z_max_qso, rebin_factor) for f in files] + arguments = [(f, z_min_qso, z_max_qso) for f in files] pool = Pool(processes=nproc) results = pool.starmap(read_delta_file, arguments) pool.close() @@ -384,6 +366,27 @@ def read_deltas(in_dir, userprint("\n") + # Recalculate weights + if varlss_mod_factor is not None: + userprint(f"Recalculating weights with eta_lss={varlss_mod_factor}\n") + if not attributes: + raise ValueError('Recalculating weights require an attributes file.') + modify_weights_with_varlss_factor(deltas, attributes, varlss_mod_factor) + + # Renormalize deltas + if renormalize_deltas: + userprint(f"Stacking and renormalizing deltas\n") + z_min, z_max = 0, 10 + stack_flux_and_normalize( + deltas, lambda_abs * (1 + z_min) + 4.0, lambda_abs * (1 + z_max) - 4.0, + dlambda=8.0) + + # Rebin + if rebin_factor is not None: + userprint(f"Rebinning deltas by a factor of {rebin_factor}\n") + for delta_ in deltas: + delta_.rebin(rebin_factor) + # compute healpix numbers phi = [delta.ra for delta in deltas] theta = [np.pi / 2. 
- delta.dec for delta in deltas] diff --git a/py/picca/utils.py b/py/picca/utils.py index 95ce0579d..d2609b573 100644 --- a/py/picca/utils.py +++ b/py/picca/utils.py @@ -13,9 +13,11 @@ import sys import numpy as np import fitsio -import scipy.interpolate as interpolate +from astropy.io import fits import iminuit +from scipy import interpolate + def userprint(*args, **kwds): """Defines an extension of the print function. @@ -491,3 +493,63 @@ def unred(wave, ebv, R_V=3.1, LMC2=False, AVGLMC=False): corr = 1. / (10.**(0.4 * curve)) return corr + + +def modify_weights_with_varlss_factor(data, attributes, varlss_mod_factor): + """Recalculates the weights using eta and var_lss from attributes file. + Multiplies var_lss with varlss_mod_factor. + + Args: + data: list(Delta) + A list of Delta instances whose weights are recalculated in + place using eta and var_lss from the attributes file. + attributes: str + Path to attributes file. + varlss_mod_factor: float + Multiplicative factor for var_lss + """ + att = fits.open(attributes) # need astropy's case-insensitivity to work with qsonic and picca attributes files + try: + varfunc = att['VAR_FUNC'] + except KeyError: + varfunc = att['VAR_FUNC-fit'] + try: + loglam = np.log10(varfunc.data['lambda']) + except KeyError: + loglam = varfunc.data['loglam'] + interp_eta = interpolate.interp1d( + loglam, varfunc.data['eta'], fill_value='extrapolate', kind='cubic') + interp_varlss = interpolate.interp1d( + loglam, varfunc.data['var_lss'], fill_value='extrapolate', kind='cubic') + + for delta in data: + if delta.ivar is None: + raise Exception("Need IVAR in deltas") + eta = interp_eta(delta.log_lambda) + var_lss = varlss_mod_factor * interp_varlss(delta.log_lambda) + delta.weights = delta.ivar / (eta + delta.ivar * var_lss) + + +def stack_flux_and_normalize(data, l1, l2, dlambda): + n = round((l2 - l1) / dlambda + 1.1) + stack_delta = np.zeros(n) + stack_weight = np.zeros(n) + + for delta in data: + wave = 
10**delta.log_lambda + bins = ((wave - l1) / dlambda + 0.5).astype(np.int64) + stack_delta += np.bincount( + bins, weights=delta.delta * delta.weights, minlength=n) + stack_weight += np.bincount(bins, weights=delta.weights, minlength=n) + + w = stack_weight != 0 + stack_delta = stack_delta[w] + stack_weight = stack_weight[w] + stack_delta /= stack_weight + wave = (l1 + dlambda * np.arange(n))[w] + spl = interpolate.interp1d( + wave, stack_delta + 1, fill_value='extrapolate', kind='nearest') + + for delta in data: + wave = 10**delta.log_lambda + delta.delta = (1 + delta.delta) / spl(wave) - 1