From 29ff0cb31eeaad2865ed526706c25c4b06de5912 Mon Sep 17 00:00:00 2001
From: Andrei Cuceu
Date: Thu, 14 Nov 2024 17:01:40 -0500
Subject: [PATCH] Update fast metal matrices

---
 vega/metals.py | 137 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 95 insertions(+), 42 deletions(-)

diff --git a/vega/metals.py b/vega/metals.py
index 7de0ff2..40f0307 100644
--- a/vega/metals.py
+++ b/vega/metals.py
@@ -1,16 +1,13 @@
-import numpy as np
 import copy
-from astropy.io import fits
-from cachetools import cached, LRUCache
-from cachetools.keys import hashkey
 
+import numpy as np
+from astropy.io import fits
 from picca import constants as picca_constants
+from scipy.sparse import csr_matrix
 
-from . import power_spectrum
-from . import correlation_func as corr_func
 from . import coordinates
-from . import utils
-from . import pktoxi
+from . import correlation_func as corr_func
+from . import pktoxi, power_spectrum, utils
 
 
 class Metals:
@@ -209,8 +206,7 @@ def compute(self, pars, pk_lin, component):
 
         # Apply the metal matrix
         if self.new_metals:
-            xi = (self.rp_metal_dmats[(name1, name2)]
-                  @ xi.reshape(self.rp_nbins, self.rt_nbins)).flatten()
+            xi = self.rp_metal_dmats[(name1, name2)] @ xi
         else:
             xi = self._data.metal_mats[(name1, name2)].dot(xi)
 
@@ -246,15 +242,15 @@ def get_forest_weights(self, main_tracer):
         with fits.open(main_tracer['weights-path']) as hdul:
             stack_table = hdul[1].data
 
-        wave = 10**stack_table["LOGLAM"]
+        logwave = stack_table["LOGLAM"]
         weights = stack_table["WEIGHT"]
 
         rebin_factor = self.metal_matrix_config.getint('rebin_factor', None)
         if rebin_factor is not None:
-            wave = self.rebin(wave, rebin_factor)
+            logwave = self.rebin(logwave, rebin_factor)
             weights = self.rebin(weights, rebin_factor)
 
-        return wave, weights
+        return logwave, weights
 
     def get_qso_weights(self, tracer):
         assert tracer['type'] == 'discrete'
@@ -284,7 +280,8 @@ def get_rp_pairs(self, z1, z2):
         if 'discrete' not in self.main_tracer_types:
             rp_pairs = np.abs(rp_pairs)
 
-        return rp_pairs
+        mean_distance = ((r1[:, None] + r2[None, :]) / 2).ravel()
+        return rp_pairs, mean_distance
 
     def get_forest_weight_scaling(self, z, true_abs, assumed_abs):
         true_alpha = self.metal_matrix_config.getfloat(f'alpha_{true_abs}')
@@ -295,9 +292,9 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
         # Initialize tracer 1 redshift and weights
         if self.main_tracer_types[0] == 'continuous':
-            wave1, weights1 = self.get_forest_weights(self._corr_item.tracer1)
-            true_z1 = wave1 / picca_constants.ABSORBER_IGM[true_abs_1] - 1.
-            assumed_z1 = wave1 / picca_constants.ABSORBER_IGM[self.main_tracers[0]] - 1.
+            logwave1, weights1 = self.get_forest_weights(self._corr_item.tracer1)
+            true_z1 = 10**logwave1 / picca_constants.ABSORBER_IGM[true_abs_1] - 1.
+            assumed_z1 = 10**logwave1 / picca_constants.ABSORBER_IGM[self.main_tracers[0]] - 1.
             scaling_1 = self.get_forest_weight_scaling(true_z1, true_abs_1, self.main_tracers[0])
         else:
             true_z1, weights1 = self.get_qso_weights(self._corr_item.tracer1)
@@ -306,9 +303,9 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
 
         # Initialize tracer 2 redshift and weights
         if self.main_tracer_types[1] == 'continuous':
-            wave2, weights2 = self.get_forest_weights(self._corr_item.tracer2)
-            true_z2 = wave2 / picca_constants.ABSORBER_IGM[true_abs_2] - 1.
-            assumed_z2 = wave2 / picca_constants.ABSORBER_IGM[self.main_tracers[1]] - 1.
+            logwave2, weights2 = self.get_forest_weights(self._corr_item.tracer2)
+            true_z2 = 10**logwave2 / picca_constants.ABSORBER_IGM[true_abs_2] - 1.
+            assumed_z2 = 10**logwave2 / picca_constants.ABSORBER_IGM[self.main_tracers[1]] - 1.
             scaling_2 = self.get_forest_weight_scaling(true_z2, true_abs_2, self.main_tracers[1])
         else:
            true_z2, weights2 = self.get_qso_weights(self._corr_item.tracer2)
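For context on the two hunks above: the metal distortion arises because a pixel at a given observed wavelength is assigned a redshift, and hence a comoving distance, that depends on which transition is assumed to produce the absorption. A minimal sketch of that remapping, with rest-frame wavelengths hard-coded for illustration (the patched code reads them from picca_constants.ABSORBER_IGM instead):

import numpy as np

# Illustrative rest-frame wavelengths in Angstrom (assumed values;
# vega takes these from picca_constants.ABSORBER_IGM)
lambda_lya = 1215.67
lambda_si2 = 1260.42  # SiII(1260)

# Toy observed-wavelength grid, stored as log10(wavelength) like LOGLAM
logwave = np.log10(np.linspace(3600., 5500., 5))

# The same pixel maps to different redshifts depending on the absorber
assumed_z = 10**logwave / lambda_lya - 1.  # interpreted as Lya
true_z = 10**logwave / lambda_si2 - 1.     # actually SiII(1260)

# This redshift offset is what displaces the metal signal in rp
print(assumed_z - true_z)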
@@ -316,8 +313,8 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
             scaling_2 = 1.
 
         # Compute rp pairs
-        true_rp_pairs = self.get_rp_pairs(true_z1, true_z2)
-        assumed_rp_pairs = self.get_rp_pairs(assumed_z1, assumed_z2)
+        true_rp_pairs, true_mean_distance = self.get_rp_pairs(true_z1, true_z2)
+        assumed_rp_pairs, assumed_mean_distance = self.get_rp_pairs(assumed_z1, assumed_z2)
 
         # Compute weights
         weights = ((weights1 * scaling_1)[:, None] * (weights2 * scaling_2)[None, :]).ravel()
@@ -327,12 +324,72 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
             self._coordinates.rp_min, self._coordinates.rp_max, self.rp_nbins + 1)
 
         # Compute the distortion matrix
-        dmat, _, __ = np.histogram2d(
+        rp_1d_dmat, _, __ = np.histogram2d(
             assumed_rp_pairs, true_rp_pairs, bins=(rp_bin_edges, rp_bin_edges), weights=weights)
 
         # Normalize (sum of weights should be one for each input rp bin)
-        sum_true_weight, _ = np.histogram(true_rp_pairs, bins=rp_bin_edges, weights=weights)
-        dmat *= ((sum_true_weight > 0) / (sum_true_weight + (sum_true_weight == 0)))[None, :]
+        sum_rp_1d_dmat = np.sum(rp_1d_dmat, axis=0)
+        rp_1d_dmat /= (sum_rp_1d_dmat + (sum_rp_1d_dmat == 0))
+
+        # Independently, we compute the r_trans distortion matrix
+        rt_bin_edges = np.linspace(0, self._coordinates.rt_max, self.rt_nbins + 1)
+
+        # We have input distances, output distances and weights, but we do not
+        # need the absolute comoving distances, only the ratio of output to
+        # input. We histogram that ratio to speed up the rest of the
+        # calculation. The histogram needs proper solid angle weights:
+        # dist * theta = r_trans
+        # theta_max = r_trans_max / dist
+        # so the solid angle contributing at each distance is propto
+        # theta_max**2 = (r_trans_max / dist)**2 propto 1 / dist**2,
+        # and we weight the distances by this additional factor. Using the
+        # input or the output distance in the solid angle weight gives
+        # virtually the same result. We also keep only pairs whose true rp
+        # separation is small (|rp| < 20), so that this fast matrix
+        # calculation is accurate where it matters the most.
+        distance_ratio_weights, distance_ratio_bins = np.histogram(
+            assumed_mean_distance / true_mean_distance, bins=4*rt_bin_edges.size,
+            weights=weights / true_mean_distance**2 * (np.abs(true_rp_pairs) < 20.)
+        )
+        distance_ratios = (distance_ratio_bins[1:] + distance_ratio_bins[:-1]) / 2
+
+        # Now we scan as a function of separation angle or, equivalently, rt
+        rt_bin_centers = (rt_bin_edges[:-1] + rt_bin_edges[1:]) / 2
+        rt_bin_half_size = self._coordinates.rt_binsize / 2
+
+        # We oversample the correlation function rt grid to correctly compute bin migration
+        oversample = 7
+        # The -2/oversample term is needed to get an evenly spaced grid
+        delta_rt = np.linspace(
+            -rt_bin_half_size, rt_bin_half_size*(1 - 2 / oversample), oversample)
+        rt_1d_dmat = np.zeros((self.rt_nbins, self.rt_nbins))
+
+        for i, rt in enumerate(rt_bin_centers):
+            # The weight is proportional to rt + delta_rt to get the correct solid
+            # angle effect inside the bin (but it's almost a negligible effect)
+            rt_1d_dmat[:, i], _ = np.histogram(
+                (distance_ratios[:, None] * (rt + delta_rt)[None, :]).ravel(), bins=rt_bin_edges,
+                weights=(distance_ratio_weights[:, None] * (rt + delta_rt)[None, :]).ravel()
+            )
+
+        # Normalize
+        sum_rt_1d_dmat = np.sum(rt_1d_dmat, axis=0)
+        rt_1d_dmat /= (sum_rt_1d_dmat + (sum_rt_1d_dmat == 0))
+
+        # Now that we have the distortion along both r_par and r_trans, we combine
+        # them by multiplying the two matrices, with the flat index split into
+        # rt and rp parts:
+        # full_index = rt_index + self.rt_nbins * rp_index
+        # rt_index = full_index % self.rt_nbins
+        # rp_index = full_index // self.rt_nbins
+        num_bins_total = self.rp_nbins * self.rt_nbins
+        dmat = csr_matrix(
+            np.einsum('ij,kl->ikjl', rp_1d_dmat, rt_1d_dmat).reshape(num_bins_total, num_bins_total)
+        )
 
         # Mean assumed weights
         sum_assumed_weight, _ = np.histogram(assumed_rp_pairs, bins=rp_bin_edges, weights=weights)
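The einsum contraction in this hunk is equivalent to a Kronecker product of the two 1D distortion matrices, consistent with the flat index convention full_index = rt_index + rt_nbins * rp_index. A small self-contained check with toy bin counts (the sizes and seed here are made up for illustration):

import numpy as np

rng = np.random.default_rng(42)
rp_nbins, rt_nbins = 3, 4  # toy bin counts
rp_1d_dmat = rng.random((rp_nbins, rp_nbins))
rt_1d_dmat = rng.random((rt_nbins, rt_nbins))

num_bins_total = rp_nbins * rt_nbins
dmat = np.einsum('ij,kl->ikjl', rp_1d_dmat, rt_1d_dmat).reshape(
    num_bins_total, num_bins_total)

# The contraction is exactly the Kronecker product of the two matrices
assert np.allclose(dmat, np.kron(rp_1d_dmat, rt_1d_dmat))

# One element spelled out: output bin (rp=i, rt=k), input bin (rp=j, rt=l)
i, k, j, l = 1, 2, 0, 3
assert np.isclose(dmat[rt_nbins * i + k, rt_nbins * j + l],
                  rp_1d_dmat[i, j] * rt_1d_dmat[k, l])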
@@ -346,26 +403,22 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
             assumed_rp_pairs, bins=rp_bin_edges,
             weights=weights * ((true_z1[:, None] + true_z2[None, :]) / 2.).ravel()
         )
+        r_par_eff_1d = sum_assumed_weight_rp / (sum_assumed_weight + (sum_assumed_weight == 0))
+        z_eff_1d = sum_weight_z / (sum_assumed_weight + (sum_assumed_weight == 0))
 
-        rp_eff = sum_assumed_weight_rp / (sum_assumed_weight + (sum_assumed_weight == 0))
-        z_eff = sum_weight_z / (sum_assumed_weight + (sum_assumed_weight == 0))
+        # r_trans has no weights here
+        r1 = np.arange(self.rt_nbins) * self._coordinates.rt_max / self.rt_nbins
+        r2 = (1 + np.arange(self.rt_nbins)) * self._coordinates.rt_max / self.rt_nbins
 
-        num_bins_total = self.rp_nbins * self.rt_nbins
-        full_rp_eff = np.zeros(num_bins_total)
-        full_rt_eff = np.zeros(num_bins_total)
-        full_z_eff = np.zeros(num_bins_total)
-
-        rp_indices = np.arange(self.rp_nbins)
-        rt_bins = np.arange(
-            self._coordinates.rt_binsize / 2, self._coordinates.rt_max,
-            self._coordinates.rt_binsize
-        )
+        # This accounts for the solid angle effect on the mean
+        r_trans_eff_1d = (2 * (r2**3 - r1**3)) / (3 * (r2**2 - r1**2))
 
-        for j in range(self.rt_nbins):
-            indices = j + self.rt_nbins * rp_indices
+        full_index = np.arange(num_bins_total)
+        rt_index = full_index % self.rt_nbins
+        rp_index = full_index // self.rt_nbins
 
-            full_rp_eff[indices] = rp_eff
-            full_rt_eff[indices] = rt_bins[j]
-            full_z_eff[indices] = z_eff
+        full_rp_eff = r_par_eff_1d[rp_index]
+        full_rt_eff = r_trans_eff_1d[rt_index]
+        full_z_eff = z_eff_1d[rp_index]
 
         return dmat, full_rp_eff, full_rt_eff, full_z_eff
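A note on the r_trans_eff_1d expression in the last hunk: it is the area-weighted mean radius of each transverse bin, i.e. the mean of r with weight proportional to r over [r1, r2], which evaluates to 2(r2^3 - r1^3) / (3(r2^2 - r1^2)). A quick numerical check, with arbitrary bin edges chosen only for illustration:

import numpy as np

r1, r2 = 4.0, 8.0  # arbitrary bin edges, h^-1 Mpc

# Analytic solid-angle-weighted bin center, as used in the patch
analytic = 2 * (r2**3 - r1**3) / (3 * (r2**2 - r1**2))

# Numerical version: mean of r with weight proportional to r
r = np.linspace(r1, r2, 100001)
numeric = np.trapz(r * r, r) / np.trapz(r, r)

assert np.isclose(analytic, numeric, rtol=1e-6)
print(analytic)  # ~6.22, slightly above the naive bin midpoint of 6.0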