Update fast metal matrices

andreicuceu · Nov 14, 2024 · 29ff0cb · 29ff0cb
1 parent 8a88131
commit 29ff0cb
Showing 1 changed file with 95 additions and 42 deletions.
diff --git a/vega/metals.py b/vega/metals.py
@@ -1,16 +1,13 @@
-import numpy as np
 import copy
-from astropy.io import fits
-from cachetools import cached, LRUCache
-from cachetools.keys import hashkey
 
+import numpy as np
+from astropy.io import fits
 from picca import constants as picca_constants
+from scipy.sparse import csr_matrix
 
-from . import power_spectrum
-from . import correlation_func as corr_func
 from . import coordinates
-from . import utils
-from . import pktoxi
+from . import correlation_func as corr_func
+from . import pktoxi, power_spectrum, utils
 
 
 class Metals:
@@ -209,8 +206,7 @@ def compute(self, pars, pk_lin, component):
 
             # Apply the metal matrix
             if self.new_metals:
-                xi = (self.rp_metal_dmats[(name1, name2)]
-                      @ xi.reshape(self.rp_nbins, self.rt_nbins)).flatten()
+                xi = self.rp_metal_dmats[(name1, name2)] @ xi
             else:
                 xi = self._data.metal_mats[(name1, name2)].dot(xi)
 
@@ -246,15 +242,15 @@ def get_forest_weights(self, main_tracer):
         with fits.open(main_tracer['weights-path']) as hdul:
             stack_table = hdul[1].data
 
-        wave = 10**stack_table["LOGLAM"]
+        logwave = stack_table["LOGLAM"]
         weights = stack_table["WEIGHT"]
 
         rebin_factor = self.metal_matrix_config.getint('rebin_factor', None)
         if rebin_factor is not None:
-            wave = self.rebin(wave, rebin_factor)
+            logwave = self.rebin(logwave, rebin_factor)
             weights = self.rebin(weights, rebin_factor)
 
-        return wave, weights
+        return logwave, weights
 
     def get_qso_weights(self, tracer):
         assert tracer['type'] == 'discrete'
@@ -284,7 +280,8 @@ def get_rp_pairs(self, z1, z2):
         if 'discrete' not in self.main_tracer_types:
             rp_pairs = np.abs(rp_pairs)
 
-        return rp_pairs
+        mean_distance = ((r1[:, None] + r2[None, :]) / 2).ravel()
+        return rp_pairs, mean_distance
 
     def get_forest_weight_scaling(self, z, true_abs, assumed_abs):
         true_alpha = self.metal_matrix_config.getfloat(f'alpha_{true_abs}')
@@ -295,9 +292,9 @@ def get_forest_weight_scaling(self, z, true_abs, assumed_abs):
     def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
         # Initialize tracer 1 redshift and weights
         if self.main_tracer_types[0] == 'continuous':
-            wave1, weights1 = self.get_forest_weights(self._corr_item.tracer1)
-            true_z1 = wave1 / picca_constants.ABSORBER_IGM[true_abs_1] - 1.
-            assumed_z1 = wave1 / picca_constants.ABSORBER_IGM[self.main_tracers[0]] - 1.
+            logwave1, weights1 = self.get_forest_weights(self._corr_item.tracer1)
+            true_z1 = 10**logwave1 / picca_constants.ABSORBER_IGM[true_abs_1] - 1.
+            assumed_z1 = 10**logwave1 / picca_constants.ABSORBER_IGM[self.main_tracers[0]] - 1.
             scaling_1 = self.get_forest_weight_scaling(true_z1, true_abs_1, self.main_tracers[0])
         else:
             true_z1, weights1 = self.get_qso_weights(self._corr_item.tracer1)
@@ -306,18 +303,18 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
 
         # Initialize tracer 2 redshift and weights
         if self.main_tracer_types[1] == 'continuous':
-            wave2, weights2 = self.get_forest_weights(self._corr_item.tracer2)
-            true_z2 = wave2 / picca_constants.ABSORBER_IGM[true_abs_2] - 1.
-            assumed_z2 = wave2 / picca_constants.ABSORBER_IGM[self.main_tracers[1]] - 1.
+            logwave2, weights2 = self.get_forest_weights(self._corr_item.tracer2)
+            true_z2 = 10**logwave2 / picca_constants.ABSORBER_IGM[true_abs_2] - 1.
+            assumed_z2 = 10**logwave2 / picca_constants.ABSORBER_IGM[self.main_tracers[1]] - 1.
             scaling_2 = self.get_forest_weight_scaling(true_z2, true_abs_2, self.main_tracers[1])
         else:
             true_z2, weights2 = self.get_qso_weights(self._corr_item.tracer2)
             assumed_z2 = true_z2
             scaling_2 = 1.
 
         # Compute rp pairs
-        true_rp_pairs = self.get_rp_pairs(true_z1, true_z2)
-        assumed_rp_pairs = self.get_rp_pairs(assumed_z1, assumed_z2)
+        true_rp_pairs, true_mean_distance = self.get_rp_pairs(true_z1, true_z2)
+        assumed_rp_pairs, assumed_mean_distance = self.get_rp_pairs(assumed_z1, assumed_z2)
 
         # Compute weights
         weights = ((weights1 * scaling_1)[:, None] * (weights2 * scaling_2)[None, :]).ravel()
@@ -327,12 +324,72 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
             self._coordinates.rp_min, self._coordinates.rp_max, self.rp_nbins + 1)
 
         # Compute the distortion matrix
-        dmat, _, __ = np.histogram2d(
+        rp_1d_dmat, _, __ = np.histogram2d(
             assumed_rp_pairs, true_rp_pairs, bins=(rp_bin_edges, rp_bin_edges), weights=weights)
 
         # Normalize (sum of weights should be one for each input rp,rt)
-        sum_true_weight, _ = np.histogram(true_rp_pairs, bins=rp_bin_edges, weights=weights)
-        dmat *= ((sum_true_weight > 0) / (sum_true_weight + (sum_true_weight == 0)))[None, :]
+        sum_rp_1d_dmat = np.sum(rp_1d_dmat, axis=0)
+        rp_1d_dmat /= (sum_rp_1d_dmat + (sum_rp_1d_dmat==0))
+
+        # independently, we compute the r_trans distortion matrix
+        rt_bin_edges = np.linspace(0, self._coordinates.rt_max, self.rt_nbins + 1)
+
+        # we have input_dist , output_dist and weight.
+        # we don't need to store the absolute comoving distances
+        # but the ratio between output and input.
+        # we rebin that to compute the rest faster.
+        # histogram of distance scaling with proper weights:
+        # dist*theta = r_trans
+        # theta_max  = r_trans_max/dist
+        # solid angle contibuting for each distance propto
+        # theta_max**2 = (r_trans_max/dist)**2 propto 1/dist**2
+        # we weight the distances with this additional factor
+        # using the input or the output distance in the solid angle weight
+        # gives virtually the same result
+        # distance_ratio_weights,distance_ratio_bins =
+        # np.histogram(output_dist/input_dist,bins=4*rtbins.size,
+        # weights=weights/input_dist**2*(input_rp<cf.r_par_max)*(input_rp>cf.r_par_min))
+        # we also select only distance ratio for which the input_rp
+        # (that of the true separation of the absorbers) is small, so that this
+        # fast matrix calculation is accurate where it matters the most
+        distance_ratio_weights, distance_ratio_bins = np.histogram(
+            assumed_mean_distance / true_mean_distance, bins=4*rt_bin_edges.size,
+            weights=weights/true_mean_distance**2*(np.abs(true_rp_pairs) < 20.)
+        )
+        distance_ratios = (distance_ratio_bins[1:] + distance_ratio_bins[:-1]) / 2
+
+        # now we need to scan as a function of separation angles, or equivalently rt.
+        rt_bin_centers = (rt_bin_edges[:-1] + rt_bin_edges[1:]) / 2
+        rt_bin_half_size = self._coordinates.rt_binsize / 2
+
+        # we are oversampling the correlation function rt grid to correctly compute bin migration.
+        oversample = 7
+        # the -2/oversample term is needed to get a even-spaced grid
+        delta_rt = np.linspace(
+            -rt_bin_half_size, rt_bin_half_size*(1 - 2 / oversample), oversample)[None, :]
+        rt_1d_dmat = np.zeros((self.rt_nbins, self.rt_nbins))
+
+        for i, rt in enumerate(rt_bin_centers):
+            # the weight is proportional to rt+delta_rt to get the correct solid angle effect
+            # inside the bin (but it's almost a negligible effect)
+            rt_1d_dmat[:, i], _ = np.histogram(
+                (distance_ratios[:, None] * (rt + delta_rt)[None, :]).ravel(), bins=rt_bin_edges,
+                weights=(distance_ratio_weights[:, None] * (rt + delta_rt)[None, :]).ravel()
+            )
+
+        # normalize
+        sum_rt_1d_dmat = np.sum(rt_1d_dmat, axis=0)
+        rt_1d_dmat /= (sum_rt_1d_dmat + (sum_rt_1d_dmat == 0))
+
+        # now that we have both distortion along r_par and r_trans, we have to combine them
+        # we just multiply the two matrices, with indices splitted for rt and rp
+        # full_index = rt_index + cf.num_bins_r_trans * rp_index
+        # rt_index   = full_index%cf.num_bins_r_trans
+        # rp_index  = full_index//cf.num_bins_r_trans
+        num_bins_total = self.rp_nbins * self.rt_nbins
+        dmat = csr_matrix(
+            np.einsum('ij,kl->ikjl', rp_1d_dmat, rt_1d_dmat).reshape(num_bins_total, num_bins_total)
+        )
 
         # Mean assumed weights
         sum_assumed_weight, _ = np.histogram(assumed_rp_pairs, bins=rp_bin_edges, weights=weights)
@@ -346,26 +403,22 @@ def compute_fast_metal_dmat(self, true_abs_1, true_abs_2):
             assumed_rp_pairs, bins=rp_bin_edges,
             weights=weights * ((true_z1[:, None] + true_z2[None, :]) / 2.).ravel()
         )
+        r_par_eff_1d = sum_assumed_weight_rp / (sum_assumed_weight + (sum_assumed_weight == 0))
+        z_eff_1d = sum_weight_z / (sum_assumed_weight + (sum_assumed_weight == 0))
 
-        rp_eff = sum_assumed_weight_rp / (sum_assumed_weight + (sum_assumed_weight == 0))
-        z_eff = sum_weight_z / (sum_assumed_weight + (sum_assumed_weight == 0))
+        # r_trans has no weights here
+        r1 = np.arange(self.rt_nbins) * self._coordinates.rt_max / self.rt_nbins
+        r2 = (1 + np.arange(self.rt_nbins)) * self._coordinates.rt_max / self.rt_nbins
 
-        num_bins_total = self.rp_nbins * self.rt_nbins
-        full_rp_eff = np.zeros(num_bins_total)
-        full_rt_eff = np.zeros(num_bins_total)
-        full_z_eff = np.zeros(num_bins_total)
-
-        rp_indices = np.arange(self.rp_nbins)
-        rt_bins = np.arange(
-            self._coordinates.rt_binsize / 2, self._coordinates.rt_max,
-            self._coordinates.rt_binsize
-        )
+        # this is to account for the solid angle effect on the mean
+        r_trans_eff_1d = (2 * (r2**3 - r1**3)) / (3 * (r2**2 - r1**2))
 
-        for j in range(self.rt_nbins):
-            indices = j + self.rt_nbins * rp_indices
+        full_index = np.arange(num_bins_total)
+        rt_index = full_index % self.rt_nbins
+        rp_index = full_index // self.rt_nbins
 
-            full_rp_eff[indices] = rp_eff
-            full_rt_eff[indices] = rt_bins[j]
-            full_z_eff[indices] = z_eff
+        full_rp_eff = r_par_eff_1d[rp_index]
+        full_rt_eff = r_trans_eff_1d[rt_index]
+        full_z_eff = z_eff_1d[rp_index]
 
         return dmat, full_rp_eff, full_rt_eff, full_z_eff