From 5a785f107050be19d8a691117d9f726b674f3b27 Mon Sep 17 00:00:00 2001 From: Alastair Hewitt Date: Mon, 20 Oct 2025 23:19:15 -0400 Subject: [PATCH 1/2] Allow trimming oversized commutator covariances --- README.md | 13 ++++- docs/issues/phase5-simulations-theory.md | 1 + scripts/compute_commutator.py | 71 +++++++++++++++++++++++- scripts/run_null_sims.py | 44 ++++++++++++++- scripts/run_order_A_to_B.py | 33 ++++++----- scripts/run_order_B_to_A.py | 33 ++++++----- src/comet/simulations.py | 4 +- 7 files changed, 166 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 09436a2..017b247 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,12 @@ earlier. Inspect the terminal summary for the covariance size and record the random seed alongside the command in your lab notebook. + *Reusing legacy binning:* if you already have a long-running + covariance generated with the pre-preregistration CLI defaults (for + example, a 69×69 matrix from `--nlb 50`), rerun both ordering scripts + with `--disable-prereg` and matching `--nlb`/`--lmax` settings so the + Δ bandpowers align with that covariance. + 5. **Form the commutator residual and null statistic.** ```bash micromamba run -n comet python scripts/compute_commutator.py \ @@ -196,9 +202,12 @@ earlier. --cov artifacts/cov_delta_full.npy \ --out-delta artifacts/delta_ell_full.npy \ --out-summary artifacts/summary_full.json - ``` + ``` The resulting JSON contains the Δ vector length and the stabilized - χ ("z") statistic for the null test. + χ ("z") statistic for the null test. When reusing an older + covariance that has one or two extra high-ℓ bins, add + `--trim-covariance` to drop those trailing rows/columns so the + matrix matches the Δ bandpowers. 6. **Assemble the science cross-spectrum.** Average the two orderings, compare to theory, and compute per-bin significances: diff --git a/docs/issues/phase5-simulations-theory.md b/docs/issues/phase5-simulations-theory.md index 955da10..181a4f1 100644 --- a/docs/issues/phase5-simulations-theory.md +++ b/docs/issues/phase5-simulations-theory.md @@ -38,3 +38,4 @@ issues: ## Verification - `pytest` (with simulation dependencies available) executes the regression suite, confirming both the loader and covariance estimator satisfy the roadmap acceptance criteria for Phase 5.【F:tests/test_simulation_covariance.py†L12-L36】【F:tests/test_theory_loader.py†L17-L157】 +- Manual production run: `scripts/run_null_sims.py --theory artifacts/theory_planck_smica_lensing.npz --nsims 1000 --nside 2048` completed after approximately three days on a 12-thread Zen 5 workstation and produced `artifacts/cov_delta_full.npy` with shape 69×69 when invoked with the legacy CLI binning (`--disable-prereg --nlb 50`), matching the publication configuration logged by the CLI summary. Later science stages can reuse that covariance either by running both ordering scripts with the same CLI geometry or, when only a couple of trailing high-ℓ bins are extra, by trimming them via `scripts/compute_commutator.py --trim-covariance`. diff --git a/scripts/compute_commutator.py b/scripts/compute_commutator.py index ed971b1..07c556c 100755 --- a/scripts/compute_commutator.py +++ b/scripts/compute_commutator.py @@ -53,6 +53,16 @@ def _save_npy(arr: np.ndarray, path: Path): np.save(path.as_posix(), arr) +def _load_order_metadata(path: Path) -> dict | None: + sidecar = path.with_suffix(".json") + if not sidecar.exists(): + return None + try: + return json.loads(sidecar.read_text()) + except json.JSONDecodeError: + return None + + def main(): ap = argparse.ArgumentParser(description="Compute commutator Δ and Z") ap.add_argument("--order-a", default="artifacts/order_A_to_B.npz") @@ -60,6 +70,15 @@ def main(): ap.add_argument("--cov", default="artifacts/cov_delta.npy") ap.add_argument("--out-delta", default="artifacts/delta_ell.npy") ap.add_argument("--out-summary", default="artifacts/summary.json") + ap.add_argument( + "--trim-covariance", + action="store_true", + help=( + "Allow dropping trailing bins from a larger covariance matrix so it matches " + "the Δ bandpowers. Use only when you are certain the first bins share the " + "same geometry." + ), + ) args = ap.parse_args() A = np.load(args.order_a)["cl"] @@ -72,10 +91,57 @@ def main(): z = None cov_path = Path(args.cov) + covariance_trimmed = False if cov_path.exists(): C = np.load(cov_path) - if C.shape[0] != C.shape[1] or C.shape[0] != delta.size: - raise ValueError(f"Covariance shape {C.shape} incompatible with delta {delta.shape}") + if C.ndim != 2: + raise ValueError(f"Covariance must be 2-D; received array with shape {C.shape}") + if C.shape[0] != C.shape[1]: + raise ValueError(f"Covariance must be square; received shape {C.shape}") + if C.shape[0] != delta.size: + if args.trim_covariance and C.shape[0] > delta.size: + C = C[: delta.size, : delta.size] + covariance_trimmed = True + else: + meta_a = _load_order_metadata(Path(args.order_a)) + meta_b = _load_order_metadata(Path(args.order_b)) + order_details = [] + if meta_a: + order_details.append( + f"{Path(args.order_a).name}: nbins={meta_a.get('nbins')} " + f"nside={meta_a.get('nside')}" + ) + if meta_b: + order_details.append( + f"{Path(args.order_b).name}: nbins={meta_b.get('nbins')} " + f"nside={meta_b.get('nside')}" + ) + hint_lines = [ + ( + "Ensure the null simulations and bandpower orderings use the same " + "binning and mask." + ), + ( + "Re-run scripts/run_null_sims.py with the prereg configuration or CLI " + "parameters that match your orderings." + ), + ( + "To reuse a legacy covariance produced with the CLI defaults, rerun both " + "ordering scripts with --disable-prereg and matching --nlb/--lmax " + "settings." + ), + ] + hint = " ".join(hint_lines) + context = "; ".join(order_details) if order_details else "" + if context: + hint = f"{hint} ({context})" + extra_hint = "" + if C.shape[0] > delta.size: + extra_hint = " Pass --trim-covariance to drop unmatched high-ℓ bins." + raise ValueError( + f"Covariance shape {C.shape} incompatible with delta {delta.shape}. " + f"{hint}{extra_hint}" + ) z = _stable_z(delta, C) summary = { @@ -89,6 +155,7 @@ def main(): "outputs": { "delta": args.out_delta, }, + "covariance_trimmed": covariance_trimmed, } _save_json(summary, Path(args.out_summary)) print(json.dumps({"msg": f"delta bins={delta.size} z={summary['z']}"}, sort_keys=True)) diff --git a/scripts/run_null_sims.py b/scripts/run_null_sims.py index a7799ec..f961610 100755 --- a/scripts/run_null_sims.py +++ b/scripts/run_null_sims.py @@ -15,6 +15,7 @@ from comet.theory import load_theory from commutator_common import ( build_mask, + load_bins_from_prereg, nm_bins_from_params, read_map, save_npy, @@ -38,6 +39,19 @@ def main(): ap.add_argument("--quick-nside", type=int, default=256) ap.add_argument("--nlb", type=int, default=50) ap.add_argument("--lmax", type=int, default=None) + ap.add_argument("--threshold-sigma", type=float, default=5.0) + ap.add_argument("--apod-arcmin", type=float, default=30.0) + ap.add_argument( + "--prereg", + type=Path, + default=Path("config/prereg.yaml"), + help="Path to prereg YAML for bin configuration", + ) + ap.add_argument( + "--disable-prereg", + action="store_true", + help="Ignore prereg metadata and use CLI binning/mask parameters", + ) ap.add_argument("--theory", type=Path, required=True, help="Path to theory Cl file (.npz/.txt)") ap.add_argument("--nsims", type=int, default=200) # bumped for better conditioning ap.add_argument("--seed", type=int, default=1234) @@ -51,8 +65,32 @@ def main(): cmb = read_map(d / args.cmb, quick_nside=args.quick_nside) phi = read_map(d / args.phi, quick_nside=args.quick_nside) nside = hp.get_nside(cmb) - mask = (build_mask(cmb) * build_mask(phi)).astype(float) - bins = nm_bins_from_params(nside=nside, lmax=args.lmax, nlb=args.nlb) + mask = ( + build_mask( + cmb, + threshold_sigma=args.threshold_sigma, + apod_arcmin=args.apod_arcmin, + ) + * build_mask( + phi, + threshold_sigma=args.threshold_sigma, + apod_arcmin=args.apod_arcmin, + ) + ).astype(float) + mask = np.clip(mask, 0.0, 1.0) + + bins = None + bins_meta = None + if not args.disable_prereg: + try: + bins, bins_meta = load_bins_from_prereg(args.prereg, nside=nside) + except FileNotFoundError: + pass + except Exception as exc: # pragma: no cover - keep CLI resilient + summary_line(f"failed to load prereg bins: {exc}; falling back to CLI params") + + if bins is None: + bins = nm_bins_from_params(nside=nside, lmax=args.lmax, nlb=args.nlb) theory = load_theory(args.theory) sim_lmax, field_lmax = resolve_simulation_bandlimits( @@ -60,6 +98,7 @@ def main(): requested_lmax=args.lmax, theory_lmax=theory.lmax, nside=nside, + bins_meta=bins_meta, ) geom = SimulationGeometry( mask=mask, @@ -75,6 +114,7 @@ def main(): summary_line( f"wrote {args.out_cov} with shape {cov.shape} nside={nside}" f" lmax={sim_lmax} field_lmax={field_lmax} nsims={args.nsims}" + f" bins={cov.shape[0]}" ) diff --git a/scripts/run_order_A_to_B.py b/scripts/run_order_A_to_B.py index d626c3a..85446e0 100755 --- a/scripts/run_order_A_to_B.py +++ b/scripts/run_order_A_to_B.py @@ -39,6 +39,11 @@ def main(): default=Path("config/prereg.yaml"), help="Path to prereg YAML for bin configuration", ) + ap.add_argument( + "--disable-prereg", + action="store_true", + help="Ignore prereg metadata and use CLI binning/mask parameters", + ) ap.add_argument("--out", default="artifacts/order_A_to_B.npz") args = ap.parse_args() @@ -64,12 +69,13 @@ def main(): bins = None bins_meta = None windows_cfg: WindowConfig | None = None - try: - bins, bins_meta = load_bins_from_prereg(args.prereg, nside=nside) - except FileNotFoundError: - pass - except Exception as exc: # pragma: no cover - keep fallback path available - summary_line(f"failed to load prereg bins: {exc}; falling back to CLI params") + if not args.disable_prereg: + try: + bins, bins_meta = load_bins_from_prereg(args.prereg, nside=nside) + except FileNotFoundError: + pass + except Exception as exc: # pragma: no cover - keep fallback path available + summary_line(f"failed to load prereg bins: {exc}; falling back to CLI params") if bins is None: bins = nm_bins_from_params( @@ -79,13 +85,14 @@ def main(): nlb=args.nlb, ) - try: - windows_cfg = load_windows_from_prereg(args.prereg) - except FileNotFoundError: - pass - except Exception as exc: # pragma: no cover - keep CLI resilient - summary_line(f"failed to load window config: {exc}; using defaults") - windows_cfg = None + if not args.disable_prereg: + try: + windows_cfg = load_windows_from_prereg(args.prereg) + except FileNotFoundError: + pass + except Exception as exc: # pragma: no cover - keep CLI resilient + summary_line(f"failed to load window config: {exc}; using defaults") + windows_cfg = None fallback_lmax: list[int | None] = [] if args.lmax is not None: diff --git a/scripts/run_order_B_to_A.py b/scripts/run_order_B_to_A.py index f6b3af3..0ca81e6 100755 --- a/scripts/run_order_B_to_A.py +++ b/scripts/run_order_B_to_A.py @@ -39,6 +39,11 @@ def main(): default=Path("config/prereg.yaml"), help="Path to prereg YAML for bin configuration", ) + ap.add_argument( + "--disable-prereg", + action="store_true", + help="Ignore prereg metadata and use CLI binning/mask parameters", + ) ap.add_argument("--out", default="artifacts/order_B_to_A.npz") args = ap.parse_args() @@ -64,12 +69,13 @@ def main(): bins = None bins_meta = None windows_cfg: WindowConfig | None = None - try: - bins, bins_meta = load_bins_from_prereg(args.prereg, nside=nside) - except FileNotFoundError: - pass - except Exception as exc: # pragma: no cover - summary_line(f"failed to load prereg bins: {exc}; falling back to CLI params") + if not args.disable_prereg: + try: + bins, bins_meta = load_bins_from_prereg(args.prereg, nside=nside) + except FileNotFoundError: + pass + except Exception as exc: # pragma: no cover + summary_line(f"failed to load prereg bins: {exc}; falling back to CLI params") if bins is None: bins = nm_bins_from_params( @@ -79,13 +85,14 @@ def main(): nlb=args.nlb, ) - try: - windows_cfg = load_windows_from_prereg(args.prereg) - except FileNotFoundError: - pass - except Exception as exc: # pragma: no cover - summary_line(f"failed to load window config: {exc}; using defaults") - windows_cfg = None + if not args.disable_prereg: + try: + windows_cfg = load_windows_from_prereg(args.prereg) + except FileNotFoundError: + pass + except Exception as exc: # pragma: no cover + summary_line(f"failed to load window config: {exc}; using defaults") + windows_cfg = None fallback_lmax: list[int | None] = [] if args.lmax is not None: diff --git a/src/comet/simulations.py b/src/comet/simulations.py index beee161..74ef697 100644 --- a/src/comet/simulations.py +++ b/src/comet/simulations.py @@ -3,7 +3,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any +from typing import Any, Mapping import numpy as np @@ -47,6 +47,7 @@ def resolve_simulation_bandlimits( requested_lmax: int | None, theory_lmax: int | None, nside: int, + bins_meta: Mapping[str, Any] | None = None, ) -> tuple[int, int]: """Determine consistent simulation and field band-limits for NaMaster runs.""" @@ -56,6 +57,7 @@ def resolve_simulation_bandlimits( default_lmax = 3 * nside - 1 bin_lmax = infer_bin_lmax( bins, + bins_meta=bins_meta, fallbacks=(requested_lmax, default_lmax, theory_lmax), ) From c7bffb8b6fc9eef657e1cc21d400f24dd4779496 Mon Sep 17 00:00:00 2001 From: Alastair Hewitt Date: Tue, 21 Oct 2025 00:16:41 -0400 Subject: [PATCH 2/2] fix lint --- src/comet/simulations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/comet/simulations.py b/src/comet/simulations.py index 6127e7a..21ba8a2 100644 --- a/src/comet/simulations.py +++ b/src/comet/simulations.py @@ -4,7 +4,7 @@ from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Mapping +from typing import Any import numpy as np