From ba7b9c0a8b2c878a0a25a4af9d9e23059cef68a8 Mon Sep 17 00:00:00 2001 From: segomezlo <92443572+sebastiangomezlopez@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:23:20 +0200 Subject: [PATCH] PyGRB: Propagating slide_id information across postprocessing plotting scripts (#4809) * Update pygrb_plotting_utils.py Adding the option to feed a pyplot figure object to ease debugging * Update pygrb_postprocessing_utils.py load_triggers: Adding the capability to select triggers with a certain slide_id * Update pycbc_pygrb_plot_snr_timeseries pycbc_pygrb_plot_snr_timeseries: propagating the slide_id functionalities across plotting scripts. We only want to show slide_id=0 in these plots * Update pycbc_pygrb_plot_chisq_veto Update pycbc_pygrb_plot_chisq_veto: propagating slide_id functionalities across plotting scripts. Here we only want to display slide_id=0 * Update pycbc_pygrb_plot_coh_ifosnr pycbc_pygrb_plot_coh_ifosnr: propagating slide_id functionalities; here we only want to display slide_id=0 * Update pycbc_pygrb_plot_null_stats pycbc_pygrb_plot_null_stats: propagating slide_id information; here we only want to display slide_id=0 * Update pygrb_postprocessing_utils.py * Update pycbc_pygrb_plot_chisq_veto * Update pycbc_pygrb_plot_chisq_veto * Update pycbc_pygrb_plot_chisq_veto * Update pycbc_pygrb_plot_snr_timeseries * Update pygrb_postprocessing_utils.py Minor fixes to satisfy codeclimate * Update pygrb_postprocessing_utils.py * Update pygrb_postprocessing_utils.py * Update pygrb_postprocessing_utils.py * Update pygrb_postprocessing_utils.py * Update pycbc_pygrb_plot_snr_timeseries * Update pygrb_plotting_utils.py * Update pycbc_pygrb_plot_snr_timeseries * Update pygrb_plotting_utils.py * Update pygrb_plotting_utils.py * Propagating slide_id information to some pygrb plotting scripts * Update pygrb_postprocessing_utils.py * Modification to slides parser. 
Update to plotting scripts * Update pycbc_pygrb_plot_snr_timeseries * Update pygrb_postprocessing_utils.py * Update pygrb_postprocessing_utils.py * Update pycbc_pygrb_plot_snr_timeseries * Update pycbc_pygrb_plot_snr_timeseries * Update pycbc_pygrb_plot_snr_timeseries * Update pygrb_postprocessing_utils.py * Update pygrb_postprocessing_utils.py * Update pycbc_pygrb_plot_snr_timeseries * Update pygrb_postprocessing_utils.py * Update pygrb_postprocessing_utils.py * Update pycbc/results/pygrb_postprocessing_utils.py Co-authored-by: Francesco Pannarale --------- Co-authored-by: Francesco Pannarale --- bin/pygrb/pycbc_pygrb_plot_chisq_veto | 16 +++++--- bin/pygrb/pycbc_pygrb_plot_coh_ifosnr | 10 +++-- bin/pygrb/pycbc_pygrb_plot_null_stats | 16 +++++--- bin/pygrb/pycbc_pygrb_plot_snr_timeseries | 21 +++++++---- pycbc/results/pygrb_postprocessing_utils.py | 42 ++++++++++++++++++++- 5 files changed, 83 insertions(+), 22 deletions(-) diff --git a/bin/pygrb/pycbc_pygrb_plot_chisq_veto b/bin/pygrb/pycbc_pygrb_plot_chisq_veto index 2a624083a12..966798fc0f2 100644 --- a/bin/pygrb/pycbc_pygrb_plot_chisq_veto +++ b/bin/pygrb/pycbc_pygrb_plot_chisq_veto @@ -49,7 +49,7 @@ __program__ = "pycbc_pygrb_plot_chisq_veto" # Functions # ============================================================================= # Function to load trigger data: includes applying cut in reweighted SNR -def load_data(input_file, ifos, vetoes, opts, injections=False): +def load_data(input_file, ifos, vetoes, opts, injections=False, slide_id=None): """Load data from a trigger/injection file""" snr_type = opts.snr_type @@ -71,12 +71,14 @@ def load_data(input_file, ifos, vetoes, opts, injections=False): # This will eventually become load_injections trigs_or_injs = \ ppu.load_triggers(input_file, ifos, vetoes, - rw_snr_threshold=rw_snr_threshold) + rw_snr_threshold=rw_snr_threshold, + slide_id=slide_id) else: logging.info("Loading triggers...") trigs_or_injs = \ ppu.load_triggers(input_file, ifos, vetoes, - 
rw_snr_threshold=rw_snr_threshold) + rw_snr_threshold=rw_snr_threshold, + slide_id=slide_id) # Count surviving points num_trigs_or_injs = len(trigs_or_injs['network/reweighted_snr']) @@ -187,7 +189,9 @@ parser.add_argument("--snr-type", default='coherent', 'single'], help="SNR value to plot on x-axis.") ppu.pygrb_add_bestnr_cut_opt(parser) ppu.pygrb_add_bestnr_opts(parser) +ppu.pygrb_add_slide_opts(parser) opts = parser.parse_args() +ppu.slide_opts_helper(opts) init_logging(opts.verbose, format="%(asctime)s: %(levelname)s: %(message)s") @@ -249,10 +253,12 @@ if ifo and ifo not in ifos: raise RuntimeError(err_msg) # Extract trigger data -trig_data = load_data(trig_file, ifos, vetoes, opts) +trig_data = load_data(trig_file, ifos, vetoes, opts, + slide_id=opts.slide_id) # Extract (or initialize) injection data -inj_data = load_data(found_missed_file, ifos, vetoes, opts, injections=True) +inj_data = load_data(found_missed_file, ifos, vetoes, opts, + injections=True, slide_id=0) # Sanity checks if trig_data[snr_type] is None and inj_data[snr_type] is None: diff --git a/bin/pygrb/pycbc_pygrb_plot_coh_ifosnr b/bin/pygrb/pycbc_pygrb_plot_coh_ifosnr index 1770bb27e71..5a2b88321e2 100644 --- a/bin/pygrb/pycbc_pygrb_plot_coh_ifosnr +++ b/bin/pygrb/pycbc_pygrb_plot_coh_ifosnr @@ -54,7 +54,7 @@ __program__ = "pycbc_pygrb_plot_coh_ifosnr" # Functions # ============================================================================= # Function to load trigger data -def load_data(input_file, ifos, vetoes, opts, injections=False): +def load_data(input_file, ifos, vetoes, opts, injections=False, slide_id=None): """Load data from a trigger/injection file""" # Initialize the dictionary @@ -75,6 +75,7 @@ def load_data(input_file, ifos, vetoes, opts, injections=False): ifos, vetoes, rw_snr_threshold=opts.newsnr_threshold, + slide_id=slide_id ) else: logging.info("Loading triggers...") @@ -83,6 +84,7 @@ def load_data(input_file, ifos, vetoes, opts, injections=False): ifos, vetoes, 
rw_snr_threshold=opts.newsnr_threshold, + slide_id=slide_id ) # Load SNR data @@ -186,7 +188,9 @@ parser.add_argument( help="Output file a zoomed in version of the plot.", ) ppu.pygrb_add_bestnr_cut_opt(parser) +ppu.pygrb_add_slide_opts(parser) opts = parser.parse_args() +ppu.slide_opts_helper(opts) init_logging(opts.verbose, format="%(asctime)s: %(levelname)s: %(message)s") @@ -232,10 +236,10 @@ ifos, vetoes = ppu.extract_ifos_and_vetoes( ) # Extract trigger data -trig_data = load_data(trig_file, ifos, vetoes, opts) +trig_data = load_data(trig_file, ifos, vetoes, opts, slide_id=opts.slide_id) # Extract (or initialize) injection data -inj_data = load_data(found_file, ifos, vetoes, opts, injections=True) +inj_data = load_data(found_file, ifos, vetoes, opts, injections=True, slide_id=0) # Generate plots logging.info("Plotting...") diff --git a/bin/pygrb/pycbc_pygrb_plot_null_stats b/bin/pygrb/pycbc_pygrb_plot_null_stats index 187b366962d..2a3f1b95029 100644 --- a/bin/pygrb/pycbc_pygrb_plot_null_stats +++ b/bin/pygrb/pycbc_pygrb_plot_null_stats @@ -47,7 +47,7 @@ __program__ = "pycbc_pygrb_plot_null_stats" # Functions # ============================================================================= # Function to load trigger data -def load_data(input_file, ifos, vetoes, opts, injections=False): +def load_data(input_file, ifos, vetoes, opts, injections=False, slide_id=None): """Load data from a trigger/injection file""" null_stat_type = opts.y_variable @@ -63,12 +63,14 @@ def load_data(input_file, ifos, vetoes, opts, injections=False): # This will eventually become ppu.load_injections() trigs_or_injs = \ ppu.load_triggers(input_file, ifos, vetoes, - rw_snr_threshold=opts.newsnr_threshold) + rw_snr_threshold=opts.newsnr_threshold, + slide_id=slide_id) else: logging.info("Loading triggers...") trigs_or_injs = \ ppu.load_triggers(input_file, ifos, vetoes, - rw_snr_threshold=opts.newsnr_threshold) + rw_snr_threshold=opts.newsnr_threshold, + slide_id=slide_id) # Coherent SNR 
is always used data['coherent'] = trigs_or_injs['network/coherent_snr'] @@ -140,7 +142,9 @@ parser.add_argument("-y", "--y-variable", default=None, help="Quantity to plot on the vertical axis.") ppu.pygrb_add_null_snr_opts(parser) ppu.pygrb_add_bestnr_cut_opt(parser) +ppu.pygrb_add_slide_opts(parser) opts = parser.parse_args() +ppu.slide_opts_helper(opts) init_logging(opts.verbose, format="%(asctime)s: %(levelname)s: %(message)s") @@ -182,10 +186,12 @@ ifos, vetoes = ppu.extract_ifos_and_vetoes(trig_file, opts.veto_files, opts.veto_category) # Extract trigger data -trig_data = load_data(trig_file, ifos, vetoes, opts) +trig_data = load_data(trig_file, ifos, vetoes, opts, + slide_id=opts.slide_id) # Extract (or initialize) injection data -inj_data = load_data(found_missed_file, ifos, vetoes, opts, injections=True) +inj_data = load_data(found_missed_file, ifos, vetoes, opts, + injections=True, slide_id=0) # Generate plots logging.info("Plotting...") diff --git a/bin/pygrb/pycbc_pygrb_plot_snr_timeseries b/bin/pygrb/pycbc_pygrb_plot_snr_timeseries index 7fe26f270f3..2f8266cca32 100644 --- a/bin/pygrb/pycbc_pygrb_plot_snr_timeseries +++ b/bin/pygrb/pycbc_pygrb_plot_snr_timeseries @@ -50,7 +50,7 @@ __program__ = "pycbc_pygrb_plot_snr_timeseries" # ============================================================================= # Load trigger data def load_data(input_file, ifos, vetoes, rw_snr_threshold=None, - injections=False): + injections=False, slide_id=None): """Load data from a trigger/injection file""" trigs_or_injs = None @@ -60,12 +60,14 @@ def load_data(input_file, ifos, vetoes, rw_snr_threshold=None, # This will eventually become load_injections trigs_or_injs = \ ppu.load_triggers(input_file, ifos, vetoes, - rw_snr_threshold=rw_snr_threshold) + rw_snr_threshold=rw_snr_threshold, + slide_id=slide_id) else: logging.info("Loading triggers...") trigs_or_injs = \ ppu.load_triggers(input_file, ifos, vetoes, - rw_snr_threshold=rw_snr_threshold) + 
rw_snr_threshold=rw_snr_threshold, + slide_id=slide_id) return trigs_or_injs @@ -107,7 +109,9 @@ parser.add_argument("-y", "--y-variable", default=None, choices=['coherent', 'single', 'reweighted', 'null'], help="Quantity to plot on the vertical axis.") ppu.pygrb_add_bestnr_cut_opt(parser) +ppu.pygrb_add_slide_opts(parser) opts = parser.parse_args() +ppu.slide_opts_helper(opts) init_logging(opts.verbose, format="%(asctime)s: %(levelname)s: %(message)s") @@ -136,21 +140,24 @@ ifos, vetoes = ppu.extract_ifos_and_vetoes(trig_file, opts.veto_files, # points to show the impact of the cut, otherwise remove points with # reweighted SNR below threshold if snr_type == 'reweighted': - trig_data = load_data(trig_file, ifos, vetoes) + trig_data = load_data(trig_file, ifos, vetoes, + slide_id=opts.slide_id) trig_data['network/reweighted_snr'] = \ reweightedsnr_cut(trig_data['network/reweighted_snr'], opts.newsnr_threshold) - inj_data = load_data(inj_file, ifos, vetoes, injections=True) + inj_data = load_data(inj_file, ifos, vetoes, injections=True, + slide_id=0) if inj_data is not None: inj_data['network/reweighted_snr'] = \ reweightedsnr_cut(inj_data['network/reweighted_snr'], opts.newsnr_threshold) else: trig_data = load_data(trig_file, ifos, vetoes, - rw_snr_threshold=opts.newsnr_threshold) + rw_snr_threshold=opts.newsnr_threshold, + slide_id=opts.slide_id) inj_data = load_data(inj_file, ifos, vetoes, rw_snr_threshold=opts.newsnr_threshold, - injections=True) + injections=True, slide_id=0) # Specify HDF file keys for x quantity (time) and y quantity (SNR) if snr_type == 'single': diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index 8f9fd82fb0b..99e562f1df5 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -92,10 +92,29 @@ def pygrb_initialize_plot_parser(description=None): parser.add_argument('--plot-caption', default=None, help="If provided, use the given 
string as the plot " + "caption") - return parser +def pygrb_add_slide_opts(parser): + """Add to parser object arguments related to short timeslides""" + parser.add_argument("--slide-id", type=str, default='0', + help="If all, the plotting scripts will use triggers" + + "from all short slides.") + + +def slide_opts_helper(args): + """ + This function overwrites the types of input slide_id information + when loading data in postprocessing scripts. + """ + if args.slide_id.isdigit(): + args.slide_id = int(args.slide_id) + elif args.slide_id.lower() == "all": + args.slide_id = None + else: + raise ValueError("--slide-id must be the string all or an int") + + def pygrb_add_injmc_opts(parser): """Add to parser object the arguments used for Monte-Carlo on distance.""" if parser is None: @@ -175,6 +194,20 @@ def pygrb_add_bestnr_cut_opt(parser): "Default 0: all events are considered.") +# ============================================================================= +# Wrapper to pick triggers with certain slide_ids +# ============================================================================= +def slide_filter(trig_file, data, slide_id=None): + """ + This function adds the capability to select triggers with specific + slide_ids during the postprocessing stage of PyGRB. 
+ """ + if slide_id is None: + return data + mask = numpy.where(trig_file['network/slide_id'][:] == slide_id)[0] + return data[mask] + + # ============================================================================= # Wrapper to read segments files # ============================================================================= @@ -359,7 +392,8 @@ def dataset_iterator(g, prefix=''): yield from dataset_iterator(item, path) -def load_triggers(input_file, ifos, vetoes, rw_snr_threshold=None): +def load_triggers(input_file, ifos, vetoes, rw_snr_threshold=None, + slide_id=None): """Loads triggers from PyGRB output file, returning a dictionary""" trigs = HFile(input_file, 'r') @@ -410,6 +444,10 @@ def load_triggers(input_file, ifos, vetoes, rw_snr_threshold=None): else: trigs_dict[path] = dset[above_thresh] + if trigs_dict[path].size == trigs['network/slide_id'][:].size: + trigs_dict[path] = slide_filter(trigs, trigs_dict[path], + slide_id=slide_id) + return trigs_dict