From b705fa8f2aecf7baa77a3db299faa0df4b497a9f Mon Sep 17 00:00:00 2001 From: Jacob Buchanan <83317255+jakeb245@users.noreply.github.com> Date: Thu, 19 Oct 2023 07:51:02 -0400 Subject: [PATCH] PyGRB writing/loading timeslides (#4427) * Timeslides written and loaded * More commented stuff * Make event manager write slide offset * Slide offset is actually slide offset now * Write full timeslide information * Try writing full slide info another way * Load timeslides using new file output * Load fix + keep sanity checks * Correct number of ids * "slide shift" sounds better than "slide offset" * Bring back slide id * Codeclimate fix * Move time slides column to search group * Load time slides from search group * Avoid layers in output files * Try different placement in eventmgr * Try ifo/search group * Add search group copying to trig cluster * Get number of sets right for bar updating * Add more search group copying to work with bins --- bin/pycbc_multi_inspiral | 6 +++--- bin/pygrb/pycbc_grb_trig_cluster | 11 ++++++++++- bin/pygrb/pycbc_grb_trig_combiner | 12 +++++++++--- pycbc/events/eventmgr.py | 6 ++++-- pycbc/results/pygrb_postprocessing_utils.py | 19 +++++++++---------- 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/bin/pycbc_multi_inspiral b/bin/pycbc_multi_inspiral index 53f940529e8..02090589d37 100755 --- a/bin/pycbc_multi_inspiral +++ b/bin/pycbc_multi_inspiral @@ -280,7 +280,7 @@ with ctx: 'bank_chisq': None, 'bank_chisq_dof': None, 'cont_chisq': None, - 'slide_id': int + 'slide_id': None } ifo_names = sorted(ifo_out_vals.keys()) network_out_types = { @@ -303,13 +303,13 @@ with ctx: 'nifo': None, 'my_network_chisq': None, 'reweighted_snr': None, - 'slide_id': int + 'slide_id': None } network_names = sorted(network_out_vals.keys()) event_mgr = EventManagerCoherent( args, args.instruments, ifo_names, [ifo_out_types[n] for n in ifo_names], network_names, - [network_out_types[n] for n in network_names]) + [network_out_types[n] for n in network_names], time_slides=time_slides) logging.info("Read in template bank") bank = waveform.FilterBank( args.bank_file, flen, delta_f, complex64, low_frequency_cutoff=flow, diff --git a/bin/pygrb/pycbc_grb_trig_cluster b/bin/pygrb/pycbc_grb_trig_cluster index 952cad43c2a..0bb8f16a1a7 100644 --- a/bin/pygrb/pycbc_grb_trig_cluster +++ b/bin/pygrb/pycbc_grb_trig_cluster @@ -72,7 +72,8 @@ def slice_hdf5(inputfile, outfile, include, verbose=False): ) for ifo in ifos } - nsets = sum(isinstance(item, h5py.Dataset) for + nsets = sum(isinstance(item, h5py.Dataset) + or isinstance(item, h5py.Group) for group in h5in.values() for item in group.values()) msg = "Slicing {} network events into new file".format(nevents) @@ -88,6 +89,10 @@ def slice_hdf5(inputfile, outfile, include, verbose=False): compression_opts=old.compression_opts, ) bar.update() + elif isinstance(old, h5py.Group): + if "search" in old.name: + h5in.copy(h5in[old.name], h5out, old.name) + bar.update() for ifo in ifos: idx = numpy.in1d(h5in[ifo]["event_id"][()], ifo_index[ifo]) for old in h5in[ifo].values(): @@ -99,6 +104,10 @@ def slice_hdf5(inputfile, outfile, include, verbose=False): compression_opts=old.compression_opts, ) bar.update() + elif isinstance(old, h5py.Group): + if "search" in old.name: + h5in.copy(h5in[old.name], h5out, old.name) + bar.update() bar.close() if verbose: print("Slice written to {}".format(outfile)) diff --git a/bin/pygrb/pycbc_grb_trig_combiner b/bin/pygrb/pycbc_grb_trig_combiner index edddc507627..84cdf0b4ca5 100644 --- a/bin/pygrb/pycbc_grb_trig_combiner +++ b/bin/pygrb/pycbc_grb_trig_combiner @@ -279,6 +279,11 @@ def bin_events(inputfile, bins, outdir, filetag, compression_opts=old.compression_opts, ) bar.update() + elif isinstance(old, h5py.Group): + # If search group, copy it all over + if "search" in old.name: + h5in.copy(h5in[old.name], h5out, old.name) + bar.update() for ifo in ifos: idx = numpy.in1d(h5in[ifo]["event_id"][()], ifo_index[ifo]) for old in h5in[ifo].values(): @@ -290,6 +295,10 @@ def bin_events(inputfile, bins, outdir, filetag, compression_opts=old.compression_opts, ) bar.update() + elif isinstance(old, h5py.Group): + if "search" in old.name: + h5in.copy(h5in[old.name], h5out, old.name) + bar.update() bar.close() if verbose: print("{} written to {}".format(bin_, outf)) @@ -492,9 +501,6 @@ vprint("-- Merging events") if args.short_slides and args.long_slides: raise NotImplementedError -elif args.short_slides and not args.long_slides: - raise NotImplementedError - else: outfilename = "{}-{}_ALL_TIMES-{}-{}.h5".format( args.ifo_tag, args.user_tag, start, end-start, diff --git a/pycbc/events/eventmgr.py b/pycbc/events/eventmgr.py index 56ad265c1d1..9d84af403f9 100644 --- a/pycbc/events/eventmgr.py +++ b/pycbc/events/eventmgr.py @@ -596,7 +596,7 @@ def add_template_events_to_ifo(self, ifo, columns, vectors): class EventManagerCoherent(EventManagerMultiDetBase): def __init__(self, opt, ifos, column, column_types, network_column, - network_column_types, psd=None, **kwargs): + network_column_types, time_slides, psd=None, **kwargs): super(EventManagerCoherent, self).__init__( opt, ifos, column, column_types, psd=None, **kwargs) self.network_event_dtype = \ @@ -612,6 +612,7 @@ def __init__(self, opt, ifos, column, column_types, network_column, self.event_index['network'] = 0 self.template_event_dict['network'] = numpy.array( [], dtype=self.network_event_dtype) + self.time_slides = time_slides def cluster_template_network_events(self, tcolumn, column, window_size, slide=0): @@ -727,6 +728,7 @@ def __setitem__(self, name, data): float(self.opt.sample_rate[ifo_str]) + \ self.opt.gps_start_time[ifo_str] f['time_index'] = ifo_events['time_index'] + f['slide_id'] = ifo_events['slide_id'] try: # Precessing template_sigmasq_plus = numpy.array( @@ -772,7 +774,7 @@ def __setitem__(self, name, data): f['chisq_dof'] = numpy.zeros(len(ifo_events)) f['template_hash'] = th[tid] - + f['search/time_slides'] = numpy.array(self.time_slides[ifo]) if self.opt.trig_start_time: f['search/start_time'] = numpy.array([ self.opt.trig_start_time[ifo]], dtype=numpy.int32) diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index 999b44174e4..cc1a4416969 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -615,19 +615,18 @@ def load_injections(inj_file, vetoes, sim_table=False, label=None): # ============================================================================= # Function to load timeslides # ============================================================================= -def load_time_slides(xml_file): +def load_time_slides(hdf_file_path): """Loads timeslides from PyGRB output file as a dictionary""" + hdf_file = h5py.File(hdf_file_path, 'r') + ifos = extract_ifos(hdf_file_path) + ids = numpy.arange(len(hdf_file[f'{ifos[0]}/search/time_slides'])) + time_slide_dict = { + slide_id: { + ifo: hdf_file[f'{ifo}/search/time_slides'][slide_id] + for ifo in ifos} + for slide_id in ids} - # Get all timeslides: these are number_of_ifos * number_of_timeslides - time_slide = load_xml_table(xml_file, glsctables.TimeSlideTable.tableName) - # Get a list of unique timeslide dictionaries - time_slide_list = [dict(i) for i in time_slide.as_dict().values()] - # Turn it into a dictionary indexed by the timeslide ID - time_slide_dict = {int(time_slide.get_time_slide_id(ov)): ov - for ov in time_slide_list} # Check time_slide_ids are ordered correctly. - ids = _get_id_numbers(time_slide, - "time_slide_id")[::len(time_slide_dict[0].keys())] if not (numpy.all(ids[1:] == numpy.array(ids[:-1])+1) and ids[0] == 0): err_msg = "time_slide_ids list should start at zero and increase by " err_msg += "one for every element"