diff --git a/bin/pygrb/pycbc_grb_inj_finder b/bin/pygrb/pycbc_grb_inj_finder
index 623e45c231e..230e09d3bc9 100644
--- a/bin/pygrb/pycbc_grb_inj_finder
+++ b/bin/pygrb/pycbc_grb_inj_finder
@@ -42,6 +42,7 @@ from ligo.segments.utils import fromsegwizard
 from pycbc import __version__
 from pycbc.inject import InjectionSet
 from pycbc.io import FieldArray
+from pycbc.results.pygrb_postprocessing_utils import template_hash_to_id
 
 __author__ = "Duncan Macleod "
 
@@ -96,6 +97,8 @@ def read_hdf5_triggers(inputfiles, verbose=False):
         elif name.startswith("network") and \
                 NETWORK_IFO_EVENT_ID_REGEX.match(name):
             return
+        elif "template_id" in name:
+            return
         else:
             shape = obj.shape
             dtype = obj.dtype
@@ -126,7 +129,7 @@ def read_hdf5_triggers(inputfiles, verbose=False):
     # copy dataset contents
     for filename in inputfiles:
         with h5py.File(filename, 'r') as h5in:
-            # Skipping emtpy files
+            # Skipping empty files
             if len(h5in['network']['end_time_gc'][:]):
                 for dset in datasets:
                     data = h5in[dset][:]
@@ -174,6 +177,11 @@ parser.add_argument(
     nargs="+",
     help="path(s) to input injection file(s)",
 )
+parser.add_argument(
+    "--bank-file",
+    required=True,
+    help="full template bank file for mapping template hashes to IDs",
+)
 parser.add_argument(
     "-o",
     "--output-dir",
@@ -329,12 +337,21 @@ with h5py.File(outfilename, "w") as h5out:
     for x in out_trigs:
         xgroup = h5out.create_group(x)
         for col in out_trigs[x]:
+            if col == "template_id":
+                continue
             xgroup.create_dataset(
                 col,
                 data=out_trigs[x][col],
                 compression="gzip",
                 compression_opts=9,
             )
-
+    # Map and write the template IDs
+    ids = template_hash_to_id(h5out, args.bank_file)
+    h5out.create_dataset(
+        "network/template_id",
+        data=ids,
+        compression="gzip",
+        compression_opts=9,
+    )
 if args.verbose:
     print("Found/missed written to {}".format(outfilename))
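Annotation (not part of the patch): the new --bank-file option feeds template_hash_to_id, which rebuilds network/template_id against the full bank rather than copying the per-file IDs. A minimal numpy sketch of that remapping, with made-up hash values:

import numpy

# Hypothetical bank hashes; a template's ID is its row index in the bank.
bank_hashes = numpy.array([111, 222, 333, 444], dtype=numpy.uint64)
# Hypothetical hashes attached to four triggers in a trigger file.
trig_hashes = numpy.array([333, 111, 333, 444], dtype=numpy.uint64)

# Same logic as template_hash_to_id: assign each trigger the index of
# the bank template whose hash it carries.
trig_ids = numpy.zeros(trig_hashes.shape[0], dtype=int)
for idx, t_hash in enumerate(bank_hashes):
    trig_ids[numpy.where(trig_hashes == t_hash)] = idx

print(trig_ids)  # [2 0 2 3]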
+ if "template_id" in dset: + continue + # merge dataset position[dset] += _merge_dataset( h5in[dset], h5out[dset], position[dset], @@ -218,6 +226,12 @@ def merge_hdf5_files(inputfiles, outputfile, verbose=False, **compression_kw): while search_datasets: dataset_names.remove(search_datasets.pop()) + template_ids = template_hash_to_id(trigger_file=h5out, bank_path=args.bank_file) + h5out.create_dataset("network/template_id", + data=template_ids, + compression="gzip", + compression_opts=9) + if verbose: print("Merged triggers written to {}".format(outputfile)) return outputfile @@ -423,6 +437,11 @@ parser.add_argument( metavar="TRIGGER FILE", help="read in listed trigger files", ) +parser.add_argument( + "--bank-file", + required=True, + help="full template bank file for mapping hashes to ids", +) parser.add_argument( "-o", "--output-dir", diff --git a/bin/pygrb/pycbc_make_offline_grb_workflow b/bin/pygrb/pycbc_make_offline_grb_workflow index 3dd1ee9d831..59160f4f79c 100644 --- a/bin/pygrb/pycbc_make_offline_grb_workflow +++ b/bin/pygrb/pycbc_make_offline_grb_workflow @@ -273,9 +273,14 @@ all_files.extend(datafind_veto_files) # TEMPLATE BANK AND SPLIT BANK bank_files = _workflow.setup_tmpltbank_workflow(wflow, sciSegs, datafind_files, df_dir) +# Check there are not multiple banks +if len(bank_files) > 1: + raise NotImplementedError("Multiple banks not supported") +full_bank_file = bank_files[0] +# Note: setup_splittable_workflow requires a FileList as input splitbank_files = _workflow.setup_splittable_workflow(wflow, bank_files, df_dir, tags=["inspiral"]) -all_files.extend(bank_files) +all_files.append(full_bank_file) all_files.extend(splitbank_files) # INJECTIONS @@ -411,7 +416,7 @@ if post_proc_method == "PYGRB_OFFLINE": # The content and structure of pp_files are described in the definition # of _workflow.setup_pygrb_pp_workflow pp_files = _workflow.setup_pygrb_pp_workflow(wflow, pp_dir, seg_dir, - sciSegs[ifo][0], inspiral_files, + sciSegs[ifo][0], full_bank_file, inspiral_files, injs, inj_insp_files, inj_tags) sec_name = 'workflow-pygrb_pp_workflow' if not wflow.cp.has_section(sec_name): diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index 8760a025224..531f7124a40 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -310,16 +310,17 @@ def _get_id_numbers(ligolw_table, column): # ============================================================================= # Function to build a dictionary (indexed by ifo) of time-slid vetoes # ============================================================================= -def _slide_vetoes(vetoes, slide_dict_or_list, slide_id): +def _slide_vetoes(vetoes, slide_dict_or_list, slide_id, ifos): """Build a dictionary (indexed by ifo) of time-slid vetoes""" # Copy vetoes - slid_vetoes = copy.deepcopy(vetoes) - - # Slide them - ifos = vetoes.keys() - for ifo in ifos: - slid_vetoes[ifo].shift(-slide_dict_or_list[slide_id][ifo]) + if vetoes is not None: + slid_vetoes = copy.deepcopy(vetoes) + # Slide them + for ifo in ifos: + slid_vetoes[ifo].shift(-slide_dict_or_list[slide_id][ifo]) + else: + slid_vetoes = {ifo: segments.segmentlist() for ifo in ifos} return slid_vetoes @@ -461,36 +462,39 @@ def sort_trigs(trial_dict, trigs, slide_dict, seg_dict): sorted_trigs = {} # Begin by sorting the triggers into each slide - # New seems pretty slow, so run it once and then use deepcopy - tmp_table = glsctables.New(glsctables.MultiInspiralTable) for slide_id in 
diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py
index 8760a025224..531f7124a40 100644
--- a/pycbc/results/pygrb_postprocessing_utils.py
+++ b/pycbc/results/pygrb_postprocessing_utils.py
@@ -310,16 +310,17 @@ def _get_id_numbers(ligolw_table, column):
 
 # =============================================================================
 # Function to build a dictionary (indexed by ifo) of time-slid vetoes
 # =============================================================================
-def _slide_vetoes(vetoes, slide_dict_or_list, slide_id):
+def _slide_vetoes(vetoes, slide_dict_or_list, slide_id, ifos):
     """Build a dictionary (indexed by ifo) of time-slid vetoes"""
 
     # Copy vetoes
-    slid_vetoes = copy.deepcopy(vetoes)
-
-    # Slide them
-    ifos = vetoes.keys()
-    for ifo in ifos:
-        slid_vetoes[ifo].shift(-slide_dict_or_list[slide_id][ifo])
+    if vetoes is not None:
+        slid_vetoes = copy.deepcopy(vetoes)
+        # Slide them
+        for ifo in ifos:
+            slid_vetoes[ifo].shift(-slide_dict_or_list[slide_id][ifo])
+    else:
+        slid_vetoes = {ifo: segments.segmentlist() for ifo in ifos}
 
     return slid_vetoes
 
@@ -461,36 +462,39 @@ def sort_trigs(trial_dict, trigs, slide_dict, seg_dict):
     sorted_trigs = {}
 
     # Begin by sorting the triggers into each slide
-    # New seems pretty slow, so run it once and then use deepcopy
-    tmp_table = glsctables.New(glsctables.MultiInspiralTable)
     for slide_id in slide_dict:
-        sorted_trigs[slide_id] = copy.deepcopy(tmp_table)
-    for trig in trigs:
-        sorted_trigs[int(trig.time_slide_id)].append(trig)
+        sorted_trigs[slide_id] = []
+    for slide_id, event_id in zip(trigs['network/slide_id'],
+                                  trigs['network/event_id']):
+        sorted_trigs[slide_id].append(event_id)
 
     for slide_id in slide_dict:
         # These can only *reduce* the analysis time
        curr_seg_list = seg_dict[slide_id]
 
         # Check the triggers are all in the analysed segment lists
-        for trig in sorted_trigs[slide_id]:
-            if trig.end_time not in curr_seg_list:
+        for event_id in sorted_trigs[slide_id]:
+            index = numpy.flatnonzero(trigs['network/event_id'] == event_id)[0]
+            end_time = trigs['network/end_time_gc'][index]
+            if end_time not in curr_seg_list:
                 # This can be raised if the trigger is on the segment boundary,
                 # so check if the trigger is within 1/100 of a second within
                 # the list
-                if trig.get_end() + 0.01 in curr_seg_list:
+                if end_time + 0.01 in curr_seg_list:
                     continue
-                if trig.get_end() - 0.01 in curr_seg_list:
+                if end_time - 0.01 in curr_seg_list:
                     continue
                 err_msg = "Triggers found in input files not in the list of "
                 err_msg += "analysed segments. This should not happen."
                 raise RuntimeError(err_msg)
         # END OF CHECK #
 
-        # The below line works like the inverse of .veto and only returns trigs
-        # that are within the segment specified by trial_dict[slide_id]
-        sorted_trigs[slide_id] = \
-            sorted_trigs[slide_id].vetoed(trial_dict[slide_id])
+        # Keep triggers that are in trial_dict
+        sorted_trigs[slide_id] = [event_id for event_id in
+                                  sorted_trigs[slide_id]
+                                  if trigs['network/end_time_gc'][
+                                      trigs['network/event_id'] == event_id][0]
+                                  in trial_dict[slide_id]]
 
     return sorted_trigs
 
@@ -498,8 +502,7 @@ def sort_trigs(trial_dict, trigs, slide_dict, seg_dict):
 
 # =============================================================================
 # Extract basic trigger properties and store them as dictionaries
 # =============================================================================
-def extract_basic_trig_properties(trial_dict, trigs, slide_dict, seg_dict,
-                                  opts):
+def extract_basic_trig_properties(trial_dict, trigs, slide_dict, seg_dict):
     """Extract and store as dictionaries time, SNR, and BestNR of
     time-slid triggers"""
 
@@ -507,16 +510,6 @@ def extract_basic_trig_properties(trial_dict, trigs, slide_dict, seg_dict,
     sorted_trigs = sort_trigs(trial_dict, trigs, slide_dict, seg_dict)
     logging.info("Triggers sorted.")
 
-    # Local copies of variables entering the BestNR definition
-    chisq_index = opts.chisq_index
-    chisq_nhigh = opts.chisq_nhigh
-    null_thresh = list(map(float, opts.null_snr_threshold.split(',')))
-    snr_thresh = opts.snr_threshold
-    sngl_snr_thresh = opts.sngl_snr_threshold
-    new_snr_thresh = opts.newsnr_threshold
-    null_grad_thresh = opts.null_grad_thresh
-    null_grad_val = opts.null_grad_val
-
     # Build the 3 dictionaries
     trig_time = {}
     trig_snr = {}
@@ -524,21 +517,12 @@ def extract_basic_trig_properties(trial_dict, trigs, slide_dict, seg_dict,
     for slide_id in slide_dict:
         slide_trigs = sorted_trigs[slide_id]
         if slide_trigs:
-            trig_time[slide_id] = numpy.asarray(slide_trigs.get_end()).\
-                astype(float)
-            trig_snr[slide_id] = numpy.asarray(slide_trigs.get_column('snr'))
+            trig_time[slide_id] = trigs['network/end_time_gc'][slide_trigs]
+            trig_snr[slide_id] = trigs['network/coherent_snr'][slide_trigs]
         else:
             trig_time[slide_id] = numpy.asarray([])
             trig_snr[slide_id] = numpy.asarray([])
-        trig_bestnr[slide_id] = get_bestnrs(slide_trigs,
-                                            q=chisq_index,
-                                            n=chisq_nhigh,
-                                            null_thresh=null_thresh,
-                                            snr_threshold=snr_thresh,
-                                            sngl_snr_threshold=sngl_snr_thresh,
-                                            chisq_threshold=new_snr_thresh,
-                                            null_grad_thresh=null_grad_thresh,
-                                            null_grad_val=null_grad_val)
+        trig_bestnr[slide_id] = trigs['network/reweighted_snr'][slide_trigs]
 
     logging.info("Time, SNR, and BestNR of triggers extracted.")
     return trig_time, trig_snr, trig_bestnr
@@ -699,7 +683,7 @@ def construct_trials(seg_files, seg_dict, ifos, slide_dict, vetoes):
         seg_buffer.coalesce()
 
         # Construct the ifo-indexed dictionary of slid veteoes
-        slid_vetoes = _slide_vetoes(vetoes, slide_dict, slide_id)
+        slid_vetoes = _slide_vetoes(vetoes, slide_dict, slide_id, ifos)
 
         # Construct trial list and check against buffer
         trial_dict[slide_id] = segments.segmentlist()
@@ -804,3 +788,24 @@ def get_coinc_snr(trigs_or_injs, ifos):
     coinc_snr = numpy.sqrt(snr_sum_square)
 
     return coinc_snr
+
+
+def template_hash_to_id(trigger_file, bank_path):
+    """
+    Map the template hashes found in a trigger file to the template IDs,
+    i.e. the indices of the templates within the full bank.
+
+    Parameters
+    ----------
+    trigger_file: h5py File object for the trigger file
+    bank_path: filepath of the full template bank
+    """
+    with h5py.File(bank_path, "r") as bank:
+        hashes = bank['template_hash'][:]
+    ifos = [k for k in trigger_file.keys() if k != 'network']
+    trig_hashes = trigger_file[f'{ifos[0]}/template_hash'][:]
+    trig_ids = numpy.zeros(trig_hashes.shape[0], dtype=int)
+    for idx, t_hash in enumerate(hashes):
+        matches = numpy.where(trig_hashes == t_hash)
+        trig_ids[matches] = idx
+    return trig_ids
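Annotation (not part of the patch): a round-trip check of template_hash_to_id as defined above, using toy files with hypothetical names; it assumes only that the trigger file holds a network group plus at least one detector group:

import h5py
import numpy
from pycbc.results.pygrb_postprocessing_utils import template_hash_to_id

# Toy bank: three templates, so the valid IDs are 0, 1, 2.
with h5py.File("toy_bank.hdf", "w") as bank:
    bank.create_dataset(
        "template_hash", data=numpy.array([10, 20, 30], dtype=numpy.uint64))

# Toy trigger file: one detector group next to the network group.
with h5py.File("toy_trigs.hdf", "w") as trigs:
    trigs.create_group("network")
    trigs.create_dataset(
        "H1/template_hash", data=numpy.array([30, 10, 30], dtype=numpy.uint64))
    ids = template_hash_to_id(trigs, "toy_bank.hdf")

print(ids)  # [2 0 2]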
diff --git a/pycbc/workflow/grb_utils.py b/pycbc/workflow/grb_utils.py
index 8f96f793d39..2d2113516b3 100644
--- a/pycbc/workflow/grb_utils.py
+++ b/pycbc/workflow/grb_utils.py
@@ -233,8 +233,8 @@ def get_sky_grid_scale(
     return out
 
 
-def setup_pygrb_pp_workflow(wf, pp_dir, seg_dir, segment, insp_files,
-                            inj_files, inj_insp_files, inj_tags):
+def setup_pygrb_pp_workflow(wf, pp_dir, seg_dir, segment, bank_file,
+                            insp_files, inj_files, inj_insp_files, inj_tags):
     """
     Generate post-processing section of PyGRB offline workflow
     """
@@ -258,7 +258,7 @@ def setup_pygrb_pp_workflow(wf, pp_dir, seg_dir, segment, insp_files,
     job_instance = exe_class(wf.cp, "trig_combiner")
     # Create node for coherent no injections jobs
     node, trig_files = job_instance.create_node(wf.ifos, seg_dir, segment,
-                                                insp_files, pp_dir)
+                                                insp_files, pp_dir, bank_file)
     wf.add_node(node)
     pp_outs.append(trig_files)
 
@@ -285,7 +285,7 @@ def setup_pygrb_pp_workflow(wf, pp_dir, seg_dir, segment, insp_files,
                                          if inj_tag in f.tags[1]])
         node, inj_find_file = job_instance.create_node(
             tag_inj_files, tag_insp_files,
-            pp_dir)
+            bank_file, pp_dir)
         wf.add_node(node)
         inj_find_files.append(inj_find_file)
     pp_outs.append(inj_find_files)
@@ -321,7 +321,7 @@ def __init__(self, cp, name):
         self.num_trials = int(cp.get('trig_combiner', 'num-trials'))
 
     def create_node(self, ifo_tag, seg_dir, segment, insp_files,
-                    out_dir, tags=None):
+                    out_dir, bank_file, tags=None):
         node = Node(self)
         node.add_opt('--verbose')
         node.add_opt("--ifo-tag", ifo_tag)
@@ -331,6 +331,7 @@ def create_node(self, ifo_tag, seg_dir, segment, insp_files,
         node.add_input_list_opt("--input-files", insp_files)
         node.add_opt("--user-tag", "PYGRB")
         node.add_opt("--num-trials", self.num_trials)
+        node.add_input_opt("--bank-file", bank_file)
         # Prepare output file tag
         user_tag = f"PYGRB_GRB{self.trigger_name}"
         if tags:
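Annotation (not part of the patch): add_input_opt, unlike add_opt, should also register the file as an input of the node, so the workflow stages the bank for the job. Roughly the command line the amended trig_combiner node resolves to, with hypothetical file names, IFO tag, and trial count:

# Hypothetical resolved command line for the trig_combiner job:
argv = [
    "pycbc_grb_trig_combiner",
    "--verbose",
    "--ifo-tag", "H1L1",
    "--user-tag", "PYGRB",
    "--num-trials", "12",
    "--input-files", "H1-INSPIRAL-tmp.h5", "L1-INSPIRAL-tmp.h5",
    "--bank-file", "H1L1-FULL_BANK.h5",  # the new required input
]
print(" ".join(argv))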
@@ -385,13 +386,14 @@ class PycbcGrbInjFinderExecutable(Executable):
     def __init__(self, cp, exe_name):
         super().__init__(cp=cp, name=exe_name)
 
-    def create_node(self, inj_files, inj_insp_files,
+    def create_node(self, inj_files, inj_insp_files, bank_file,
                     out_dir, tags=None):
         if tags is None:
             tags = []
         node = Node(self)
         node.add_input_list_opt('--input-files', inj_insp_files)
         node.add_input_list_opt('--inj-files', inj_files)
+        node.add_input_opt('--bank-file', bank_file)
         ifo_tag, desc, segment = filename_metadata(inj_files[0].name)
         desc = '_'.join(desc.split('_')[:-1])
         out_name = "{}-{}_FOUNDMISSED-{}-{}.h5".format(
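Annotation (not part of the patch): looking back at the _slide_vetoes change in pygrb_postprocessing_utils.py, a short sketch of its two branches with made-up ifos and slide offsets:

import copy
from ligo import segments

ifos = ["H1", "L1"]
slide_dict = {0: {"H1": 0.0, "L1": 5.0}}

# With vetoes given, they are deep-copied and time-slid per ifo as before.
vetoes = {"H1": segments.segmentlist([segments.segment(100, 110)]),
          "L1": segments.segmentlist([segments.segment(200, 210)])}
slid_vetoes = copy.deepcopy(vetoes)
for ifo in ifos:
    slid_vetoes[ifo].shift(-slide_dict[0][ifo])
print(slid_vetoes["L1"])  # [segment(195.0, 205.0)]

# With vetoes=None, the helper now returns empty per-ifo lists instead
# of failing on vetoes.keys().
slid_vetoes = {ifo: segments.segmentlist() for ifo in ifos}
print(slid_vetoes)  # {'H1': [], 'L1': []}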