Skip to content

Commit

Permalink
Fix sngls_minifollowup event selection/ordering [master] (#4823)
Browse files Browse the repository at this point in the history
* Fix sngls_minifollowup event selection/ordering [master]

* Remove testing printing

* Still create stat class variable even when there are no triggers

* Ranking -> statistic rename

* minor bugs caught during review

* oops, accidentally added to this PR
  • Loading branch information
GarethCabournDavies authored Jul 29, 2024
1 parent ba7b9c0 commit e4a9db6
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 45 deletions.
57 changes: 21 additions & 36 deletions bin/minifollowups/pycbc_sngl_minifollowup
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import pycbc.workflow.minifollowups as mini
import pycbc.workflow as wf
import pycbc.events
from pycbc.workflow.core import resolve_url_to_file
from pycbc.events import stat, veto, coinc
from pycbc.events import stat, veto
from pycbc.io import hdf

parser = argparse.ArgumentParser(description=__doc__[1:])
Expand Down Expand Up @@ -71,7 +71,8 @@ parser.add_argument('--inspiral-data-analyzed-name',
"analyzed by each analysis job.")
parser.add_argument('--min-sngl-ranking', type=float, default=6.5,
help="Minimum sngl-ranking to consider for loudest "
"triggers. Default=6.5.")
"triggers. Useful for efficiency savings. "
"Default=6.5.")
parser.add_argument('--non-coinc-time-only', action='store_true',
help="If given remove (veto) single-detector triggers "
"that occur during a time when at least one other "
Expand Down Expand Up @@ -175,7 +176,7 @@ if args.veto_file:
logging.info('Getting file vetoes')
# veto_mask is an array of indices into the trigger arrays
# giving the surviving triggers
veto_file_idx, _ = events.veto.indices_within_segments(
veto_file_idx, _ = pycbc.events.veto.indices_within_segments(
trigs.end_time,
[args.veto_file],
ifo=args.instrument,
Expand Down Expand Up @@ -237,48 +238,32 @@ if args.maximum_duration is not None:
logging.info('Finding loudest clustered events')
rank_method = stat.get_statistic_from_opts(args, [args.instrument])

extra_kwargs = {}
for inputstr in args.statistic_keywords:
try:
key, value = inputstr.split(':')
extra_kwargs[key] = value
except ValueError:
err_txt = "--statistic-keywords must take input in the " \
"form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \
"Received {}".format(args.statistic_keywords)
raise ValueError(err_txt)

logging.info("Calculating statistic for %d triggers", len(trigs.snr))
sds = rank_method.single(trigs)
stat = rank_method.rank_stat_single((args.instrument, sds), **extra_kwargs)
logging.info("Clustering events over %.3fs window", args.cluster_window)
cid = coinc.cluster_over_time(stat, trigs.end_time,
args.cluster_window)
trigs.apply_mask(cid)
stat = stat[cid]
if len(trigs.snr) < num_events:
num_events = len(trigs.snr)

logging.info("Finding the loudest triggers")
loudest_idx = sorted(numpy.argsort(stat)[::-1][:num_events])
trigs.apply_mask(loudest_idx)
stat = stat[loudest_idx]
extra_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)

trigs.mask_to_n_loudest_clustered_events(
rank_method,
n_loudest=num_events,
cluster_window=args.cluster_window,
statistic_kwargs=extra_kwargs,
)

times = trigs.end_time
tids = trigs.template_id

if isinstance(trigs.mask, numpy.ndarray) and trigs.mask.dtype == bool:
trigger_ids = numpy.flatnonzero(trigs.mask)
else:
trigger_ids = trigs.mask

trig_stat = trigs.stat
# loop over number of loudest events to be followed up
order = stat.argsort()[::-1]
order = trig_stat.argsort()[::-1]
for rank, num_event in enumerate(order):
logging.info('Processing event: %s', num_event)
logging.info('Processing event: %s', rank)

files = wf.FileList([])
time = times[num_event]
ifo_time = '%s:%s' %(args.instrument, str(time))
if isinstance(trigs.mask, numpy.ndarray) and trigs.mask.dtype == bool:
tid = numpy.flatnonzero(trigs.mask)[num_event]
else:
tid = trigs.mask[num_event]
tid = trigger_ids[num_event]
ifo_tid = '%s:%s' %(args.instrument, str(tid))

layouts += (mini.make_sngl_ifo(workflow, sngl_file, tmpltbank_file,
Expand Down
25 changes: 16 additions & 9 deletions pycbc/io/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ def trig_dict(self):
mtrigs[k] = self.trigs[k][self.mask]
else:
mtrigs[k] = self.trigs[k][:]
mtrigs['ifo'] = self.ifo
return mtrigs

@classmethod
Expand Down Expand Up @@ -687,30 +688,35 @@ def and_masks(self, logic_mask):
self.mask[and_indices.astype(np.uint64)] = True

def mask_to_n_loudest_clustered_events(self, rank_method,
ranking_threshold=6,
statistic_threshold=None,
n_loudest=10,
cluster_window=10):
cluster_window=10,
statistic_kwargs=None):
"""Edits the mask property of the class to point to the N loudest
single detector events as ranked by ranking statistic.
Events are clustered so that no more than 1 event within +/-
cluster_window will be considered. Can apply a threshold on the
ranking using ranking_threshold
statistic using statistic_threshold
"""

if statistic_kwargs is None:
statistic_kwargs = {}
sds = rank_method.single(self.trig_dict())
stat = rank_method.rank_stat_single((self.ifo, sds))
stat = rank_method.rank_stat_single(
(self.ifo, sds),
**statistic_kwargs
)
if len(stat) == 0:
# No triggers at all, so just return here
self.apply_mask(np.array([], dtype=np.uint64))
self.stat = np.array([], dtype=np.uint64)
return

times = self.end_time
if ranking_threshold:
# Threshold on sngl_ranking
# Note that we can provide None or zero to do no thresholding
# but the default is to do some
keep = stat >= ranking_threshold
if statistic_threshold is not None:
# Threshold on statistic
keep = stat >= statistic_threshold
stat = stat[keep]
times = times[keep]
self.apply_mask(keep)
Expand Down Expand Up @@ -744,6 +750,7 @@ def mask_to_n_loudest_clustered_events(self, rank_method,
index.sort()
# Apply to the existing mask
self.apply_mask(index)
self.stat = stat[index]

@property
def mask_size(self):
Expand Down

0 comments on commit e4a9db6

Please sign in to comment.