Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PyGRB trigger clustering with short timeslides #4820

Merged
merged 2 commits into from
Jul 21, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 118 additions & 78 deletions bin/pygrb/pycbc_grb_trig_cluster
Original file line number Diff line number Diff line change
Expand Up @@ -168,105 +168,145 @@ outfile = os.path.join(
),
)

# -- generate clustering bins -------------------
# this list contains the indexing of clusters from all slides
all_clusters = []

nbins = int((end - start) // win + 1)
bins = [[] for i in range(nbins)]
loudsnr = numpy.zeros(nbins)
loudtime = numpy.zeros(nbins)
clusters = []

# -- cluster ------------------------------------
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need this comment ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, because this is where we cluster. I will add a comment to flag where the output is written.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I commented on lines you removed. Now I see the comment in line 189 of your new code

# load necessary information from all triggers

with HFile(args.trig_file, "r") as h5f:
time = h5f["network"]["end_time_gc"][()]
snr = h5f["network"][args.rank_column][()]
all_times = h5f["network/end_time_gc"][()]
all_snrs = h5f[f"network/{args.rank_column}"][()]
slide_ids = h5f["network/slide_id"][()]

# empty file (no triggers), so just copy the file
if not time.size:
if not all_times.size:
shutil.copyfile(args.trig_file, outfile)
msg = "trigger file is empty\n"
msg += "copied input file to {}".format(outfile)
logging.info(msg)
sys.exit(0)

# find loudest trigger in each bin
for i in tqdm.tqdm(range(time.size), desc="Initialising bins",
disable=not args.verbose, total=time.size, unit='triggers',
**TQDM_KW):
t, s = time[i], snr[i]
idx = int(float(t - start) // win)
bins[idx].append(i)
if s > loudsnr[idx]:
loudsnr[idx] = s
loudtime[idx] = t

prev = -1
nxt_ = 1
first = True
last = False
add_cluster = clusters.append
nclusters = 0

# cluster
bar = tqdm.tqdm(bins, desc="Clustering bins",
disable=not args.verbose, total=nbins, unit='bins',
postfix=dict(nclusters=0), **TQDM_KW)
for i, bin_ in enumerate(bar):
if not bin_: # empty
continue

for idx in bin_:
t, s = time[idx], snr[idx]

if s < loudsnr[i]: # not loudest in own bin
# -- cluster ------------------------------------

unique_slide_ids = numpy.unique(slide_ids)
max_slide_id = max(unique_slide_ids)
msg = 'Clustering '+str(len(slide_ids))+' triggers from '
msg += str(len(unique_slide_ids))+' slides'
logging.info(msg)

for slide_id in unique_slide_ids:
# indices to slice current slide
slide_id_pos = numpy.where(slide_ids == slide_id)[0]
# all time and snr values for the current slide
time = all_times[slide_id_pos]
snr = all_snrs[slide_id_pos]

# generate clustering bins
nbins = int((end - start) // win + 1)
bins = [[] for i in range(nbins)]
loudsnr = numpy.zeros(nbins)
loudtime = numpy.zeros(nbins)
# list to index clusters for current slide
clusters = []

# find loudest trigger in each bin, for the current slide
for i in tqdm.tqdm(range(time.size),
desc="Initialising bins",
disable=not args.verbose,
total=time.size,
unit='triggers',
**TQDM_KW):
t, s = time[i], snr[i]
idx = int(float(t - start) // win)
bins[idx].append(i)
if s > loudsnr[idx]:
loudsnr[idx] = s
loudtime[idx] = t

prev = -1
nxt_ = 1
first = True
last = False
add_cluster = clusters.append
nclusters = 0

# cluster
bar = tqdm.tqdm(bins,
desc="Clustering bins",
disable=not args.verbose,
total=nbins,
unit='bins',
postfix=dict(nclusters=0),
**TQDM_KW)
for i, bin_ in enumerate(bar):
if not bin_: # empty
continue

Comment on lines +189 to 244
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole block of code looks good to me

# check loudest event in previous bin
if not first:
prevt = loudtime[prev]
if prevt and abs(prevt - t) < win and s < loudsnr[prev]:
continue
for idx in bin_:
t, s = time[idx], snr[idx]

# check loudest event in next bin
if not last:
nextt = loudtime[nxt_]
if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]:
if s < loudsnr[i]: # not loudest in own bin
continue

loudest = True

# check all events in previous bin
if not first and prevt and abs(prevt - t) < win:
for id2 in bins[prev]:
if abs(time[id2] - t) < win and s < snr[id2]:
loudest = False
break

# check all events in next bin
if loudest and not last and nextt and abs(nextt - t) < win:
for id2 in bins[nxt_]:
if abs(time[id2] - t) < win and s < snr[id2]:
loudest = False
break

# this is loudest in its vicinity, keep it
if loudest:
add_cluster(idx)
nclusters += 1
bar.set_postfix(nclusters=nclusters)
# check loudest event in previous bin
if not first:
prevt = loudtime[prev]
if prevt and abs(prevt - t) < win and s < loudsnr[prev]:
continue

# check loudest event in next bin
if not last:
nextt = loudtime[nxt_]
if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]:
continue

loudest = True

# check all events in previous bin
if not first and prevt and abs(prevt - t) < win:
for id2 in bins[prev]:
if abs(time[id2] - t) < win and s < snr[id2]:
loudest = False
break

# check all events in next bin
if loudest and not last and nextt and abs(nextt - t) < win:
for id2 in bins[nxt_]:
if abs(time[id2] - t) < win and s < snr[id2]:
loudest = False
break

# this is loudest in its vicinity, keep it
if loudest:
add_cluster(idx)
nclusters += 1
bar.set_postfix(nclusters=nclusters)

# update things for next time
first = False
last = i == nbins - 1
prev += 1
nxt_ += 1

bar.update()

Comment on lines +245 to +292
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly rearranging lines, looks good.

# clusters is the indexing array for a specific slide_id
# all_clusters is the (absolute) indexing of all clustered triggers
# so look up the indices [clusters] within the absolute indexing array
# slide_id_pos which is built at each slide_id
all_clusters += list(slide_id_pos[clusters])
msg = 'Slide '+str(slide_id)+'/'+str(max_slide_id)
msg += ' has '+str(len(slide_id_pos))
msg += ' triggers that were clustered to '+str(len(clusters))
logging.info(msg)

# update things for next time
first = False
last = i == nbins - 1
prev += 1
nxt_ += 1
logging.info('Total clustered triggers: '+str(len(all_clusters)))

bar.update()
# -- write output --------------------------------

slice_hdf5(
args.trig_file,
outfile,
numpy.asarray(clusters),
numpy.asarray(all_clusters),
verbose=args.verbose,
)
Comment on lines +293 to 312
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting approach overall. I'm ok with merging once tests are passed.

Loading