diff --git a/bin/pygrb/pycbc_grb_trig_cluster b/bin/pygrb/pycbc_grb_trig_cluster index c0fc0937474..5f0f1007256 100644 --- a/bin/pygrb/pycbc_grb_trig_cluster +++ b/bin/pygrb/pycbc_grb_trig_cluster @@ -168,105 +168,145 @@ outfile = os.path.join( ), ) -# -- generate clustering bins ------------------- +# this list contains the indexing of clusters from all slides +all_clusters = [] -nbins = int((end - start) // win + 1) -bins = [[] for i in range(nbins)] -loudsnr = numpy.zeros(nbins) -loudtime = numpy.zeros(nbins) -clusters = [] - -# -- cluster ------------------------------------ +# load necessary information from all triggers with HFile(args.trig_file, "r") as h5f: - time = h5f["network"]["end_time_gc"][()] - snr = h5f["network"][args.rank_column][()] + all_times = h5f["network/end_time_gc"][()] + all_snrs = h5f[f"network/{args.rank_column}"][()] + slide_ids = h5f["network/slide_id"][()] # empty file (no triggers), so just copy the file -if not time.size: +if not all_times.size: shutil.copyfile(args.trig_file, outfile) msg = "trigger file is empty\n" msg += "copied input file to {}".format(outfile) logging.info(msg) sys.exit(0) -# find loudest trigger in each bin -for i in tqdm.tqdm(range(time.size), desc="Initialising bins", - disable=not args.verbose, total=time.size, unit='triggers', - **TQDM_KW): - t, s = time[i], snr[i] - idx = int(float(t - start) // win) - bins[idx].append(i) - if s > loudsnr[idx]: - loudsnr[idx] = s - loudtime[idx] = t - -prev = -1 -nxt_ = 1 -first = True -last = False -add_cluster = clusters.append -nclusters = 0 - -# cluster -bar = tqdm.tqdm(bins, desc="Clustering bins", - disable=not args.verbose, total=nbins, unit='bins', - postfix=dict(nclusters=0), **TQDM_KW) -for i, bin_ in enumerate(bar): - if not bin_: # empty - continue - - for idx in bin_: - t, s = time[idx], snr[idx] - - if s < loudsnr[i]: # not loudest in own bin +# -- cluster ------------------------------------ + +unique_slide_ids = numpy.unique(slide_ids) +max_slide_id = max(unique_slide_ids) +msg = 'Clustering '+str(len(slide_ids))+' triggers from ' +msg += str(len(unique_slide_ids))+' slides' +logging.info(msg) + +for slide_id in unique_slide_ids: + # indices to slice current slide + slide_id_pos = numpy.where(slide_ids == slide_id)[0] + # all time and snr values for the current slide + time = all_times[slide_id_pos] + snr = all_snrs[slide_id_pos] + + # generate clustering bins + nbins = int((end - start) // win + 1) + bins = [[] for i in range(nbins)] + loudsnr = numpy.zeros(nbins) + loudtime = numpy.zeros(nbins) + # list to index clusters for current slide + clusters = [] + + # find loudest trigger in each bin, for the current slide + for i in tqdm.tqdm(range(time.size), + desc="Initialising bins", + disable=not args.verbose, + total=time.size, + unit='triggers', + **TQDM_KW): + t, s = time[i], snr[i] + idx = int(float(t - start) // win) + bins[idx].append(i) + if s > loudsnr[idx]: + loudsnr[idx] = s + loudtime[idx] = t + + prev = -1 + nxt_ = 1 + first = True + last = False + add_cluster = clusters.append + nclusters = 0 + + # cluster + bar = tqdm.tqdm(bins, + desc="Clustering bins", + disable=not args.verbose, + total=nbins, + unit='bins', + postfix=dict(nclusters=0), + **TQDM_KW) + for i, bin_ in enumerate(bar): + if not bin_: # empty continue - # check loudest event in previous bin - if not first: - prevt = loudtime[prev] - if prevt and abs(prevt - t) < win and s < loudsnr[prev]: - continue + for idx in bin_: + t, s = time[idx], snr[idx] - # check loudest event in next bin - if not last: - nextt = loudtime[nxt_] - if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]: + if s < loudsnr[i]: # not loudest in own bin continue - loudest = True - - # check all events in previous bin - if not first and prevt and abs(prevt - t) < win: - for id2 in bins[prev]: - if abs(time[id2] - t) < win and s < snr[id2]: - loudest = False - break - - # check all events in next bin - if loudest and not last and nextt and abs(nextt - t) < win: - for id2 in bins[nxt_]: - if abs(time[id2] - t) < win and s < snr[id2]: - loudest = False - break - - # this is loudest in its vicinity, keep it - if loudest: - add_cluster(idx) - nclusters += 1 - bar.set_postfix(nclusters=nclusters) + # check loudest event in previous bin + if not first: + prevt = loudtime[prev] + if prevt and abs(prevt - t) < win and s < loudsnr[prev]: + continue + + # check loudest event in next bin + if not last: + nextt = loudtime[nxt_] + if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]: + continue + + loudest = True + + # check all events in previous bin + if not first and prevt and abs(prevt - t) < win: + for id2 in bins[prev]: + if abs(time[id2] - t) < win and s < snr[id2]: + loudest = False + break + + # check all events in next bin + if loudest and not last and nextt and abs(nextt - t) < win: + for id2 in bins[nxt_]: + if abs(time[id2] - t) < win and s < snr[id2]: + loudest = False + break + + # this is loudest in its vicinity, keep it + if loudest: + add_cluster(idx) + nclusters += 1 + bar.set_postfix(nclusters=nclusters) + + # update things for next time + first = False + last = i == nbins - 1 + prev += 1 + nxt_ += 1 + + bar.update() + + # clusters is the indexing array for a specific slide_id + # all_clusters is the (absolute) indexing of all clustered triggers + # so look up the indices [clusters] within the absolute indexing array + # slide_id_pos which is built at each slide_id + all_clusters += list(slide_id_pos[clusters]) + msg = 'Slide '+str(slide_id)+'/'+str(max_slide_id) + msg += ' has '+str(len(slide_id_pos)) + msg += ' trigers that were clustered to '+str(len(clusters)) + logging.info(msg) - # update things for next time - first = False - last = i == nbins - 1 - prev += 1 - nxt_ += 1 +logging.info('Total clustered triggers: '+str(len(all_clusters))) - bar.update() +# -- write output -------------------------------- slice_hdf5( args.trig_file, outfile, - numpy.asarray(clusters), + numpy.asarray(all_clusters), verbose=args.verbose, )