-
Notifications
You must be signed in to change notification settings - Fork 349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PyGRB trigger clustering with short timeslides #4820
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -168,105 +168,145 @@ outfile = os.path.join( | |
), | ||
) | ||
|
||
# -- generate clustering bins ------------------- | ||
# this list contains the indexing of clusters from all slides | ||
all_clusters = [] | ||
|
||
nbins = int((end - start) // win + 1) | ||
bins = [[] for i in range(nbins)] | ||
loudsnr = numpy.zeros(nbins) | ||
loudtime = numpy.zeros(nbins) | ||
clusters = [] | ||
|
||
# -- cluster ------------------------------------ | ||
# load necessary information from all triggers | ||
|
||
with HFile(args.trig_file, "r") as h5f: | ||
time = h5f["network"]["end_time_gc"][()] | ||
snr = h5f["network"][args.rank_column][()] | ||
all_times = h5f["network/end_time_gc"][()] | ||
all_snrs = h5f[f"network/{args.rank_column}"][()] | ||
slide_ids = h5f["network/slide_id"][()] | ||
|
||
# empty file (no triggers), so just copy the file | ||
if not time.size: | ||
if not all_times.size: | ||
shutil.copyfile(args.trig_file, outfile) | ||
msg = "trigger file is empty\n" | ||
msg += "copied input file to {}".format(outfile) | ||
logging.info(msg) | ||
sys.exit(0) | ||
|
||
# find loudest trigger in each bin | ||
for i in tqdm.tqdm(range(time.size), desc="Initialising bins", | ||
disable=not args.verbose, total=time.size, unit='triggers', | ||
**TQDM_KW): | ||
t, s = time[i], snr[i] | ||
idx = int(float(t - start) // win) | ||
bins[idx].append(i) | ||
if s > loudsnr[idx]: | ||
loudsnr[idx] = s | ||
loudtime[idx] = t | ||
|
||
prev = -1 | ||
nxt_ = 1 | ||
first = True | ||
last = False | ||
add_cluster = clusters.append | ||
nclusters = 0 | ||
|
||
# cluster | ||
bar = tqdm.tqdm(bins, desc="Clustering bins", | ||
disable=not args.verbose, total=nbins, unit='bins', | ||
postfix=dict(nclusters=0), **TQDM_KW) | ||
for i, bin_ in enumerate(bar): | ||
if not bin_: # empty | ||
continue | ||
|
||
for idx in bin_: | ||
t, s = time[idx], snr[idx] | ||
|
||
if s < loudsnr[i]: # not loudest in own bin | ||
# -- cluster ------------------------------------ | ||
|
||
unique_slide_ids = numpy.unique(slide_ids) | ||
max_slide_id = max(unique_slide_ids) | ||
msg = 'Clustering '+str(len(slide_ids))+' triggers from ' | ||
msg += str(len(unique_slide_ids))+' slides' | ||
logging.info(msg) | ||
|
||
for slide_id in unique_slide_ids: | ||
# indices to slice current slide | ||
slide_id_pos = numpy.where(slide_ids == slide_id)[0] | ||
# all time and snr values for the current slide | ||
time = all_times[slide_id_pos] | ||
snr = all_snrs[slide_id_pos] | ||
|
||
# generate clustering bins | ||
nbins = int((end - start) // win + 1) | ||
bins = [[] for i in range(nbins)] | ||
loudsnr = numpy.zeros(nbins) | ||
loudtime = numpy.zeros(nbins) | ||
# list to index clusters for current slide | ||
clusters = [] | ||
|
||
# find loudest trigger in each bin, for the current slide | ||
for i in tqdm.tqdm(range(time.size), | ||
desc="Initialising bins", | ||
disable=not args.verbose, | ||
total=time.size, | ||
unit='triggers', | ||
**TQDM_KW): | ||
t, s = time[i], snr[i] | ||
idx = int(float(t - start) // win) | ||
bins[idx].append(i) | ||
if s > loudsnr[idx]: | ||
loudsnr[idx] = s | ||
loudtime[idx] = t | ||
|
||
prev = -1 | ||
nxt_ = 1 | ||
first = True | ||
last = False | ||
add_cluster = clusters.append | ||
nclusters = 0 | ||
|
||
# cluster | ||
bar = tqdm.tqdm(bins, | ||
desc="Clustering bins", | ||
disable=not args.verbose, | ||
total=nbins, | ||
unit='bins', | ||
postfix=dict(nclusters=0), | ||
**TQDM_KW) | ||
for i, bin_ in enumerate(bar): | ||
if not bin_: # empty | ||
continue | ||
|
||
Comment on lines
+189
to
244
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This whole block of code looks good to me |
||
# check loudest event in previous bin | ||
if not first: | ||
prevt = loudtime[prev] | ||
if prevt and abs(prevt - t) < win and s < loudsnr[prev]: | ||
continue | ||
for idx in bin_: | ||
t, s = time[idx], snr[idx] | ||
|
||
# check loudest event in next bin | ||
if not last: | ||
nextt = loudtime[nxt_] | ||
if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]: | ||
if s < loudsnr[i]: # not loudest in own bin | ||
continue | ||
|
||
loudest = True | ||
|
||
# check all events in previous bin | ||
if not first and prevt and abs(prevt - t) < win: | ||
for id2 in bins[prev]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# check all events in next bin | ||
if loudest and not last and nextt and abs(nextt - t) < win: | ||
for id2 in bins[nxt_]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# this is loudest in its vicinity, keep it | ||
if loudest: | ||
add_cluster(idx) | ||
nclusters += 1 | ||
bar.set_postfix(nclusters=nclusters) | ||
# check loudest event in previous bin | ||
if not first: | ||
prevt = loudtime[prev] | ||
if prevt and abs(prevt - t) < win and s < loudsnr[prev]: | ||
continue | ||
|
||
# check loudest event in next bin | ||
if not last: | ||
nextt = loudtime[nxt_] | ||
if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]: | ||
continue | ||
|
||
loudest = True | ||
|
||
# check all events in previous bin | ||
if not first and prevt and abs(prevt - t) < win: | ||
for id2 in bins[prev]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# check all events in next bin | ||
if loudest and not last and nextt and abs(nextt - t) < win: | ||
for id2 in bins[nxt_]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# this is loudest in its vicinity, keep it | ||
if loudest: | ||
add_cluster(idx) | ||
nclusters += 1 | ||
bar.set_postfix(nclusters=nclusters) | ||
|
||
# update things for next time | ||
first = False | ||
last = i == nbins - 1 | ||
prev += 1 | ||
nxt_ += 1 | ||
|
||
bar.update() | ||
|
||
Comment on lines
+245
to
+292
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mostly rearranging lines, looks good. |
||
# clusters is the indexing array for a specific slide_id | ||
# all_clusters is the (absolute) indexing of all clustered triggers | ||
# so look up the indices [clusters] within the absolute indexing array | ||
# slide_id_pos which is built at each slide_id | ||
all_clusters += list(slide_id_pos[clusters]) | ||
msg = 'Slide '+str(slide_id)+'/'+str(max_slide_id) | ||
msg += ' has '+str(len(slide_id_pos)) | ||
msg += ' trigers that were clustered to '+str(len(clusters)) | ||
logging.info(msg) | ||
|
||
# update things for next time | ||
first = False | ||
last = i == nbins - 1 | ||
prev += 1 | ||
nxt_ += 1 | ||
logging.info('Total clustered triggers: '+str(len(all_clusters))) | ||
|
||
bar.update() | ||
# -- write output -------------------------------- | ||
|
||
slice_hdf5( | ||
args.trig_file, | ||
outfile, | ||
numpy.asarray(clusters), | ||
numpy.asarray(all_clusters), | ||
verbose=args.verbose, | ||
) | ||
Comment on lines
+293
to
312
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interesting approach overall. I'm ok with merging once tests are passed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we still need this comment ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, because this where we cluster. I will add a comment to flag where the output is written.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I commented on lines you removed. Now I see the comment in line 189 of your new code