-
Notifications
You must be signed in to change notification settings - Fork 349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PyGRB trigger clustering with short timeslides #4820
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -168,105 +168,143 @@ outfile = os.path.join( | |
), | ||
) | ||
|
||
# -- generate clustering bins ------------------- | ||
# this list contains the indexing of clusters from all slides | ||
all_clusters = [] | ||
|
||
nbins = int((end - start) // win + 1) | ||
bins = [[] for i in range(nbins)] | ||
loudsnr = numpy.zeros(nbins) | ||
loudtime = numpy.zeros(nbins) | ||
clusters = [] | ||
# load necessary information from all triggers | ||
|
||
# -- cluster ------------------------------------ | ||
|
||
with HFile(args.trig_file, "r") as h5f: | ||
time = h5f["network"]["end_time_gc"][()] | ||
snr = h5f["network"][args.rank_column][()] | ||
with h5py.File(args.trig_file, "r") as h5f: | ||
all_times = h5f["network"]["end_time_gc"][()] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is more a matter of taste. Would you consider rewriting it as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did the same with the slide_id |
||
all_snrs = h5f["network"][args.rank_column][()] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In case you agree, here an f-string would do the job, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
slide_ids = h5f["network"]["slide_id"][()] | ||
|
||
# empty file (no triggers), so just copy the file | ||
if not time.size: | ||
if not all_times.size: | ||
shutil.copyfile(args.trig_file, outfile) | ||
msg = "trigger file is empty\n" | ||
msg += "copied input file to {}".format(outfile) | ||
logging.info(msg) | ||
sys.exit(0) | ||
|
||
# find loudest trigger in each bin | ||
for i in tqdm.tqdm(range(time.size), desc="Initialising bins", | ||
disable=not args.verbose, total=time.size, unit='triggers', | ||
**TQDM_KW): | ||
t, s = time[i], snr[i] | ||
idx = int(float(t - start) // win) | ||
bins[idx].append(i) | ||
if s > loudsnr[idx]: | ||
loudsnr[idx] = s | ||
loudtime[idx] = t | ||
|
||
prev = -1 | ||
nxt_ = 1 | ||
first = True | ||
last = False | ||
add_cluster = clusters.append | ||
nclusters = 0 | ||
|
||
# cluster | ||
bar = tqdm.tqdm(bins, desc="Clustering bins", | ||
disable=not args.verbose, total=nbins, unit='bins', | ||
postfix=dict(nclusters=0), **TQDM_KW) | ||
for i, bin_ in enumerate(bar): | ||
if not bin_: # empty | ||
continue | ||
|
||
for idx in bin_: | ||
t, s = time[idx], snr[idx] | ||
|
||
if s < loudsnr[i]: # not loudest in own bin | ||
# -- cluster ------------------------------------ | ||
|
||
unique_slide_ids = numpy.unique(slide_ids) | ||
max_slide_id = max(unique_slide_ids) | ||
msg = 'Clustering '+str(len(slide_ids))+' triggers from ' | ||
msg += str(len(unique_slide_ids))+' slides' | ||
logging.info(msg) | ||
|
||
for slide_id in unique_slide_ids: | ||
# indices to slice current slide | ||
slide_id_pos = numpy.where(slide_ids == slide_id)[0] | ||
# all time and snr values for the current slide | ||
time = all_times[slide_id_pos] | ||
snr = all_snrs[slide_id_pos] | ||
|
||
# generate clustering bins | ||
nbins = int((end - start) // win + 1) | ||
bins = [[] for i in range(nbins)] | ||
loudsnr = numpy.zeros(nbins) | ||
loudtime = numpy.zeros(nbins) | ||
# list to index clusters for current slide | ||
clusters = [] | ||
|
||
# find loudest trigger in each bin, for the current slide | ||
for i in tqdm.tqdm(range(time.size), | ||
desc="Initialising bins", | ||
disable=not args.verbose, | ||
total=time.size, | ||
unit='triggers', | ||
**TQDM_KW): | ||
t, s = time[i], snr[i] | ||
idx = int(float(t - start) // win) | ||
bins[idx].append(i) | ||
if s > loudsnr[idx]: | ||
loudsnr[idx] = s | ||
loudtime[idx] = t | ||
|
||
prev = -1 | ||
nxt_ = 1 | ||
first = True | ||
last = False | ||
add_cluster = clusters.append | ||
nclusters = 0 | ||
|
||
# cluster | ||
bar = tqdm.tqdm(bins, | ||
desc="Clustering bins", | ||
disable=not args.verbose, | ||
total=nbins, | ||
unit='bins', | ||
postfix=dict(nclusters=0), | ||
**TQDM_KW) | ||
for i, bin_ in enumerate(bar): | ||
if not bin_: # empty | ||
continue | ||
|
||
Comment on lines
+189
to
244
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This whole block of code looks good to me |
||
# check loudest event in previous bin | ||
if not first: | ||
prevt = loudtime[prev] | ||
if prevt and abs(prevt - t) < win and s < loudsnr[prev]: | ||
continue | ||
for idx in bin_: | ||
t, s = time[idx], snr[idx] | ||
|
||
# check loudest event in next bin | ||
if not last: | ||
nextt = loudtime[nxt_] | ||
if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]: | ||
if s < loudsnr[i]: # not loudest in own bin | ||
continue | ||
|
||
loudest = True | ||
|
||
# check all events in previous bin | ||
if not first and prevt and abs(prevt - t) < win: | ||
for id2 in bins[prev]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# check all events in next bin | ||
if loudest and not last and nextt and abs(nextt - t) < win: | ||
for id2 in bins[nxt_]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# this is loudest in its vicinity, keep it | ||
if loudest: | ||
add_cluster(idx) | ||
nclusters += 1 | ||
bar.set_postfix(nclusters=nclusters) | ||
|
||
# update things for next time | ||
first = False | ||
last = i == nbins - 1 | ||
prev += 1 | ||
nxt_ += 1 | ||
# check loudest event in previous bin | ||
if not first: | ||
prevt = loudtime[prev] | ||
if prevt and abs(prevt - t) < win and s < loudsnr[prev]: | ||
continue | ||
|
||
# check loudest event in next bin | ||
if not last: | ||
nextt = loudtime[nxt_] | ||
if nextt and abs(nextt - t) < win and s < loudsnr[nxt_]: | ||
continue | ||
|
||
loudest = True | ||
|
||
# check all events in previous bin | ||
if not first and prevt and abs(prevt - t) < win: | ||
for id2 in bins[prev]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# check all events in next bin | ||
if loudest and not last and nextt and abs(nextt - t) < win: | ||
for id2 in bins[nxt_]: | ||
if abs(time[id2] - t) < win and s < snr[id2]: | ||
loudest = False | ||
break | ||
|
||
# this is loudest in its vicinity, keep it | ||
if loudest: | ||
add_cluster(idx) | ||
nclusters += 1 | ||
bar.set_postfix(nclusters=nclusters) | ||
|
||
# update things for next time | ||
first = False | ||
last = i == nbins - 1 | ||
prev += 1 | ||
nxt_ += 1 | ||
|
||
bar.update() | ||
|
||
# clusters is the indexing array for a specific slide_id | ||
# all_clusters is the (absolute) indexing of all clustered triggers | ||
# so look up the indices [clusters] within the absolute indexing array | ||
# slide_id_pos which is built at each slide_id | ||
all_clusters += list(slide_id_pos[clusters]) | ||
msg = 'Slide '+str(slide_id)+'/'+str(max_slide_id) | ||
msg += ' has '+str(len(slide_id_pos)) | ||
msg += ' trigers that were clustered to '+str(len(clusters)) | ||
logging.info(msg) | ||
|
||
bar.update() | ||
logging.info('Total clustered triggers: '+str(len(all_clusters))) | ||
|
||
slice_hdf5( | ||
args.trig_file, | ||
outfile, | ||
numpy.asarray(clusters), | ||
numpy.asarray(all_clusters), | ||
verbose=args.verbose, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we still need this comment ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, because this where we cluster. I will add a comment to flag where the output is written.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I commented on lines you removed. Now I see the comment in line 189 of your new code