Skip to content

Commit

Permalink
Fixing a simple plotting bug and reducing statmap inj job size so tha…
Browse files Browse the repository at this point in the history
…t they can be run with the lower disk requirements in the workflow. The latter needs to be tested.
  • Loading branch information
Bhooshan Gadre committed Jul 6, 2023
1 parent 27cc987 commit be2bf07
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 137 deletions.
10 changes: 7 additions & 3 deletions bin/all_sky_search/pycbc_add_statmap
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ for f_in in files:
bg_exc_trig_ids[ifo] = np.concatenate([bg_exc_trig_ids[ifo],
-1 * np.ones_like(f_in['background_exc/stat'][:],
dtype=int)])
n_triggers = f['foreground/ifar'].size
n_triggers = f['foreground/stat'].size
logging.info('{} foreground events before clustering'.format(n_triggers))

for ifo in all_ifos:
Expand Down Expand Up @@ -266,15 +266,19 @@ def filter_dataset(h5file, name, idx):
# multiple files
for key in f['foreground'].keys():
if key not in all_ifos:
if f['foreground/%s' % key].size == 0:
# This is an indicator that the column was specifically
# not calculated
logging.info('foreground/%s dataset is being ignored', key)
continue
filter_dataset(f, 'foreground/%s' % key, cidx)
else: # key is an ifo
for k in f['foreground/%s' % key].keys():
filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

fg_ifar_init = f['foreground/ifar'][:]
fg_ifar_exc_init = f['foreground/ifar_exc'][:]

n_triggers = fg_ifar_init.size
n_triggers = fg_ifar_exc_init.size

logging.info('Calculating event times to determine which types of coinc '
'are available')
Expand Down
67 changes: 4 additions & 63 deletions bin/all_sky_search/pycbc_coinc_statmap_inj
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,12 @@ parser.add_argument('--cluster-window', type=float, default=10,
'events [default=10s]')
parser.add_argument('--zero-lag-coincs', nargs='+',
help='Files containing the injection zerolag coincidences')
parser.add_argument('--mixed-coincs-inj-full', nargs='+',
help='Files containing the mixed injection/clean data '
'time slides')
parser.add_argument('--mixed-coincs-full-inj', nargs='+',
help='Files containing the mixed clean/injection data '
'time slides')
parser.add_argument('--full-data-background',
help='background file from full data for use in analyzing '
'injection coincs')
parser.add_argument('--veto-window', type=float, default=.1,
help='Time around each zerolag trigger to window out '
'[default=.1s]')
parser.add_argument('--ranking-statistic-threshold', type=float,
help='Minimum value of the ranking statistic to calculate'
' a unique inclusive background.')
parser.add_argument('--ifos', nargs='+',
help='List of ifos used in these coincidence files')
significance.insert_significance_option_group(parser)
Expand All @@ -54,14 +45,6 @@ logging.info("Loading coinc zerolag triggers")
zdata = pycbc.io.MultiifoStatmapData(files=args.zero_lag_coincs, ifos=args.ifos)
zdata = zdata.cluster(window)

logging.info("Loading coinc full inj triggers")
fidata = pycbc.io.MultiifoStatmapData(files=args.mixed_coincs_full_inj,
ifos=args.ifos).cluster(window)

logging.info("Loading coinc inj full triggers")
ifdata = pycbc.io.MultiifoStatmapData(files=args.mixed_coincs_inj_full,
ifos=args.ifos).cluster(window)

f = h5py.File(args.output_file, "w")

f.attrs['num_of_ifos'] = zdata.attrs['num_of_ifos']
Expand Down Expand Up @@ -118,59 +101,17 @@ if len(zdata) > 0:
ifar = numpy.zeros(len(ftimes), dtype=numpy.float32)
fap = numpy.zeros(len(ftimes), dtype=numpy.float32)

# We are relying on the injection data set to be the first one,
# this is determined
# by the argument order to pycbc_coinc_findtrigs
pivot_ifo = zdata.attrs['pivot']
ifsort = ifdata.data['%s/time' % pivot_ifo].argsort()
ifsorted = ifdata.data['%s/time' % pivot_ifo][ifsort]
if_start, if_end = numpy.searchsorted(ifsorted, start), \
numpy.searchsorted(ifsorted, end)

fisort = fidata.data['%s/time' % pivot_ifo].argsort()
fisorted = fidata.data['%s/time' % pivot_ifo][fisort]
fi_start, fi_end = numpy.searchsorted(fisorted, start), \
numpy.searchsorted(fisorted, end)

# most of the triggers are here so presort to speed up later sorting
bsort = back_stat.argsort()
dec_fac = dec_fac[bsort]
back_stat = back_stat[bsort]

for i, fstat in enumerate(zdata.stat):
# If the trigger is quiet enough, then don't calculate a separate
# background type, as it would not be significantly different
if args.ranking_statistic_threshold and \
fstat < args.ranking_statistic_threshold:
fnlouder[i] = fnlouder_exc[i]
ifar[i] = ifar_exc[i]
fap[i] = fap_exc[i]
continue

v1 = fisort[fi_start[i]:fi_end[i]]
v2 = ifsort[if_start[i]:if_end[i]]

inj_stat = numpy.concatenate(
[ifdata.stat[v2], fidata.stat[v1], back_stat])
inj_dec = numpy.concatenate(
[numpy.repeat(1, len(v1) + len(v2)), dec_fac])

_, fnlouder[i] = significance.get_n_louder(
inj_stat,
fstat,
inj_dec,
**significance_dict[ifo_key])

ifar[i] = background_time / (fnlouder[i] + 1)
fap[i] = 1 - numpy.exp(- coinc_time / ifar[i])
logging.info('processed %s, %s' % (i, fstat))

f['foreground/ifar'] = conv.sec_to_year(ifar)
f['foreground/fap'] = fap
else:
f['foreground/ifar_exc'] = numpy.array([])
f['foreground/fap_exc'] = numpy.array([])
f['foreground/ifar'] = numpy.array([])
f['foreground/fap'] = numpy.array([])

# Indicate that inclusive IFAR is not calculated
f['foreground/ifar'] = numpy.array([])
f['foreground/fap'] = numpy.array([])

logging.info("Done")
25 changes: 11 additions & 14 deletions bin/all_sky_search/pycbc_sngls_statmap_inj
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ parser.add_argument('--cluster-window', type=float, default=10,
parser.add_argument('--veto-window', type=float, default=.1,
help='Time around each zerolag trigger to window out '
'[default=.1s]')
parser.add_argument('--background-removal-statistic', type=float,
default=numpy.inf,
help="Remove any triggers with statistic higher "
"than this from the background. "
"Default=None removed")
significance.insert_significance_option_group(parser)
parser.add_argument('--output-file')
args = parser.parse_args()
Expand All @@ -71,7 +66,7 @@ ifo = args.ifos[0]
assert ifo + '/time' in all_trigs.data

logging.info("We have %s triggers" % len(all_trigs.stat))
logging.info("Clustering coinc triggers (inclusive of zerolag)")
logging.info("Clustering triggers")
all_trigs = all_trigs.cluster(args.cluster_window)

logging.info('getting background statistics')
Expand Down Expand Up @@ -121,6 +116,8 @@ back_cnum, fnlouder = significance.get_n_louder(
bkg_dec_facs,
**significance_dict[ifo])

bg_ifar = fg_time / (back_cnum + 1)

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
back_cnum_exc, fnlouder_exc = significance.get_n_louder(
Expand All @@ -129,23 +126,23 @@ back_cnum_exc, fnlouder_exc = significance.get_n_louder(
bkg_exc_dec_facs,
**significance_dict[ifo])

fg_ifar = fg_time / (fnlouder + 1)
bg_ifar = fg_time / (back_cnum + 1)
fg_ifar_exc = fg_time_exc / (fnlouder_exc + 1)
bg_ifar_exc = fg_time_exc / (back_cnum_exc + 1)

# Not sure if these are actually used later as we
# don't do inclusive
f['background/ifar'] = conv.sec_to_year(bg_ifar)
f['background_exc/ifar'] = conv.sec_to_year(bg_ifar_exc)
f.attrs['background_time'] = fg_time
f.attrs['foreground_time'] = fg_time

# Indicate not doing inclusive IFAR/FAP
f['foreground/ifar'] = numpy.array([])
f['foreground/fap'] = numpy.array([])

f['background_exc/ifar'] = conv.sec_to_year(bg_ifar_exc)
f.attrs['background_time_exc'] = fg_time_exc
f.attrs['foreground_time_exc'] = fg_time_exc

logging.info("calculating foreground ifar/fap values")

fap = 1 - numpy.exp(- fg_time / fg_ifar)
f['foreground/ifar'] = conv.sec_to_year(fg_ifar)
f['foreground/fap'] = fap
fap_exc = 1 - numpy.exp(- fg_time_exc / fg_ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(fg_ifar_exc)
f['foreground/fap_exc'] = fap_exc
Expand Down
10 changes: 5 additions & 5 deletions bin/plotting/pycbc_plot_background_coincs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ parser.add_argument('--max-z', type=float, help='Optional maximum z value')
parser.add_argument('--grid-size', default=100, help="Number of hexbins", type=int)
parser.add_argument('--dpi', type=int, default=200)
parser.add_argument('--output-file')
args = parser.parse_args()
args = parser.parse_args()

f = h5py.File(args.coinc_file)
bdata = f['background_exc']
x = get_var(bdata, args.x_var)
Expand All @@ -46,8 +46,8 @@ if args.max_z is not None:
fig = pylab.figure()
ax = fig.gca()
if args.z_var == 'density':
hb = ax.hexbin(x, y, norm=LogNorm(), vmin=1, **hexbin_style)
fig.colorbar(hb, ticks=LogLocator(subs=range(10)))
hb = ax.hexbin(x, y, norm=LogNorm(), **hexbin_style)
fig.colorbar(hb, ticks=LogLocator(subs=range(10)))
elif args.z_var == 'ranking_stat':
hb = ax.hexbin(x, y, C=bdata['stat'][:], reduce_C_function=max, **hexbin_style)
fig.colorbar(hb)
Expand All @@ -56,4 +56,4 @@ ax.set_xlabel(args.x_var)
ax.set_ylabel(args.y_var)
ax.set_title("Coincident Background Triggers, %s" % args.z_var)
fig.savefig(args.output_file, dpi=args.dpi)

4 changes: 2 additions & 2 deletions bin/plotting/pycbc_plot_singles_vs_params
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ opts = parser.parse_args()

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

snr_filter = '(self.snr>%f)' % (opts.min_snr) if opts.min_snr > 0. else None
snr_filter = '(self.snr>%f)' % (opts.min_snr) if opts.min_snr > 0. else None
filts = [f for f in [snr_filter, opts.filter_string] if f is not None]
filter_func = ' & '.join(filts) if filts else None

Expand Down Expand Up @@ -127,7 +127,7 @@ ax = fig.gca()

if opts.z_var == 'density':
norm = LogNorm()
hb = ax.hexbin(x, y, norm=norm, vmin=1, **hexbin_style)
hb = ax.hexbin(x, y, norm=norm, **hexbin_style)
fig.colorbar(hb, ticks=LogLocator(subs=range(10)))
elif opts.z_var in ranking.sngls_ranking_function_dict:
cb_style = {}
Expand Down
76 changes: 26 additions & 50 deletions pycbc/workflow/coincidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,23 +130,21 @@ class PyCBCStatMapInjExecutable(Executable):
"""Calculate FAP, IFAR, etc"""

current_retention_level = Executable.MERGED_TRIGGERS
def create_node(self, zerolag, full_data,
injfull, fullinj, ifos, tags=None):
def create_node(self, coinc_files, full_data,
ifos, tags=None):
if tags is None:
tags = []
segs = zerolag.get_times_covered_by_files()
segs = coinc_files.get_times_covered_by_files()
seg = segments.segment(segs[0][0], segs[-1][1])

node = Node(self)
node.add_input_list_opt('--zero-lag-coincs', zerolag)
node.add_input_list_opt('--zero-lag-coincs', coinc_files)

if isinstance(full_data, list):
node.add_input_list_opt('--full-data-background', full_data)
else:
node.add_input_opt('--full-data-background', full_data)

node.add_input_list_opt('--mixed-coincs-inj-full', injfull)
node.add_input_list_opt('--mixed-coincs-full-inj', fullinj)
node.add_opt('--ifos', ifos)
node.new_output_file_opt(seg, '.hdf', '--output-file', tags=tags)
return node
Expand Down Expand Up @@ -383,16 +381,14 @@ def setup_statmap_inj(workflow, ifos, coinc_files, background_file,
tags=tags, out_dir=out_dir)

ifolist = ' '.join(ifos)
stat_node = statmap_exe.create_node(FileList(coinc_files['injinj']),
stat_node = statmap_exe.create_node(FileList(coinc_files),
background_file,
FileList(coinc_files['injfull']),
FileList(coinc_files['fullinj']),
ifolist)
workflow.add_node(stat_node)
return stat_node.output_files[0]


def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files,
def setup_interval_coinc_inj(workflow, hdfbank,
inj_trig_files, stat_files,
background_file, veto_file, veto_name,
out_dir, pivot_ifo, fixed_ifo, tags=None):
Expand All @@ -408,52 +404,32 @@ def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files,
factor = int(workflow.cp.get_opt_tags('workflow-coincidence',
'parallelization-factor', tags))

ffiles = {}
ifiles = {}
for ifo, ffi in zip(*full_data_trig_files.categorize_by_attr('ifo')):
ffiles[ifo] = ffi[0]
for ifo, ifi in zip(*inj_trig_files.categorize_by_attr('ifo')):
ifiles[ifo] = ifi[0]

injinj_files = FileList()
injfull_files = FileList()
fullinj_files = FileList()
# For the injfull and fullinj separation we take the pivot_ifo on one side,
# and the rest that are attached to the fixed_ifo on the other side

for ifo in ifiles: # ifiles is keyed on ifo
if ifo == pivot_ifo:
injinj_files.append(ifiles[ifo])
injfull_files.append(ifiles[ifo])
fullinj_files.append(ffiles[ifo])
else:
injinj_files.append(ifiles[ifo])
injfull_files.append(ffiles[ifo])
fullinj_files.append(ifiles[ifo])

combo = [(injinj_files, "injinj"),
(injfull_files, "injfull"),
(fullinj_files, "fullinj"),
]
bg_files = {'injinj':[], 'injfull':[], 'fullinj':[]}

for trig_files, ctag in combo:
findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp,
'coinc',
ifos=ifiles.keys(),
tags=tags + [ctag],
out_dir=out_dir)
for i in range(factor):
group_str = '%s/%s' % (i, factor)
coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
stat_files,
veto_file, veto_name,
group_str,
pivot_ifo,
fixed_ifo,
tags=['JOB'+str(i)])

bg_files[ctag] += coinc_node.output_files
workflow.add_node(coinc_node)
injinj_files.append(ifiles[ifo])

findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp,
'coinc',
ifos=ifiles.keys(),
tags=tags + ['injinj'],
out_dir=out_dir)
bg_files = []
for i in range(factor):
group_str = '%s/%s' % (i, factor)
coinc_node = findcoinc_exe.create_node(injinj_files, hdfbank,
stat_files,
veto_file, veto_name,
group_str,
pivot_ifo,
fixed_ifo,
tags=['JOB'+str(i)])
bg_files += coinc_node.output_files
workflow.add_node(coinc_node)

logging.info('...leaving coincidence for injections')

Expand Down

0 comments on commit be2bf07

Please sign in to comment.