Skip to content

Commit

Permalink
Fixing a simple plotting bug and reducing statmap inj job size so tha…
Browse files Browse the repository at this point in the history
…t they can be run with the lower disk requirements in the workflow. The latter needs to be tested.
  • Loading branch information
Bhooshan Gadre committed Jul 6, 2023
1 parent 27cc987 commit be2bf07
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 137 deletions.
10 changes: 7 additions & 3 deletions bin/all_sky_search/pycbc_add_statmap
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ for f_in in files:
bg_exc_trig_ids[ifo] = np.concatenate([bg_exc_trig_ids[ifo],
-1 * np.ones_like(f_in['background_exc/stat'][:],
dtype=int)])
n_triggers = f['foreground/ifar'].size
n_triggers = f['foreground/stat'].size
logging.info('{} foreground events before clustering'.format(n_triggers))

for ifo in all_ifos:
Expand Down Expand Up @@ -266,15 +266,19 @@ def filter_dataset(h5file, name, idx):
# multiple files
for key in f['foreground'].keys():
if key not in all_ifos:
if f['foreground/%s' % key].size == 0:
# This is an indicator that the column was specifically
# not calculated
logging.info('foreground/%s dataset is being ignored', key)
continue
filter_dataset(f, 'foreground/%s' % key, cidx)
else: # key is an ifo
for k in f['foreground/%s' % key].keys():
filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

fg_ifar_init = f['foreground/ifar'][:]
fg_ifar_exc_init = f['foreground/ifar_exc'][:]

n_triggers = fg_ifar_init.size
n_triggers = fg_ifar_exc_init.size

logging.info('Calculating event times to determine which types of coinc '
'are available')
Expand Down
67 changes: 4 additions & 63 deletions bin/all_sky_search/pycbc_coinc_statmap_inj
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,12 @@ parser.add_argument('--cluster-window', type=float, default=10,
'events [default=10s]')
parser.add_argument('--zero-lag-coincs', nargs='+',
help='Files containing the injection zerolag coincidences')
parser.add_argument('--mixed-coincs-inj-full', nargs='+',
help='Files containing the mixed injection/clean data '
'time slides')
parser.add_argument('--mixed-coincs-full-inj', nargs='+',
help='Files containing the mixed clean/injection data '
'time slides')
parser.add_argument('--full-data-background',
help='background file from full data for use in analyzing '
'injection coincs')
parser.add_argument('--veto-window', type=float, default=.1,
help='Time around each zerolag trigger to window out '
'[default=.1s]')
parser.add_argument('--ranking-statistic-threshold', type=float,
help='Minimum value of the ranking statistic to calculate'
' a unique inclusive background.')
parser.add_argument('--ifos', nargs='+',
help='List of ifos used in these coincidence files')
significance.insert_significance_option_group(parser)
Expand All @@ -54,14 +45,6 @@ logging.info("Loading coinc zerolag triggers")
zdata = pycbc.io.MultiifoStatmapData(files=args.zero_lag_coincs, ifos=args.ifos)
zdata = zdata.cluster(window)

logging.info("Loading coinc full inj triggers")
fidata = pycbc.io.MultiifoStatmapData(files=args.mixed_coincs_full_inj,
ifos=args.ifos).cluster(window)

logging.info("Loading coinc inj full triggers")
ifdata = pycbc.io.MultiifoStatmapData(files=args.mixed_coincs_inj_full,
ifos=args.ifos).cluster(window)

f = h5py.File(args.output_file, "w")

f.attrs['num_of_ifos'] = zdata.attrs['num_of_ifos']
Expand Down Expand Up @@ -118,59 +101,17 @@ if len(zdata) > 0:
ifar = numpy.zeros(len(ftimes), dtype=numpy.float32)
fap = numpy.zeros(len(ftimes), dtype=numpy.float32)

# We are relying on the injection data set to be the first one,
# this is determined
# by the argument order to pycbc_coinc_findtrigs
pivot_ifo = zdata.attrs['pivot']
ifsort = ifdata.data['%s/time' % pivot_ifo].argsort()
ifsorted = ifdata.data['%s/time' % pivot_ifo][ifsort]
if_start, if_end = numpy.searchsorted(ifsorted, start), \
numpy.searchsorted(ifsorted, end)

fisort = fidata.data['%s/time' % pivot_ifo].argsort()
fisorted = fidata.data['%s/time' % pivot_ifo][fisort]
fi_start, fi_end = numpy.searchsorted(fisorted, start), \
numpy.searchsorted(fisorted, end)

# most of the triggers are here so presort to speed up later sorting
bsort = back_stat.argsort()
dec_fac = dec_fac[bsort]
back_stat = back_stat[bsort]

for i, fstat in enumerate(zdata.stat):
# If the trigger is quiet enough, then don't calculate a separate
# background type, as it would not be significantly different
if args.ranking_statistic_threshold and \
fstat < args.ranking_statistic_threshold:
fnlouder[i] = fnlouder_exc[i]
ifar[i] = ifar_exc[i]
fap[i] = fap_exc[i]
continue

v1 = fisort[fi_start[i]:fi_end[i]]
v2 = ifsort[if_start[i]:if_end[i]]

inj_stat = numpy.concatenate(
[ifdata.stat[v2], fidata.stat[v1], back_stat])
inj_dec = numpy.concatenate(
[numpy.repeat(1, len(v1) + len(v2)), dec_fac])

_, fnlouder[i] = significance.get_n_louder(
inj_stat,
fstat,
inj_dec,
**significance_dict[ifo_key])

ifar[i] = background_time / (fnlouder[i] + 1)
fap[i] = 1 - numpy.exp(- coinc_time / ifar[i])
logging.info('processed %s, %s' % (i, fstat))

f['foreground/ifar'] = conv.sec_to_year(ifar)
f['foreground/fap'] = fap
else:
f['foreground/ifar_exc'] = numpy.array([])
f['foreground/fap_exc'] = numpy.array([])
f['foreground/ifar'] = numpy.array([])
f['foreground/fap'] = numpy.array([])

# Indicate that inclusive IFAR is not calculated
f['foreground/ifar'] = numpy.array([])
f['foreground/fap'] = numpy.array([])

logging.info("Done")
25 changes: 11 additions & 14 deletions bin/all_sky_search/pycbc_sngls_statmap_inj
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ parser.add_argument('--cluster-window', type=float, default=10,
parser.add_argument('--veto-window', type=float, default=.1,
help='Time around each zerolag trigger to window out '
'[default=.1s]')
parser.add_argument('--background-removal-statistic', type=float,
default=numpy.inf,
help="Remove any triggers with statistic higher "
"than this from the background. "
"Default=None removed")
significance.insert_significance_option_group(parser)
parser.add_argument('--output-file')
args = parser.parse_args()
Expand All @@ -71,7 +66,7 @@ ifo = args.ifos[0]
assert ifo + '/time' in all_trigs.data

logging.info("We have %s triggers" % len(all_trigs.stat))
logging.info("Clustering coinc triggers (inclusive of zerolag)")
logging.info("Clustering triggers")
all_trigs = all_trigs.cluster(args.cluster_window)

logging.info('getting background statistics')
Expand Down Expand Up @@ -121,6 +116,8 @@ back_cnum, fnlouder = significance.get_n_louder(
bkg_dec_facs,
**significance_dict[ifo])

bg_ifar = fg_time / (back_cnum + 1)

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
back_cnum_exc, fnlouder_exc = significance.get_n_louder(
Expand All @@ -129,23 +126,23 @@ back_cnum_exc, fnlouder_exc = significance.get_n_louder(
bkg_exc_dec_facs,
**significance_dict[ifo])

fg_ifar = fg_time / (fnlouder + 1)
bg_ifar = fg_time / (back_cnum + 1)
fg_ifar_exc = fg_time_exc / (fnlouder_exc + 1)
bg_ifar_exc = fg_time_exc / (back_cnum_exc + 1)

# Not sure if these are actually used later as we
# don't do inclusive
f['background/ifar'] = conv.sec_to_year(bg_ifar)
f['background_exc/ifar'] = conv.sec_to_year(bg_ifar_exc)
f.attrs['background_time'] = fg_time
f.attrs['foreground_time'] = fg_time

# Indicate not doing inclusive IFAR/FAP
f['foreground/ifar'] = numpy.array([])
f['foreground/fap'] = numpy.array([])

f['background_exc/ifar'] = conv.sec_to_year(bg_ifar_exc)
f.attrs['background_time_exc'] = fg_time_exc
f.attrs['foreground_time_exc'] = fg_time_exc

logging.info("calculating foreground ifar/fap values")

fap = 1 - numpy.exp(- fg_time / fg_ifar)
f['foreground/ifar'] = conv.sec_to_year(fg_ifar)
f['foreground/fap'] = fap
fap_exc = 1 - numpy.exp(- fg_time_exc / fg_ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(fg_ifar_exc)
f['foreground/fap_exc'] = fap_exc
Expand Down
10 changes: 5 additions & 5 deletions bin/plotting/pycbc_plot_background_coincs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ parser.add_argument('--max-z', type=float, help='Optional maximum z value')
parser.add_argument('--grid-size', default=100, help="Number of hexbins", type=int)
parser.add_argument('--dpi', type=int, default=200)
parser.add_argument('--output-file')
args = parser.parse_args()
args = parser.parse_args()

f = h5py.File(args.coinc_file)
bdata = f['background_exc']
x = get_var(bdata, args.x_var)
Expand All @@ -46,8 +46,8 @@ if args.max_z is not None:
fig = pylab.figure()
ax = fig.gca()
if args.z_var == 'density':
hb = ax.hexbin(x, y, norm=LogNorm(), vmin=1, **hexbin_style)
fig.colorbar(hb, ticks=LogLocator(subs=range(10)))
hb = ax.hexbin(x, y, norm=LogNorm(), **hexbin_style)
fig.colorbar(hb, ticks=LogLocator(subs=range(10)))
elif args.z_var == 'ranking_stat':
hb = ax.hexbin(x, y, C=bdata['stat'][:], reduce_C_function=max, **hexbin_style)
fig.colorbar(hb)
Expand All @@ -56,4 +56,4 @@ ax.set_xlabel(args.x_var)
ax.set_ylabel(args.y_var)
ax.set_title("Coincident Background Triggers, %s" % args.z_var)
fig.savefig(args.output_file, dpi=args.dpi)

4 changes: 2 additions & 2 deletions bin/plotting/pycbc_plot_singles_vs_params
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ opts = parser.parse_args()

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

snr_filter = '(self.snr>%f)' % (opts.min_snr) if opts.min_snr > 0. else None
snr_filter = '(self.snr>%f)' % (opts.min_snr) if opts.min_snr > 0. else None
filts = [f for f in [snr_filter, opts.filter_string] if f is not None]
filter_func = ' & '.join(filts) if filts else None

Expand Down Expand Up @@ -127,7 +127,7 @@ ax = fig.gca()

if opts.z_var == 'density':
norm = LogNorm()
hb = ax.hexbin(x, y, norm=norm, vmin=1, **hexbin_style)
hb = ax.hexbin(x, y, norm=norm, **hexbin_style)
fig.colorbar(hb, ticks=LogLocator(subs=range(10)))
elif opts.z_var in ranking.sngls_ranking_function_dict:
cb_style = {}
Expand Down
76 changes: 26 additions & 50 deletions pycbc/workflow/coincidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,23 +130,21 @@ class PyCBCStatMapInjExecutable(Executable):
"""Calculate FAP, IFAR, etc"""

current_retention_level = Executable.MERGED_TRIGGERS
def create_node(self, zerolag, full_data,
injfull, fullinj, ifos, tags=None):
def create_node(self, coinc_files, full_data,
ifos, tags=None):
if tags is None:
tags = []
segs = zerolag.get_times_covered_by_files()
segs = coinc_files.get_times_covered_by_files()
seg = segments.segment(segs[0][0], segs[-1][1])

node = Node(self)
node.add_input_list_opt('--zero-lag-coincs', zerolag)
node.add_input_list_opt('--zero-lag-coincs', coinc_files)

if isinstance(full_data, list):
node.add_input_list_opt('--full-data-background', full_data)
else:
node.add_input_opt('--full-data-background', full_data)

node.add_input_list_opt('--mixed-coincs-inj-full', injfull)
node.add_input_list_opt('--mixed-coincs-full-inj', fullinj)
node.add_opt('--ifos', ifos)
node.new_output_file_opt(seg, '.hdf', '--output-file', tags=tags)
return node
Expand Down Expand Up @@ -383,16 +381,14 @@ def setup_statmap_inj(workflow, ifos, coinc_files, background_file,
tags=tags, out_dir=out_dir)

ifolist = ' '.join(ifos)
stat_node = statmap_exe.create_node(FileList(coinc_files['injinj']),
stat_node = statmap_exe.create_node(FileList(coinc_files),
background_file,
FileList(coinc_files['injfull']),
FileList(coinc_files['fullinj']),
ifolist)
workflow.add_node(stat_node)
return stat_node.output_files[0]


def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files,
def setup_interval_coinc_inj(workflow, hdfbank,
inj_trig_files, stat_files,
background_file, veto_file, veto_name,
out_dir, pivot_ifo, fixed_ifo, tags=None):
Expand All @@ -408,52 +404,32 @@ def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files,
factor = int(workflow.cp.get_opt_tags('workflow-coincidence',
'parallelization-factor', tags))

ffiles = {}
ifiles = {}
for ifo, ffi in zip(*full_data_trig_files.categorize_by_attr('ifo')):
ffiles[ifo] = ffi[0]
for ifo, ifi in zip(*inj_trig_files.categorize_by_attr('ifo')):
ifiles[ifo] = ifi[0]

injinj_files = FileList()
injfull_files = FileList()
fullinj_files = FileList()
# For the injfull and fullinj separation we take the pivot_ifo on one side,
# and the rest that are attached to the fixed_ifo on the other side

for ifo in ifiles: # ifiles is keyed on ifo
if ifo == pivot_ifo:
injinj_files.append(ifiles[ifo])
injfull_files.append(ifiles[ifo])
fullinj_files.append(ffiles[ifo])
else:
injinj_files.append(ifiles[ifo])
injfull_files.append(ffiles[ifo])
fullinj_files.append(ifiles[ifo])

combo = [(injinj_files, "injinj"),
(injfull_files, "injfull"),
(fullinj_files, "fullinj"),
]
bg_files = {'injinj':[], 'injfull':[], 'fullinj':[]}

for trig_files, ctag in combo:
findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp,
'coinc',
ifos=ifiles.keys(),
tags=tags + [ctag],
out_dir=out_dir)
for i in range(factor):
group_str = '%s/%s' % (i, factor)
coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
stat_files,
veto_file, veto_name,
group_str,
pivot_ifo,
fixed_ifo,
tags=['JOB'+str(i)])

bg_files[ctag] += coinc_node.output_files
workflow.add_node(coinc_node)
injinj_files.append(ifiles[ifo])

findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp,
'coinc',
ifos=ifiles.keys(),
tags=tags + ['injinj'],
out_dir=out_dir)
bg_files = []
for i in range(factor):
group_str = '%s/%s' % (i, factor)
coinc_node = findcoinc_exe.create_node(injinj_files, hdfbank,
stat_files,
veto_file, veto_name,
group_str,
pivot_ifo,
fixed_ifo,
tags=['JOB'+str(i)])
bg_files += coinc_node.output_files
workflow.add_node(coinc_node)

logging.info('...leaving coincidence for injections')

Expand Down

0 comments on commit be2bf07

Please sign in to comment.