|
| 1 | +"""Script which injects a run number in every event of every tree in a file or |
| 2 | +a list of files. |
| 3 | +""" |
| 4 | + |
| 5 | +import os |
| 6 | +import argparse |
| 7 | +import tempfile |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +from tqdm import tqdm |
| 11 | +from ROOT import TFile # pylint: disable=E0611 |
| 12 | +from larcv import larcv # pylint: disable=W0611 |
| 13 | + |
# Template of the LArCV IOManager configuration string. The `INPUT_PATH` and
# `OUTPUT_PATH` placeholders are substituted with the actual file paths by
# `initialize_manager` (plain `str.replace`) before the configuration is
# written to a temporary file and handed to `larcv.IOManager`.
# NOTE(review): `IOMode : 2` is presumably the combined read/write mode,
# which this script relies on to copy entries -- confirm against LArCV.
CFG = """
IOManager: {
 Verbosity : 4
 Name : "OutIO"
 IOMode : 2
 InputFiles : [INPUT_PATH]
 OutFileName : OUTPUT_PATH
}
"""
| 24 | + |
| 25 | + |
def initialize_manager(file_path, dest, overwrite, suffix):
    """Initialize an IOManager which reads one file and writes another.

    The output path is derived from the input path: it is optionally
    redirected to the `dest` directory, then tagged with either `suffix` or,
    when the input is to be overwritten, a temporary `tmp` tag. The tag is
    inserted right before the file extension. Moving the temporary file over
    the original is left to the caller.

    Parameters
    ----------
    file_path : str
        Path to the input file
    dest : str, optional
        Directory to write the output file to. If `None`, the output file is
        written next to the input file.
    overwrite : bool
        If `True`, the output is written to a `_tmp` file which the caller
        is expected to move over the input file
    suffix : str, optional
        String inserted (preceded by an underscore) before the extension of
        the output file name. Incompatible with `overwrite`.

    Returns
    -------
    larcv.IOManager
        Initialized IOManager object
    str
        Path to the output file

    Raises
    ------
    ValueError
        If both `suffix` and `overwrite` are provided, or if the output file
        resolves to the same location as the input file.
    """
    # Validate with an exception rather than an assert (stripped under -O)
    if suffix is not None and overwrite:
        raise ValueError(
            "No point in providing a suffix if the original file is "
            "overwritten.")

    # If the destination is provided, direct the output file there
    out_path = file_path
    if dest is not None:
        out_path = os.path.join(dest, os.path.basename(file_path))

    # Tag the file name. Only the extension is split off, so that a '.root'
    # appearing elsewhere in the path (e.g. a directory name) is untouched.
    tag = suffix
    if tag is None and overwrite:
        tag = 'tmp'
    if tag is not None:
        stem, ext = os.path.splitext(out_path)
        out_path = f'{stem}_{tag}{ext}'

    # Check that the output file is not the same as the original file.
    # Compare resolved paths so that e.g. dest='.' cannot bypass the check.
    if os.path.realpath(file_path) == os.path.realpath(out_path):
        raise ValueError(
            "The input file name and the output file name are the same. "
            "This is not allowed by the LArCV IOManager.")

    # Update the configuration with the input/output file names
    cfg = CFG.replace('INPUT_PATH', file_path)
    cfg = cfg.replace('OUTPUT_PATH', out_path)

    # Write the configuration to a temporary text file which is removed
    # as soon as the manager has been initialized from it
    with tempfile.NamedTemporaryFile('w') as tmp:
        tmp.write(cfg)
        tmp.flush()

        # Initialize the IOManager
        manager = larcv.IOManager(tmp.name)
        manager.initialize()

    return manager, out_path
| 74 | + |
| 75 | + |
def main(source, source_list, dest, overwrite, run_number, suffix):
    """Inject a run number in every event of every input file.

    The script loops over the input files and, for each of them, reads every
    entry, resets its identifier and saves it to the output file.

    .. code-block:: bash

        $ python3 bin/inject_run_number.py -S file_list.txt
          --overwrite --run-number 123

    Parameters
    ----------
    source : List[str]
        List of paths to the input files
    source_list : str
        Path to a text file containing a list of data file paths, one per
        line. If provided, it takes precedence over `source`.
    dest : str
        Destination folder to write the files to
    overwrite : bool
        If `True`, overwrite the original files
    run_number : int
        Run number to inject in the input file list. If it is negative
        (e.g. -1), each file is assigned its index in the input list as a
        unique run number
    suffix : str
        String to append to the end of the input file names to form the name
        of the output file with the updated run numbers
    """
    # If using source list, read it in (blank lines are not file paths)
    if source_list is not None:
        with open(source_list, 'r', encoding='utf-8') as f:
            source = [line.strip() for line in f if line.strip()]

    # Loop over the list of files in the input
    print("\nUpdating the run numbers of input files.")
    for idx, file_path in enumerate(tqdm(source)):
        # Initialize the input/output processes
        manager, out_path = initialize_manager(
            file_path, dest, overwrite, suffix)

        # Pick the run number to assign to every entry of this file
        run = run_number if run_number > -1 else idx

        # Loop over entries, set the run number for every data product
        for entry in range(manager.get_n_entries()):
            # Read existing content
            manager.read_entry(entry)

            # Update the identifier of the entry.
            # NOTE(review): the arguments are presumably (run, subrun,
            # event), in which case this also resets the subrun to 0 and
            # renumbers events from 1 -- confirm this is intended.
            manager.set_id(run, 0, entry + 1)

            # Save
            manager.save_entry()

        # Finalize
        manager.finalize()

        # If needed, move the temporary output file over the original one.
        # `os.replace` overwrites an existing target on every platform.
        if overwrite:
            os.replace(out_path, file_path)
| 137 | + |
| 138 | + |
if __name__ == "__main__":
    # Parse the command-line arguments
    parser = argparse.ArgumentParser(
        description="Inject a run number in every event of a set of files")

    # The input is given either as explicit paths or as a text file of paths
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument('--source', '-s',
                              help='Path or list of paths to data files',
                              type=str, nargs="+")
    source_group.add_argument('--source-list', '-S',
                              help='Path to a text file of data file paths',
                              type=str)

    # The output either goes to a new folder or replaces the input
    output_group = parser.add_mutually_exclusive_group(required=True)
    output_group.add_argument('--dest',
                              help='Destination folder for the output file',
                              type=str)
    output_group.add_argument('--overwrite',
                              help='Overwrite the input file with the output '
                                   'file',
                              action='store_true')

    parser.add_argument('--run-number',
                        help='Run number to assign to every input file '
                             '(if negative, each file is assigned its index '
                             'in the input list)',
                        type=int, required=True)

    parser.add_argument('--suffix',
                        help='Suffix to append to the input file names',
                        type=str)

    args = parser.parse_args()

    # A suffix is meaningless when the original file is replaced; report it
    # as a usage error up front rather than failing on the first file
    if args.overwrite and args.suffix is not None:
        parser.error("--suffix cannot be combined with --overwrite")

    # Execute the main function
    main(args.source, args.source_list, args.dest, args.overwrite,
         args.run_number, args.suffix)
0 commit comments