forked from htautau/hhntup
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrid-batch
executable file
·123 lines (111 loc) · 3.81 KB
/
grid-batch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
import xaod
from rootpy.extern.argparse import ArgumentParser
# Command-line interface of the script.
#
# Options that are not declared below are not treated as errors:
# parse_known_args() hands them back separately as ``user_args``.
parser = ArgumentParser(usage="%(prog)s [args] file1,file2,file3...")
parser.add_argument('-v', '--verbose', action="store_true",
                    help="verbose", default=False)
parser.add_argument('-e', '--events', type=int,
                    help="number of events to process", default=-1)
parser.add_argument('-d', '--dataset',
                    help="name of dataset being processed", required=True)
parser.add_argument('-m', '--metadata',
                    help="YAML file containing dataset definitions",
                    required=True)
# The adjacent string literals are concatenated by the parser, so each
# fragment must carry its own trailing space (the original rendered
# "containing aclass").
parser.add_argument('-s', '--student',
                    help="the file (excluding .py extension) containing a "
                         "class of the same name inheriting from "
                         "rootpy.batch.Student",
                    required=True)
parser.add_argument('--profile', action='store_true',
                    help="profile execution time of each student's work "
                         "method",
                    default=False)
parser.add_argument('--anti-match',
                    help="skip files matching this expression")
parser.add_argument('--match',
                    help="use files matching this expression")
# One or more positional entries; each may be a directory, a single file
# or a comma-separated list of files.
parser.add_argument('files', nargs='+')
args, user_args = parser.parse_known_args()
import sys
import os
import ROOT
import glob
from higgstautau.batch import ATLASSupervisor
from higgstautau import datasets
from higgstautau.datasets import ATLASFileset
from higgstautau import utils
from higgstautau import log; log = log['grid-batch']
import multiprocessing
import yaml
from configobj import ConfigObj, flatten_errors
from validate import Validator
from fnmatch import fnmatch
# Put the current working directory first on the module search path.
# NOTE(review): presumably so the student module named on the command
# line can be imported from the job directory -- confirm.
sys.path.insert(0, '.')

# Expand every positional argument into individual file names: a directory
# contributes all files below it matching '*.root*', anything else is
# treated as a comma-separated list of files.
files = []
for entry in args.files:
    if os.path.isdir(entry):
        found = utils.all_files_matching(entry, '*.root*')
    else:
        found = entry.split(',')
    files.extend(found)
args.files = files
# Apply the optional --match / --anti-match glob filters.
# A file is kept when it matches --match (or no --match was given) and it
# does not match --anti-match (when one was given).
files = [
    name for name in args.files
    if (not args.match or fnmatch(name, args.match))
    and not (args.anti_match and fnmatch(name, args.anti_match))
]
args.files = files
# Load the dataset definitions into ``metadata``.
#
# Files ending in '.yml' are read with PyYAML. Any other file is read with
# ConfigObj and validated against a spec file that must sit next to it
# (same path with the extension replaced by '.spec'). The script exits if
# the spec file is missing or if validation reports any problem.
if args.metadata.endswith('.yml'):
    with open(args.metadata, 'r') as configfile:
        # NOTE(review): yaml.load() without an explicit Loader can build
        # arbitrary Python objects from the input, and newer PyYAML
        # releases require the Loader argument. If the metadata only holds
        # plain mappings/scalars, yaml.safe_load() is the safer call --
        # left unchanged here so existing metadata files keep loading.
        metadata = yaml.load(configfile)
else:
    configspec = os.path.splitext(args.metadata)[0] + '.spec'
    if not os.path.isfile(configspec):
        sys.exit('%s does not exist' % configspec)
    metadata = ConfigObj(args.metadata, configspec=configspec)
    validator = Validator()
    result = metadata.validate(validator, preserve_errors=True)
    if result is not True:
        # Report every validation problem before giving up.
        for section_list, key, error in flatten_errors(metadata, result):
            # each entry is a (section path, key, error) tuple; a key of
            # None means a whole section is missing
            if key is not None:
                section_list.append(key)
            else:
                section_list.append('[missing section]')
            section_string = ', '.join(section_list)
            if error is False:
                error = 'Missing value or section.'
            # Single-argument print(): same output as the former
            # ``print a, ' = ', b`` statement, but also valid Python 3.
            print('%s  =  %s' % (section_string, error))
        sys.exit(1)
# The requested dataset must have an entry in the metadata.
if not (args.dataset in metadata):
    sys.exit("dataset %s not defined in metadata!" % args.dataset)
meta = metadata[args.dataset]

# Describe the dataset for the supervisor. 'type' and 'tree' are mandatory
# metadata keys; 'year' and 'grl' fall back to None when absent.
fileset_options = dict(
    name=args.dataset,
    datatype=datasets.TYPES[meta["type"]],
    year=meta.get('year'),
    treename=meta["tree"],
    files=args.files,
    grl=meta.get('grl'),
    tags=None,
    meta=None,
    properties=None,
)
fileset = ATLASFileset(**fileset_options)

# Log every metadata entry of the selected dataset.
for item in meta.items():
    log.info('{0} = {1}'.format(*item))
# Configure the supervisor from the command-line arguments and the fileset
# built above, then start processing. Command-line options that the parser
# did not recognise are passed through as ``options``.
supervisor_options = {
    'student': args.student,
    'outputname': args.dataset,
    'files': fileset.files,
    'metadata': fileset,
    'connection': None,
    'gridmode': True,
    'grl': fileset.grl,
    'events': args.events,
    'profile': args.profile,
    'options': user_args,
}
supervisor = ATLASSupervisor(**supervisor_options)
supervisor.run()