forked from qbuat/taunet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
condor_taunet
executable file
·140 lines (116 loc) · 5 KB
/
condor_taunet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python
# Imports
import argparse
import getpass
import os
import subprocess
import sys
# Get username (used later to build the per-user scratch path under /tmp)
username = getpass.getuser()

# First step: parse in arguments & configuration.
# Options known to this wrapper are consumed here; everything argparse does
# not recognise is returned in myCommand and treated as the command to run
# on the worker node (script name followed by its own parameters).
parser = argparse.ArgumentParser()
parser.add_argument('--no-submit', action='store_true')
parser.add_argument('--outdir', type=str, default='launch_condor')
parser.add_argument('--queue', type=str, default='tomorrow')
parser.add_argument('--ncpus', type=str, default='1')
parser.add_argument('--ngpus', type=str, default='1')
parser.add_argument('--jobname', type=str, default='0')
argsKnown, myCommand = parser.parse_known_args()
myArguments = vars(argsKnown)
print(myArguments, myCommand)

# Detect problems: without a script there is nothing to submit.
# sys.exit(<str>) writes the message to stderr and exits with status 1, so
# calling shells and scripts can see that the submission failed.
if not myCommand:
    sys.exit("ERROR: No executable script provided")

# Read arguments: first leftover token is the script, the rest its parameters
myExec = myCommand[0]
myParams = " ".join(myCommand[1:])
# Submission is always relative to the directory the wrapper was started from
myPath = os.getcwd()
# Second step: Auto-generate the shell script
# Check if submission folder exists
condorPath = os.path.join(myPath, myArguments['outdir'])
if not os.path.isdir(condorPath):
    print ("WARNING: condor submission directory does not exist: creating")
    # makedirs rather than mkdir so that a nested --outdir (e.g. "a/b")
    # is created in one go instead of raising because "a" is missing.
    os.makedirs(condorPath)

# Create job name
# For now, use sequential book-keeping as protection
if myArguments['jobname'] != '0':
    # Override jobname if it's been specified ('0' is the "not set" default)
    jobName = myArguments['jobname']
    print ("condor_taunet >>>> Will use jobName: "+jobName)
else:
    # Derive the name from the command line, dropping or replacing the
    # characters '-', ' ', '/' and '.'.
    jobName = myExec + '_' + myParams
    for old, new in (('-', ''), (' ', '_'), ('/', ''), ('.', '')):
        jobName = jobName.replace(old, new)

# Append "_job<N>", using the first N for which no job script exists yet in
# the submission directory, so earlier submissions are never overwritten.
baseName = jobName + "_job"
iteration = 0
jobName = baseName + str(iteration)
while os.path.exists(os.path.join(condorPath, 'jobScript_' + jobName + '.sh')):
    iteration += 1
    jobName = baseName + str(iteration)
if iteration != 0:
    print ("condor_taunet >>>> Using job increment: "+str(iteration))

# Create output file folder:
jobPath = os.path.join(condorPath, jobName)
if not os.path.isdir(jobPath):
    os.makedirs(jobPath)
# Per-job scratch area on the worker node; every job gets its own copy of
# the code so that concurrent jobs do not share output stores.
scratchDir = '/tmp/' + username + '/' + jobName
taunetDir = scratchDir + '/taunet'

# Basic set-up into local taunet area
jobScript = [
    '#!/usr/bin/bash',
    'cd ' + myPath,
    # Disabled ATLAS/ROOT environment set-up, kept for reference:
    # 'export ATLAS_LOCAL_ROOT_BASE=/cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase',
    # 'source ${ATLAS_LOCAL_ROOT_BASE}/user/atlasLocalSetup.sh',
    # 'lsetup "root 6.18.04-x86_64-centos7-gcc8-opt" ',
    'export OPENBLAS_MAIN_FREE=1',
    # Create a new taunet area on the tmp (allows for unique stores for each job)
    'mkdir -p ' + taunetDir,
    # Disgusting bash statement: only copy files under version control.
    # This is to prevent unnecessary copying of files.
    # If file is under a given folder, a new folder needs to be generated.
    'for GITFILE in $(git ls-files); do if [[ $GITFILE == *"/"* ]]; then mkdir -p '
    + taunetDir + '/${GITFILE%/*}; fi; cp $GITFILE ' + taunetDir + '/$GITFILE; done',
    # Copy the "tesenv" directory from the parent of the submission
    # directory and source its bin/activate script.
    'cd ..',
    'cp -r tesenv ' + scratchDir + '/',
    'source ' + scratchDir + '/tesenv/bin/activate',
    # Run the user command inside the scratch copy of the code
    'cd ' + taunetDir,
    'python ' + myExec + ' ' + myParams,
    # Bring the outputs back to the job's folder in the submission directory
    'cp data/*.npy ' + jobPath,
    'cp *.p ' + jobPath,
    'cp cache/*.h5 ' + jobPath,
    'cp data/*.p ' + jobPath,
    'cp plots/*.pdf ' + jobPath,
    'cp final_MDN/plots/*.pdf ' + jobPath,
    # Clean up: step out of the scratch area and remove it by absolute path.
    # (A relative "rm -rf <jobName>" issued from inside the scratch area
    # would look for <scratch>/<jobName>, which does not exist.)
    'cd /tmp/' + username,
    'rm -rf ' + scratchDir,
]

# Create file; the context manager closes it even if a write fails
with open(myArguments['outdir'] + "/jobScript_" + jobName + ".sh", "w") as f:
    for line in jobScript:
        f.write(line + "\n")
# Third step: Auto-generate the submission file
# All paths are written relative to the current working directory, which is
# where condor_submit is invoked from further down.
jobDef = [
    'executable = ' + myArguments['outdir'] + '/jobScript_' + jobName + '.sh',
    # Don't pass parameters, they're already baked into the .sh
    'arguments = ',
    'output = ' + myArguments['outdir'] + '/' + jobName + '.out',
    'error = ' + myArguments['outdir'] + '/' + jobName + '.err',
    'log = ' + myArguments['outdir'] + '/' + jobName + '.log',
    '+JobFlavour = "' + myArguments['queue'] + '"',
]
# Only add a CPU request when it differs from the default value '1'
if myArguments['ncpus'] != '1':
    jobDef += ['request_CPUs = ' + myArguments['ncpus']]
# NOTE(review): the GPU request is taken to be unconditional; the original
# indentation was lost, so confirm it was not meant to sit under the if above.
jobDef += ['request_GPUs = ' + myArguments['ngpus']]
jobDef += ['queue']

# Write the description; the context manager closes the file even on error
with open(myArguments['outdir'] + "/jobDef_" + jobName + ".su", "w") as f:
    for line in jobDef:
        f.write(line + "\n")
# Submit the actual job (or, with --no-submit, only show the command)
submitFile = myArguments['outdir'] + '/jobDef_' + jobName + '.su'
if myArguments['no_submit']:
    print ('Dry Run: Command would have been:')
    print ('condor_submit ' + submitFile)
else:
    # Pass an argument list instead of a shell string so that unusual
    # characters in the job name are never interpreted by a shell, and
    # report a failed submission through the exit status.
    try:
        status = subprocess.call(['condor_submit', submitFile])
    except OSError as err:
        # Raised when condor_submit cannot be started at all
        sys.exit("ERROR: could not run condor_submit: " + str(err))
    if status != 0:
        sys.exit("ERROR: condor_submit failed with status " + str(status))