Skip to content

Commit

Permalink
added toil slurm cli flags
Browse files Browse the repository at this point in the history
  • Loading branch information
jfennick committed Aug 15, 2023
1 parent 88d35af commit e89544f
Showing 1 changed file with 45 additions and 3 deletions.
48 changes: 45 additions & 3 deletions src/wic/run_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,9 @@ def run_local(args: argparse.Namespace, rose_tree: RoseTree, cachedir: Optional[
# NOTE: Using --leave-outputs to disable --outdir
# See https://github.com/dnanexus/dx-cwl/issues/20
# --outdir has one or more bugs which will cause workflows to fail!!!
cmd = ['cwltool'] + parallel + quiet + cachedir_ + net + provenance + docker_cmd_ + write_summary + skip_schemas
cmd = ['cwltool'] + parallel + quiet + cachedir_ + net + write_summary + skip_schemas # + provenance + docker_cmd_
cmd += ['--leave-outputs',
'--singularity',
# '--js-console', # "Running with support for javascript console in expressions (DO NOT USE IN PRODUCTION)"
f'autogenerated/{yaml_stem}.cwl', f'autogenerated/{yaml_stem}_inputs.yml']
# TODO: Consider using the undocumented flag --fast-parser for known-good workflows,
Expand Down Expand Up @@ -152,11 +153,52 @@ def run_local(args: argparse.Namespace, rose_tree: RoseTree, cachedir: Optional[
net = ['--custom-net', args.custom_net] if args.custom_net else []
provenance = ['--provenance', 'provenance']
docker_cmd_ = [] if docker_cmd == 'docker' else ['--user-space-docker-cmd', docker_cmd]
cmd = ['toil-cwl-runner'] + net + provenance + docker_cmd_
cmd = ['toil-cwl-runner'] + net # + provenance + docker_cmd_
cmd += ['--outdir', 'outdir_toil',
# NOTE: "... the job store must be accessible by all worker nodes."
'--jobStore', f'file:./jobStore_{yaml_stem}', # NOTE: This is the equivalent of --cachedir
'--batchSystem', 'slurm',
# See https://github.com/DataBiosphere/toil/blob/d439924b4110cc572b5b996f0efa623ebb48155f/src/toil/batchSystems/slurm.py#L376
'--dont_allocate_mem',
# '--defaultMemory', '536870912', # 500MB
# The default amount of memory to request for a job (in bytes), by default is 2^31 = 2 gigabytes
# '--logDebug',
'--coalesceStatusCalls',
# Coalesce status calls to prevent the batch system from
# being overloaded. Currently only supported for LSF.
# NOTE: If you do NOT use this option, the loop which polls the batch
# system for status updates becomes O(n^2) time complexity!
# NOTE: It appears --coalesceStatusCalls is now supported for lsf and slurm.
'--statePollingWait', '5', # For slurm, defaults to SchedulerTimeSlice*1.2 (default 30 seconds*1.2)
# Time, in seconds, to wait before doing a scheduler query for job state. Return cached results if within the waiting period. Only works for grid engine batch systems such as gridengine, htcondor, torque, slurm, and lsf.
'--retryCount', '1', # There appear to be random errors due to the panassas network file system.
# Number of times to retry a failing job before giving
# up and labeling job failed. default=0
'--maxCores', '128',
'--maxLocalJobs', '128', # i.e. infinity
# For batch systems that support a local queue for housekeeping jobs (Mesos, GridEngine, htcondor, lsf, slurm, torque). Specifies the maximum number of these housekeeping jobs to run on the local system. The default (equal to the
# number of cores) is a maximum of 24 concurrent local housekeeping jobs.
# '--runCwlInternalJobsOnWorkers', 'true',
# Whether to run CWL internal jobs (e.g. CWLScatter) on the worker nodes instead of the primary node. If false (default), then all such jobs are run on the primary node. Setting this to true can speed up the pipeline for very
# large workflows with many sub-workflows and/or scatters, provided that the worker pool is large enough.
'--disableAutoDeployment', # Using conda/docker/panassas NFS, so shouldn't need to deploy scripts.
# Should auto-deployment of the user script be deactivated? If True, the user script/package should be present at the same location on all workers. Default = False.
'--stats',
# Records statistics about the toil workflow to be used by 'toil stats'.
'--clusterStats', 'clusterStats.json',
# If enabled, writes out JSON resource usage statistics to a file. The default location for this file is the current working directory, but an absolute path can also be passed to specify where this file should be written. This
# option only applies when using scalable batch systems.
'--singularity',
'--workDir', 'workdir', # "This directory needs to exist on all machines running jobs."
# i.e. /run/user/$UID/coorddir This is a local /tmpfs (in-memory) NOT NFS
'--coordinationDir', 'coorddir', # "Absolute path to directory where Toil will keep state and lock files."
'--log-dir', 'logdir',
'--logFile', 'logfile',
# '--restart',
'--disableCaching',
'--disableProgress', # disable the progress bar in the terminal
# TODO: Check --clean, --cleanWorkDir, --restart
'--clean', 'always', # This effectively disables caching, but is reproducible
# '--clean', 'always', # This effectively disables caching, but is reproducible
f'autogenerated/{yaml_stem}.cwl', f'autogenerated/{yaml_stem}_inputs.yml']

print('Running ' + ' '.join(cmd))
Expand Down

0 comments on commit e89544f

Please sign in to comment.