Skip to content

Commit e07d4e6

Browse files
committed
feat: CLI easy-slurm command
1 parent f31e6d2 commit e07d4e6

File tree

5 files changed

+263
-18
lines changed

5 files changed

+263
-18
lines changed

README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ pip install easy-slurm
2020

2121
## Usage
2222

23+
Easy Slurm provides a CLI / YAML interface, as well as a Python interface.
24+
25+
26+
### Python API
27+
2328
To submit a job, simply fill in the various parameters shown in the example below.
2429

2530
```python
@@ -62,8 +67,18 @@ All job files will be kept in the `job_dir` directory. Provide directory paths t
6267

6368
Full examples can be found [here](./examples), including a [simple example](./examples/simple) to run "training epochs" on a cluster.
6469

70+
71+
### CLI / YAML Interface
72+
6573
Jobs can also be fully configured using YAML files. See [`examples/simple_yaml`](./examples/simple_yaml).
6674

75+
<table>
76+
<tr>
77+
<td><code>job.yaml</code></td>
78+
</tr>
79+
<tr>
80+
<td>
81+
6782
```yaml
6883
job_dir: "$HOME/jobs/{date}-{job_name}"
6984
src: ["./src", "./assets"]
@@ -85,6 +100,22 @@ sbatch_options:
85100
nodes: 1
86101
resubmit_limit: 64 # Automatic resubmission limit.
87102
```
103+
</td>
104+
</tr>
105+
</table>
106+
107+
Then submit the job using:
108+
109+
```bash
110+
easy-slurm --job="job.yaml"
111+
```
112+
113+
One can override the parameters in the YAML file using command-line arguments. For example:
114+
115+
```bash
116+
easy-slurm --job="job.yaml" --src='["./src", "./assets", "./extra"]'
117+
```
118+
88119

89120
### Formatting
90121

@@ -109,6 +140,12 @@ easy_slurm.submit_job(
109140

110141
This helps in automatically creating descriptive, human-readable job names.
111142

143+
For the CLI / YAML interface, the same can be achieved using the `--config` argument:
144+
145+
```bash
146+
easy-slurm --job="job.yaml" --config="config.yaml"
147+
```
148+
112149
See the [documentation] for more information and examples.
113150

114151
[documentation]: https://yodaembedding.github.io/easy-slurm/

easy_slurm/run/submit.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
import argparse
2+
import json
3+
from textwrap import dedent
4+
5+
import yaml
6+
7+
from easy_slurm.jobs import submit_job
8+
9+
10+
def format_help(help_text: str) -> str:
    """Return ``help_text`` without common indentation or outer whitespace.

    Lets help strings be written as indented triple-quoted blocks while
    still being displayed flush-left.
    """
    unindented = dedent(help_text)
    return unindented.strip()
12+
13+
14+
def _str_to_bool(value: str) -> bool:
    """Convert a command-line string such as ``True`` or ``False`` to a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is
    ``True`` because every non-empty string is truthy, which made it
    impossible to pass ``--submit=False``.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognized boolean.
    """
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "on", "1"):
        return True
    # The empty string stays falsy, exactly as it was under ``type=bool``.
    if normalized in ("false", "f", "no", "n", "off", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")


# Command-line option specifications. Each entry holds the option strings
# under "args"; every other key is passed to ``parser.add_argument`` as a
# keyword argument. Structured values (lists, dicts) are given as JSON.
ARGUMENTS = [
    {
        "args": ["--job"],
        "type": str,
        "help": format_help(
            """
            Path to job config file.
            """
        ),
    },
    {
        "args": ["--job_dir"],
        "type": str,
        "help": format_help(
            """
            Path to directory to keep all job files including
            ``src.tar`` and auto-generated ``job.sh``.
            """
        ),
    },
    {
        "args": ["--src"],
        "type": json.loads,
        "help": format_help(
            """
            Path to directories containing only source code.
            These will be archived in ``$JOB_DIR/src.tar`` and
            extracted during job run into ``$SLURM_TMPDIR``.
            """
        ),
    },
    {
        "args": ["--on_run"],
        "type": str,
        "help": format_help(
            """
            Bash code executed in "on_run" stage, but only for new jobs
            that are running for the first time.
            Must be a single command only.
            Optionally, the command may gracefully handle interrupts.
            """
        ),
    },
    {
        "args": ["--on_run_resume"],
        "type": str,
        "help": format_help(
            """
            Bash code executed in "on_run" stage, but only for jobs that
            are resuming from previous incomplete runs.
            Must be a single command only.
            Optionally, the command may gracefully handle interrupts.
            """
        ),
    },
    {
        "args": ["--setup"],
        "type": str,
        "help": format_help(
            """
            Bash code executed in "setup" stage, but only for new jobs
            that are running for the first time.
            """
        ),
    },
    {
        "args": ["--setup_resume"],
        "type": str,
        "help": format_help(
            """
            Bash code executed in "setup" stage, but only for jobs that
            are resuming from previous incomplete runs.
            To reuse the code from ``setup``, simply set this to
            ``"setup"``, which calls the code inside the ``setup``
            function.
            """
        ),
    },
    {
        "args": ["--teardown"],
        "type": str,
        "help": format_help(
            """
            Bash code executed in "teardown" stage.
            """
        ),
    },
    {
        "args": ["--sbatch_options"],
        "type": json.loads,
        "help": format_help(
            """
            Dictionary of options to pass to sbatch.
            """
        ),
    },
    {
        "args": ["--cleanup_seconds"],
        "type": int,
        "help": format_help(
            """
            Interrupts a job n seconds before timeout to run cleanup
            tasks (teardown, auto-schedule new job).
            Default is 120 seconds.
            """
        ),
    },
    {
        "args": ["--submit"],
        # Not ``bool``: that would turn "False" into True.
        "type": _str_to_bool,
        "help": format_help(
            """
            Submit created job to scheduler. Set this to ``False`` if
            you are manually submitting the created ``$JOB_DIR`` later.
            Default is ``True``.
            """
        ),
    },
    {
        "args": ["--interactive"],
        # Not ``bool``: that would turn "False" into True.
        "type": _str_to_bool,
        "help": format_help(
            """
            Run as a blocking interactive job. Default is ``False``.
            """
        ),
    },
    {
        "args": ["--resubmit_limit"],
        "type": int,
        "help": format_help(
            """
            Maximum number of times to auto-submit a job for "resume".
            (Not entirely unlike submitting a resume for a job.)
            Default is 64 resubmissions.
            """
        ),
    },
    {
        "args": ["--config"],
        "type": str,
        "help": format_help(
            """
            Path to config file for formatting.
            """
        ),
    },
]
162+
163+
164+
# Keys that ``main`` keeps from the merged job configuration before passing
# it to ``submit_job`` as keyword arguments; any other key is dropped.
JOB_CONFIG_KEYS = [
    # Job files and sources.
    "job_dir", "src",
    # Bash code for each stage.
    "on_run", "on_run_resume", "setup", "setup_resume", "teardown",
    # Scheduler options and submission behavior.
    "sbatch_options", "cleanup_seconds", "submit", "interactive",
    "resubmit_limit",
    # Values used for formatting.
    "config",
]
179+
180+
181+
def parse_args(argv=None, arguments=None):
    """Parse command-line options into an ``argparse.Namespace``.

    Each option is registered under its declared spelling plus a
    hyphenated alias (``--job_dir`` also accepts ``--job-dir``). The
    attribute name on the result comes from the declared spelling, which
    is listed first.

    Args:
        argv: Argument strings to parse. ``None`` makes argparse read the
            process command line.
        arguments: Option specifications, each a dict with an ``"args"``
            list of option strings plus ``add_argument`` keyword
            arguments. Defaults to the module-level ``ARGUMENTS``.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args``.
    """
    if arguments is None:
        arguments = ARGUMENTS

    parser = argparse.ArgumentParser()

    for argument in arguments:
        # Build the aliases on a copy: the specification is shared module
        # state and must not be modified by parsing.
        flags = list(argument["args"])
        for flag in argument["args"]:
            alias = flag.replace("_", "-")
            if alias not in flags:
                flags.append(alias)

        kwargs = {k: v for k, v in argument.items() if k != "args"}
        parser.add_argument(*flags, **kwargs)

    return parser.parse_args(argv)
196+
197+
198+
def main(argv=None):
    """Build a job configuration from YAML and CLI options, then submit it.

    The configuration starts from the ``--job`` YAML file (if given).
    Options passed explicitly on the command line then override values
    from that file. ``--config`` names a YAML file whose parsed contents
    become the ``config`` entry. Only keys listed in ``JOB_CONFIG_KEYS``
    are passed on to ``submit_job``.

    Args:
        argv: Argument strings to parse; forwarded to ``parse_args``.
    """
    args = parse_args(argv)

    # Without --job, start empty so the job can be described by CLI
    # options alone instead of failing on an unbound variable.
    job_config = {}
    if args.job:
        with open(args.job) as f:
            # An empty YAML document loads as None.
            job_config = yaml.safe_load(f) or {}

    # Explicit CLI options win over the job file. ``job`` and ``config``
    # are file paths rather than job parameters, so they are not merged
    # here; in particular the ``config`` path must not replace the
    # parsed config mapping.
    cli_overrides = {
        k: v
        for k, v in vars(args).items()
        if v is not None and k not in ("job", "config")
    }
    job_config.update(cli_overrides)

    if args.config:
        with open(args.config) as f:
            job_config["config"] = yaml.safe_load(f)

    job_config = {k: v for k, v in job_config.items() if k in JOB_CONFIG_KEYS}

    submit_job(**job_config)


if __name__ == "__main__":
    main()

examples/simple_yaml/submit_job.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

examples/simple_yaml/submit_job.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
2+
3+
easy-slurm --job=job.yaml --config=assets/hparams.yaml

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ repository = "https://github.com/YodaEmbedding/easy-slurm"
1212
keywords = ["slurm", "sbatch"]
1313
readme = "README.md"
1414

15+
[tool.poetry.scripts]
16+
easy-slurm = "easy_slurm.run.submit:main"
17+
1518
[tool.poetry.dependencies]
1619
python = "^3.7"
1720

0 commit comments

Comments
 (0)