Skip to content

Commit 4d7a6fa

Browse files
authored
pav isolate command (#899)
* Initial isolate command * Fix get_last_test_id * Add isolate to built-in commands * Tweak parser action * Add first isolate command unit test * Ignore series and job directories when isolating * Add argument validation for isolate * Add error handling * Add unit test for isolating with archives * Ignore job and series dirs on tarball creation * Remove stray print function * Handle archive suffixes * Check for symlinks in isolate command unit tests * Add copytree_resolved function * Fix copytree_resolved * Add simple unit test for copytree_resolved * Progress towards isolate * Add more unit tests for copytree_resolved * Remove garbage * Improve copytree_resolved unit test * Fix style issue * Fix copytree_resolve unit tests * Fix bad variable names * Fix more style issues * Complete copytree_resolve unit tests * Use copytree_resolved in isolate cmd * Fix bad type annotation * Fix ignore_files argument * Output environment to file * Fix environment file path * Fix verbose output in run script * Write kickoff script to isolated test * Fix isolate command class methods * Fix _isolate method * Fix isolate command ID parsing * Fix missing import * Temporarily un-ignore job directory * Fix missing comma * Fix scheduler node min and max * Remove flatten option from copytree_resolved * Fix isolate kickoff script * Fix isolate command job name * Add run script to kickoff for pav isolate * Isolate run script * Fix missing import * Add newlines to build/run script * Fix ignore_files in copytree_resolved * Fix missing import * Fix missing import * Fix copytree_resolved symlink behavior * Fix syntax error * Fix copytree_resolved * Add more examples to copytree_resolved unit test * Fix copytree_resolved unit tests * Fix copytree_resolved * Use relative paths when creating symlinks * Style * Remove unnecessary return value * Refine isolate command unit tests * Fix isolate command unit tests * Add docstrings to unit tests * Add docstrings to isolate command * Incorporate 
node_range into kickoff.isolated script * Fix pav-lib.bash path * Remove accidents * Fix style issue * Add create_kickoff_script method to scheduler class * Fix style issues * Pass some unit tests * Pass logging unit test * Tweak written scripts slightly * Remove pavilion environment variables from kickoff script when isolating * Refine isolate command unit tests * Fix isolate command unit tests
1 parent 6e9e414 commit 4d7a6fa

File tree

10 files changed

+761
-103
lines changed

10 files changed

+761
-103
lines changed

lib/pavilion/cmd_utils.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
import sys
99
import time
10+
import os
1011
from pathlib import Path
1112
from typing import List, TextIO, Union, Iterator, Optional
1213
from collections import defaultdict
@@ -523,18 +524,31 @@ def get_last_test_id(pav_cfg: "PavConfig", errfile: TextIO) -> Optional[TestID]:
523524
if last_series is None:
524525
return None
525526

526-
test_ids = list(last_series.tests.keys())
527+
id_pairs = list(last_series.tests.keys())
527528

528-
if len(test_ids) == 0:
529+
if len(id_pairs) == 0:
529530
output.fprint(
530531
errfile,
531532
f"Most recent series contains no tests.")
532533
return None
533534

534-
if len(test_ids) > 1:
535+
if len(id_pairs) > 1:
535536
output.fprint(
536537
errfile,
537538
f"Multiple tests exist in last series. Could not unambiguously identify last test.")
538539
return None
539540

540-
return TestID(test_ids[0])
541+
return TestID(str(id_pairs[0][1]))
542+
543+
544+
def list_files(path: Path, include_root: bool = False) -> Iterator[Path]:
    """Recursively yield every entry found beneath a directory.

    Both regular files and subdirectories are yielded, each exactly once, as
    paths prefixed with ``path``. Traversal is done with ``os.walk``, so a
    ``path`` that cannot be walked (e.g. it does not exist) yields nothing.

    :param path: The directory to walk.
    :param include_root: When True, also yield ``path`` itself (once, before
        any of its contents).
    :return: An iterator over the paths found.
    """

    # Only the first directory visited by os.walk is the root. Every other
    # directory it visits has already been yielded as a 'dname' of its
    # parent, so yielding each walked directory again would produce
    # duplicates.
    root_pending = include_root

    for root, dirs, files in os.walk(path):
        root_path = Path(root)

        if root_pending:
            yield root_path
            root_pending = False

        for fname in files:
            yield root_path / fname
        for dname in dirs:
            yield root_path / dname

lib/pavilion/commands/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
'config': ('config', 'ConfigCommand'),
2424
'graph': ('graph', 'GraphCommand'),
2525
'group': ('group', 'GroupCommand'),
26+
'isolate': ('isolate', 'IsolateCommand'),
2627
'list': ('list_cmd', 'ListCommand'),
2728
'log': ('log', 'LogCommand'),
2829
'ls': ('ls', 'LSCommand'),

lib/pavilion/commands/isolate.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
from argparse import ArgumentParser, Namespace, Action
2+
from pathlib import Path
3+
import tarfile
4+
import sys
5+
import shutil
6+
import tempfile
7+
from typing import Iterable
8+
9+
from pavilion import output
10+
from pavilion import schedulers
11+
from pavilion.config import PavConfig
12+
from pavilion.test_run import TestRun
13+
from pavilion.test_ids import TestID
14+
from pavilion.cmd_utils import get_last_test_id, get_tests_by_id, list_files
15+
from pavilion.utils import copytree_resolved
16+
from pavilion.scriptcomposer import ScriptComposer
17+
from pavilion.errors import SchedulerPluginError
18+
from pavilion.schedulers.config import validate_config, calc_node_range
19+
from .base_classes import Command
20+
21+
22+
class IsolateCommand(Command):
    """Isolates an existing test run in a form that can be run without Pavilion.

    Exit codes returned by :meth:`run`:

    - 0: success
    - 1: ``--zip`` given without ``--archive``
    - 2: no test ID given and no last test could be determined
    - 3: the requested test could not be found
    - 4: the ID matched more than one test
    - 5: the test's run directory does not exist
    - 6: the destination already exists
    - 7: the tarball could not be written
    - 8: the test directory could not be copied
    - 9: the test's scheduler plugin could not be loaded
    """

    # Entries of the test run directory that are left out of the isolated copy.
    IGNORE_FILES = ("series", "job")
    # Name of the standalone kickoff script written into the isolated test.
    KICKOFF_FN = "kickoff.isolated"

    def __init__(self):
        super().__init__(
            "isolate",
            "Isolate an existing test run.",
            short_help="Isolate a test run."
        )

    def _setup_arguments(self, parser: ArgumentParser) -> None:
        """Setup the argument parser for the isolate command."""

        # Optional: when omitted, run() falls back to the last test run.
        parser.add_argument(
            "test_id",
            type=TestID,
            nargs="?",
            help="test ID"
        )

        parser.add_argument(
            "path",
            type=Path,
            help="isolation path"
        )

        parser.add_argument(
            "-a",
            "--archive",
            action="store_true",
            default=False,
            help="archive the test"
        )

        parser.add_argument(
            "-z",
            "--zip",
            default=False,
            help="compress the test archive",
            action="store_true"
        )

    def run(self, pav_cfg: PavConfig, args: Namespace) -> int:
        """Run the isolate command.

        :param pav_cfg: The Pavilion configuration.
        :param args: The parsed command arguments (test_id, path, archive, zip).
        :return: 0 on success, otherwise one of the codes listed on the class.
        """

        if args.zip and not args.archive:
            output.fprint(self.errfile, "--archive must be specified to use --zip.")
            return 1

        test_id = args.test_id

        if test_id is None:
            test_id = get_last_test_id(pav_cfg, self.errfile)

        if test_id is None:
            output.fprint(self.errfile, "No last test found.", color=output.RED)
            return 2

        tests = get_tests_by_id(pav_cfg, [test_id], self.errfile)

        if len(tests) == 0:
            output.fprint(self.errfile, "Could not find test '{}'".format(test_id))
            return 3

        if len(tests) > 1:
            # Nothing is isolated in this case, so say so rather than
            # claiming that the first match was used.
            output.fprint(
                self.errfile,
                "Matched multiple tests. Only a single test can be isolated at a time.",
                color=output.YELLOW)
            return 4

        test = next(iter(tests))

        return self._isolate(pav_cfg, test, args.path, args.archive, args.zip)

    @classmethod
    def _isolate(cls, pav_cfg: PavConfig, test: TestRun, dest: Path, archive: bool,
                 zip: bool) -> int:
        """Given a test run and a destination path, isolate that test run, optionally
        creating a tarball.

        :param pav_cfg: The Pavilion configuration.
        :param test: The test run to isolate.
        :param dest: Where to put the isolated test. Must not already exist.
        :param archive: Write a tarball rather than a directory.
        :param zip: Compress the tarball (only used when ``archive`` is set).
        :return: 0 on success, otherwise a non-zero error code.
        """

        if not test.path.is_dir():
            output.fprint(sys.stderr, "Directory '{}' does not exist."
                          .format(test.path.as_posix()), color=output.RED)
            return 5

        if dest.exists():
            output.fprint(
                sys.stderr,
                f"Unable to isolate test {test.id}. Destination {dest} already exists.",
                color=output.RED)
            return 6

        if archive:
            # Propagate the result so a failed archive isn't reported as success.
            return cls._write_tarball(pav_cfg, test, dest, zip, cls.IGNORE_FILES)

        try:
            copytree_resolved(test.path, dest, ignore_files=cls.IGNORE_FILES)
        except OSError as err:
            output.fprint(
                sys.stderr,
                f"Unable to isolate test {test.id} at {dest}: {err}",
                color=output.RED)
            return 8

        # Ship the Pavilion bash library alongside the isolated test.
        pav_lib_bash = pav_cfg.pav_root / 'bin' / TestRun.PAV_LIB_FN
        shutil.copyfile(pav_lib_bash, dest / TestRun.PAV_LIB_FN)

        return cls._write_kickoff_script(pav_cfg, test, dest / cls.KICKOFF_FN)

    @classmethod
    def _write_tarball(cls, pav_cfg: PavConfig, test: TestRun, dest: Path, zip: bool,
                       ignore_files: Iterable[str]) -> int:
        """Given a test run object, create a tarball of its run directory in the specified
        location.

        The isolated test is first staged in a temporary directory and then
        added to the tarball entry by entry.

        :param pav_cfg: The Pavilion configuration.
        :param test: The test run to archive.
        :param dest: Path of the tarball. If it has no suffix, '.tgz' (when
            compressing) or '.tar' is appended.
        :param zip: Whether to gzip the tarball.
        :param ignore_files: Names to leave out when copying the run directory.
        :return: 0 on success, otherwise a non-zero error code.
        """

        if zip:
            default_suffix, modestr = ".tgz", "w:gz"
        else:
            default_suffix, modestr = ".tar", "w:"

        # NOTE(review): the caller's existence check ran on the un-suffixed
        # path, so an existing '<dest>.tar'/'<dest>.tgz' is not detected here.
        if len(dest.suffixes) == 0:
            dest = dest.with_suffix(default_suffix)

        with tempfile.TemporaryDirectory() as tmp:
            tmp = Path(tmp)
            # Top-level directory inside the tarball, named after the archive.
            tmp_dest = tmp / dest.stem
            tmp_dest.mkdir()
            copytree_resolved(test.path, tmp_dest, ignore_files=ignore_files)

            # Copy Pavilion bash library into tarball
            pav_lib_bash = pav_cfg.pav_root / 'bin' / TestRun.PAV_LIB_FN
            shutil.copyfile(pav_lib_bash, tmp_dest / TestRun.PAV_LIB_FN)

            ret = cls._write_kickoff_script(pav_cfg, test, tmp_dest / cls.KICKOFF_FN)
            if ret != 0:
                return ret

            try:
                with tarfile.open(dest, modestr) as tarf:
                    # list_files already recurses, so each entry is added
                    # individually (recursive=False) to avoid duplicates.
                    for fname in list_files(tmp):
                        tarf.add(
                            fname,
                            arcname=fname.relative_to(tmp),
                            recursive=False)
            except (tarfile.TarError, OSError) as err:
                output.fprint(
                    sys.stderr,
                    f"Unable to isolate test {test.id} at {dest}: {err}",
                    color=output.RED)
                return 7

        return 0

    @classmethod
    def _write_kickoff_script(cls, pav_cfg: PavConfig, test: TestRun, script_path: Path) -> int:
        """Write a special kickoff script that can be used to run the given test independently of
        Pavilion.

        :param pav_cfg: The Pavilion configuration.
        :param test: The test run to write a kickoff script for.
        :param script_path: Where to write the script.
        :return: 0 on success, 9 if the test's scheduler plugin can't be loaded.
        """

        try:
            sched = schedulers.get_plugin(test.scheduler)
        except SchedulerPluginError:
            output.fprint(
                sys.stderr,
                f"Unable to generate kickoff script for test {test.id}: unable to load scheduler"
                f" {test.scheduler}."
            )
            return 9

        # NOTE(review): node_range is never used below. These calls are kept
        # in case they are relied on to validate the schedule config --
        # confirm, and remove them if not.
        sched_config = validate_config(test.config['schedule'])
        node_range = calc_node_range(sched_config, sched_config['cluster_info']['node_count'])

        script = sched.create_kickoff_script(
            pav_cfg,
            test,
            isolate=True)

        script.write(script_path)

        return 0

0 commit comments

Comments
 (0)