Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

719 ability to cancel series run by other users #779

Open
wants to merge 29 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
547942e
Add try_cancel method to TestSeries
hwikle-lanl Jul 17, 2024
e910e5b
Drop cancellation file on call to TestSeries.cancel
hwikle-lanl Jul 17, 2024
a00cd74
Add unit test for series cancellation
hwikle-lanl Jul 17, 2024
ac0e45e
Tweak unit test
hwikle-lanl Jul 17, 2024
7945ffb
Progress towards cancelling series
hwikle-lanl Jul 21, 2024
56f9554
blah
hwikle-lanl Jul 23, 2024
1b6dea9
Resolve all unit test errors
hwikle-lanl Jul 24, 2024
32e42ee
Blah
hwikle-lanl Jul 29, 2024
5ee55a5
Miscellaneous changes
hwikle-lanl Jul 29, 2024
0ffc8f0
Pass invalid series test
hwikle-lanl Jul 30, 2024
ad51559
Fix error when parsing durations
hwikle-lanl Jul 31, 2024
588e450
Fix off-by-one error
hwikle-lanl Jul 31, 2024
817b027
Fix style issues
hwikle-lanl Jul 31, 2024
eabbeb4
Fix error in cancel cooldown
hwikle-lanl Jul 31, 2024
b43fa74
Progress towards solution
hwikle-lanl Aug 1, 2024
19ebc5d
Pass remaining tests
hwikle-lanl Aug 1, 2024
f3ab990
Fix error in cancel
hwikle-lanl Aug 6, 2024
74997ec
Move general functions into micro
hwikle-lanl Sep 9, 2024
fd28ddd
Replace micro module with most recent version
hwikle-lanl Sep 30, 2024
2088cda
Pass failing unittests
hwikle-lanl Sep 30, 2024
2303413
Merge series run refactor
hwikle-lanl Nov 7, 2024
afa3044
Add time limiting
hwikle-lanl Nov 7, 2024
50f51ab
Pass all unit tests
hwikle-lanl Nov 7, 2024
18944c0
Remove what is extraneous
hwikle-lanl Nov 7, 2024
95160cf
Remove ill-advised code
hwikle-lanl Nov 7, 2024
1bc084a
Revert cmd_utils
hwikle-lanl Nov 7, 2024
9fda813
Progress
hwikle-lanl Nov 8, 2024
bf88e0e
Fix failing unit tests
hwikle-lanl Nov 8, 2024
2886261
Implement series cancellation for pav cancel sid
hwikle-lanl Nov 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions lib/pavilion/cancel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,29 @@

import io
from collections import defaultdict
from typing import List, TextIO
from operator import attrgetter
from itertools import filterfalse
from typing import List, TextIO, Iterable, Union, Iterator
import time

from pavilion import schedulers
from pavilion import utils
from pavilion.test_run import TestRun, load_tests
from pavilion import output
from pavilion.config import PavConfig
from pavilion.micro import do


def cancel_jobs(pav_cfg, tests: List[TestRun], errfile: TextIO = None) -> List[dict]:
def not_completed(
        tests: Iterable[Union["TestRun", "TestSeries"]]
) -> List[Union["TestRun", "TestSeries"]]:
    """Return a list of the items in the input whose ``complete`` attribute
    is falsy, preserving their original order.

    :param tests: Any iterable (list, generator, ``map`` object, ...) of
        objects exposing a ``complete`` attribute. It is consumed exactly once.
    :returns: A new list; the input is never modified.
    """

    # Materialize into a list so that callers may hand in one-shot iterators
    # and still take len() of / iterate over the result multiple times.
    return [item for item in tests if not item.complete]

def cancel_jobs(
pav_cfg: PavConfig,
tests: Iterable[TestRun],
errfile: TextIO = None) -> List[dict]:
"""Collect all jobs from the given tests, and cancel them if all the tests
attached to those jobs have been cancelled.

Expand Down Expand Up @@ -65,22 +78,23 @@ def cancel_jobs(pav_cfg, tests: List[TestRun], errfile: TextIO = None) -> List[d
SERIES_WARN_EXPIRE = 60*60*24 # 24 hours


def cancel_tests(pav_cfg, tests: List, outfile: TextIO,
max_wait: float = 3.0, no_series_warning=False):
def cancel_tests(pav_cfg: PavConfig, tests: Iterable[TestRun], outfile: TextIO,
max_wait: float = 3.0, no_series_warning: bool = False) -> int:
"""Cancel all of the given tests, printing useful user messages and error information."""

user = utils.get_login()

tests = [test for test in tests if not test.complete]
tests = not_completed(tests)

# Cancel each test. Note that this does not cancel test jobs or builds.
cancelled_test_info = []

for test in tests:
# Don't try to cancel complete tests
test.cancel("Cancelled via cmdline by user '{}'".format(user))
cancelled_test_info.append(test)

if cancelled_test_info:
if len(cancelled_test_info) > 0:
test_count = len(tests)
output.draw_table(
title="Cancelling {} test{}".format(test_count, 's' if test_count > 1 else ''),
Expand All @@ -91,17 +105,17 @@ def cancel_tests(pav_cfg, tests: List, outfile: TextIO,
for test in cancelled_test_info])
else:
output.fprint(outfile, "No tests needed to be cancelled.")

return 0

timeout = time.time() + max_wait
wait_tests = list(tests)
wait_msg = True
while wait_tests and time.time() > timeout:
for test in wait_tests.copy():
if test.complete:
wait_tests.remove(test)

if wait_tests:
while len(wait_tests) > 0 and time.time() > timeout:
wait_tests = not_completed(wait_tests)

if len(wait_tests) > 0:
if wait_msg:
output.fprint(outfile, "Giving tests a moment to quit.", end='')
wait_msg = False
Expand All @@ -112,11 +126,12 @@ def cancel_tests(pav_cfg, tests: List, outfile: TextIO,

if not wait_msg:
output.fprint(outfile, 'Done')

output.fprint(outfile, '\n')

job_cancel_info = cancel_jobs(pav_cfg, tests, outfile)

if job_cancel_info:
if len(job_cancel_info) > 0:
jobs = len(job_cancel_info)
output.draw_table(
outfile=outfile,
Expand All @@ -141,3 +156,13 @@ def cancel_tests(pav_cfg, tests: List, outfile: TextIO,
break

return 0


def cancel_series(sers: Iterable["TestSeries"], errfile: TextIO = None) -> int:
    """Call ``cancel()`` on every series in the sequence that is not yet
    complete.

    :param sers: The series to consider; already-complete ones are skipped.
    :param errfile: Accepted for symmetry with the other cancel helpers, but
        not currently written to.
    :returns: Always 0.
    """

    # NOTE(review): `do` comes from pavilion.micro and presumably applies the
    # function to each item for its side effects - confirm against that module.
    do(lambda ser: ser.cancel(), not_completed(sers))

    return 0
80 changes: 59 additions & 21 deletions lib/pavilion/cmd_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import sys
import time
from pathlib import Path
from typing import List, TextIO, Union
from typing import List, TextIO, Union, Iterator
from collections import defaultdict

from pavilion import config
Expand All @@ -23,10 +23,62 @@
PavilionError, TestGroupError
from pavilion.test_run import TestRun, load_tests, TestAttributes
from pavilion.types import ID_Pair
from pavilion.micro import flatten

LOGGER = logging.getLogger(__name__)


def expand_range(test_range: str) -> List[str]:
    """Expand a test or series range into the list of individual test or
    series IDs it covers.

    Accepted forms:

    - ``"all"`` is returned unchanged as ``["all"]``.
    - A string without a ``-`` is returned unchanged as a one-item list.
    - ``"<start>-<end>"`` expands to every test ID from start to end, inclusive.
    - ``"s<start>-s<end>"`` or ``"s<start>-<end>"`` expands to series IDs; the
      ``s`` prefix is matched case-insensitively and the results always carry
      a lowercase ``s``.

    A range whose end is below its start expands to an empty list.

    :param test_range: The range (or single ID) to expand.
    :returns: The individual IDs, as strings, in ascending order.
    :raises ValueError: If either side of a ``-`` range is not an integer
        (after removing a single leading series prefix).
    """

    if test_range == "all":
        return ["all"]

    if '-' not in test_range:
        return [test_range]

    id_start, id_end = test_range.split('-', 1)

    if id_start[:1].lower() == 's':
        prefix = 's'
        # Strip only the leading prefix character, so malformed IDs such as
        # 's1s2' are rejected by int() rather than silently read as 12.
        first = int(id_start[1:])
        # The series prefix is optional on the end of the range.
        last = int(id_end[1:] if id_end[:1].lower() == 's' else id_end)
    else:
        prefix = ''
        first = int(id_start)
        last = int(id_end)

    return [prefix + str(num) for num in range(first, last + 1)]


def expand_ranges(ranges: Iterator[str]) -> Iterator[str]:
    """Expand each test or series range in the given sequence and combine
    the results into a single sequence of individual test and series IDs."""

    # Each range expands to its own list of IDs; flatten merges those lists.
    per_range_ids = (expand_range(item) for item in ranges)

    return flatten(per_range_ids)


#pylint: disable=C0103
def is_series_id(id: str) -> bool:
    """Report whether the given ID looks like a series ID, i.e. whether it is
    non-empty and begins with 's' or 'S'."""

    # Slicing (rather than indexing) yields '' for an empty string, so no
    # separate length check is needed.
    first_char = id[:1]

    return first_char.lower() == 's'


def load_last_series(pav_cfg, errfile: TextIO) -> Union[series.TestSeries, None]:
"""Load the series object for the last series run by this user on this system."""

Expand All @@ -51,7 +103,7 @@ def set_arg_defaults(args):
args.filter = getattr(args, 'filter', def_filter)


def arg_filtered_tests(pav_cfg, args: argparse.Namespace,
def arg_filtered_tests(pav_cfg: "PavConfig", args: argparse.Namespace,
verbose: TextIO = None) -> dir_db.SelectItems:
"""Search for test runs that match based on the argument values in args,
and return a list of matching test id's.
Expand All @@ -64,6 +116,8 @@ def arg_filtered_tests(pav_cfg, args: argparse.Namespace,
2. All of the used bits are *ALWAYS* used, so any errors will pop up
immediately in unit tests.

TODO: Rewrite the interface so that it's cleaner and not coupled to argparse. - HW

:param pav_cfg: The Pavilion config.
:param args: An argument namespace with args defined by
`filters.add_test_filter_args`, plus one additional `tests` argument
Expand All @@ -80,26 +134,10 @@ def arg_filtered_tests(pav_cfg, args: argparse.Namespace,
sort_by = getattr(args, 'sort_by', 'created')

ids = []

for test_range in args.tests:
if '-' in test_range:
id_start, id_end = test_range.split('-', 1)
if id_start.startswith('s'):
series_range_start = int(id_start.replace('s',''))
if id_end.startswith('s'):
series_range_end = int(id_end.replace('s',''))
else:
series_range_end = int(id_end)
series_ids = range(series_range_start, series_range_end+1)
for sid in series_ids:
ids.append('s' + str(sid))
else:
test_range_start = int(id_start)
test_range_end = int(id_end)
test_ids = range(test_range_start, test_range_end+1)
for tid in test_ids:
ids.append(str(tid))
else:
ids.append(test_range)
ids.extend(expand_range(test_range))

args.tests = ids

if 'all' in args.tests:
Expand Down
27 changes: 21 additions & 6 deletions lib/pavilion/commands/cancel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import errno
import time
from argparse import Namespace

from pavilion import cancel_utils
from pavilion import cmd_utils
Expand All @@ -10,6 +11,8 @@
from pavilion import series
from pavilion.errors import TestSeriesError
from pavilion.test_run import TestRun
from pavilion.config import PavConfig
from pavilion.micro import partition
from .base_classes import Command
from ..errors import TestRunError

Expand Down Expand Up @@ -39,19 +42,31 @@ def _setup_arguments(self, parser):
'in the most recent series submitted by the user is cancelled.')
filters.add_test_filter_args(parser, sort_keys=[], disable_opts=['sys-name'])

def run(self, pav_cfg, args):
"""Cancel the given tests."""
def run(self, pav_cfg: PavConfig, args: Namespace) -> int:
"""Cancel the given tests or series."""

if not args.tests:
if len(args.tests) == 0:
# Get the last series run by this user.
series_id = series.load_user_series_id(pav_cfg)

if series_id is not None:
args.tests.append(series_id)

cancelled_series = False
# Separate out into tests and series
series_ids, test_ids = partition(cmd_utils.is_series_id, args.tests)

test_paths = cmd_utils.arg_filtered_tests(pav_cfg, args, verbose=self.errfile).paths
args.tests = test_ids
args.series = series_ids

# Get TestRun and TestSeries objects
test_paths = cmd_utils.arg_filtered_tests(pav_cfg, args, verbose=self.errfile).paths
tests = cmd_utils.get_tests_by_paths(pav_cfg, test_paths, errfile=self.errfile)

return cancel_utils.cancel_tests(pav_cfg, tests, self.outfile)
sinfos = cmd_utils.arg_filtered_series(pav_cfg, args, verbose=self.errfile)
test_series = map(lambda x: series.TestSeries.load(pav_cfg, x.sid), sinfos)

# Cancel TestRuns and TestSeries
test_ret = cancel_utils.cancel_tests(pav_cfg, tests, self.outfile)
sers_ret = cancel_utils.cancel_series(test_series, self.outfile)

return test_ret or sers_ret
4 changes: 3 additions & 1 deletion lib/pavilion/commands/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import errno
import sys
from argparse import Namespace
from typing import List

from pavilion import arguments
Expand All @@ -17,6 +18,7 @@
from pavilion import sys_vars
from pavilion import utils
from pavilion.errors import TestSeriesError, TestSeriesWarning
from pavilion.config import PavConfig
from .base_classes import Command, sub_cmd


Expand Down Expand Up @@ -426,7 +428,7 @@ def _state_history_cmd(self, pav_cfg: config.PavConfig, args):
)

@sub_cmd()
def _cancel_cmd(self, pav_cfg, args):
def _cancel_cmd(self, pav_cfg: PavConfig, args: Namespace) -> int:
"""Cancel all series found given the arguments."""

series_info = cmd_utils.arg_filtered_series(pav_cfg, args, verbose=self.errfile)
Expand Down
28 changes: 28 additions & 0 deletions lib/pavilion/limiter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import time
import math
from typing import Callable, Tuple, Any


class TimeLimiter:
    """Wraps a call to a function and only calls it if the specified
    cooldown (in seconds) has elapsed since the last call.

    Elapsed time is measured with the monotonic clock, so adjustments to the
    system (wall-clock) time can neither suppress a call nor trigger one
    early.
    """

    def __init__(self, func: Callable[[], Any], cooldown: float):
        """
        :param func: The zero-argument callable to rate limit.
        :param cooldown: Seconds that must elapse after one call before the
            next one is allowed through.
        """

        self.function = func
        self.cooldown = cooldown
        # Negative infinity guarantees that the very first call goes through.
        self.last_called = -math.inf

    def __call__(self) -> Tuple[bool, Any]:
        """Calls the function if enough time has passed, and returns
        a tuple containing a boolean indicating whether the function was
        actually called and the return value of the function (or None if it
        was not called).

        The cooldown is counted from the moment just before the wrapped
        function is invoked. If the function raises, the exception propagates
        and the cooldown is not restarted.
        """

        current_time = time.monotonic()

        if current_time - self.last_called > self.cooldown:
            res = self.function()
            self.last_called = current_time

            return (True, res)

        return (False, None)
Loading
Loading