Skip to content

Commit

Permalink
[jump_ci] testing: add multi-test support
Browse files (browse the repository at this point in the history)
  • Loading branch information
kpouget committed Jan 21, 2025
1 parent f0ff905 commit 18a83a8
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 37 deletions.
3 changes: 3 additions & 0 deletions projects/jump_ci/testing/config.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,3 +33,6 @@ exec_list:
prepare_ci: null
test_ci: null
post_cleanup_ci: null
multi_run:
args: [ray, fms, ilab]
stop_on_error: false
92 changes: 55 additions & 37 deletions projects/jump_ci/testing/test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,6 +26,7 @@ def rewrite_variables_overrides(variable_overrides_dict):
logging.info(f"New args to execute on the jump host: {new_args_str}")
for idx, value in enumerate(new_args):
new_variable_overrides[f"PR_POSITIONAL_ARG_{idx+1}"] = value
next_pr_positional_arg_count = idx + 2

for k, v in variable_overrides_dict.items():
if k.startswith("PR_POSITIONAL_ARG"): continue
Expand All @@ -44,7 +45,7 @@ def rewrite_variables_overrides(variable_overrides_dict):
new_variable_overrides[k] = v
logging.info(f"Passing '{k}: {v}' to the new variables overrides")

return new_variable_overrides
return new_variable_overrides, next_pr_positional_arg_count


def jump_ci(command):
Expand Down Expand Up @@ -72,8 +73,6 @@ def do_jump_ci(cluster=None, project=None, test_args=None):
TOPSAIL_JUMP_CI="true",
TOPSAIL_JUMP_CI_INSIDE_JUMP_HOST="true",
)
if step_dir := os.environ.get("TOPSAIL_OPENSHIFT_CI_STEP_DIR"):
extra_env["TOPSAIL_OPENSHIFT_CI_STEP_DIR"] = f"{step_dir}/test-artifacts" # see "jump_ci retrieve_artifacts" below

env_pass_lists = config.project.get_config("env.pass_lists", print=False)

Expand Down Expand Up @@ -112,51 +111,70 @@ def do_jump_ci(cluster=None, project=None, test_args=None):
variables_overrides_dict[f"PR_POSITIONAL_ARG_{idx+1}"] = arg

config.project.set_config("overrides", variables_overrides_dict)

next_pr_positional_arg_count = idx + 2
else:
if not os.environ.get("OPENSHIFT_CI") == "true":
logging.fatal("Not running in OpenShift CI. Don't know how to rewrite the variable_overrides_file. Aborting.")
raise SystemExit(1)

project = config.project.get_config("overrides.PR_POSITIONAL_ARG_2")

variables_overrides_dict = rewrite_variables_overrides(
variables_overrides_dict, next_pr_positional_arg_count = rewrite_variables_overrides(
config.project.get_config("overrides")
)

run.run_toolbox(
"jump_ci", "prepare_step",
cluster=cluster,
lock_owner=utils.get_lock_owner(),
project=project,
step=command,
env_file=env_fd_path,
variables_overrides_dict=variables_overrides_dict,
secrets_path_env_key=secrets_path_env_key,
)
for idx, multi_run_args in enumerate((config.project.get_config("multi_run.args") or [...])):
test_artifacts_dirname = "test-artifacts"
multi_run_args_dict = {}
if multi_run_args is not ...:
test_artifacts_dirname = f"test-artifacts-{idx:03d}"
with open(env.ARTIFACTS_DIR / "multi_run_args.list", "a+") as f:
print(f"{test_artifacts_dirname}: {multi_run_args}")
for idx, multi_run_arg in enumerate((multi_run_args if isinstance(multi_run_args, list) else [multi_run_args])):
variables_overrides_dict[f"PR_POSITIONAL_ARG_{next_pr_positional_arg_count+idx}"] = multi_run_arg
else:
test_artifacts_dirname = "test-artifacts"

if step_dir := os.environ.get("TOPSAIL_OPENSHIFT_CI_STEP_DIR"):
# see "jump_ci retrieve_artifacts" below
extra_env["TOPSAIL_OPENSHIFT_CI_STEP_DIR"] = f"{step_dir}/{test_artifacts_dirname}"

run.run_toolbox(
"jump_ci", "prepare_step",
cluster=cluster,
lock_owner=utils.get_lock_owner(),
project=project,
step=command,
env_file=env_fd_path,
variables_overrides_dict=(variables_overrides_dict | multi_run_args_dict),
secrets_path_env_key=secrets_path_env_key,
)

try:
tunnelling.run_with_ansible_ssh_conf(f"bash {cluster_lock_dir}/test/{command}/entrypoint.sh")
logging.info(f"Test step '{command}' on cluster '{cluster}' succeeded.")
failed = False
except subprocess.CalledProcessError as e:
logging.fatal(f"Test step '{command}' on cluster '{cluster}' FAILED.")
failed = True
except run.SignalError as e:
logging.error(f"Caught signal {e.sig}. Aborting.")
raise
finally:
# always run the cleanup to be sure that the container doesn't stay running
tunnelling.run_with_ansible_ssh_conf(f"bash {cluster_lock_dir}/test/{command}/entrypoint.sh cleanup")

run.run_toolbox(
"jump_ci", "retrieve_artifacts",
cluster=cluster,
lock_owner=utils.get_lock_owner(),
remote_dir=f"test/{command}/artifacts",
local_dir=f"../test-artifacts", # copy to the main artifact directory
mute_stdout=True,
)
try:
tunnelling.run_with_ansible_ssh_conf(f"bash {cluster_lock_dir}/test/{command}/entrypoint.sh")
logging.info(f"Test step '{command}' on cluster '{cluster}' succeeded.")
failed = False
except subprocess.CalledProcessError as e:
logging.fatal(f"Test step '{command}' on cluster '{cluster}' FAILED.")
failed = True
except run.SignalError as e:
logging.error(f"Caught signal {e.sig}. Aborting.")
raise
finally:
# always run the cleanup to be sure that the container doesn't stay running
tunnelling.run_with_ansible_ssh_conf(f"bash {cluster_lock_dir}/test/{command}/entrypoint.sh cleanup")

run.run_toolbox(
"jump_ci", "retrieve_artifacts",
cluster=cluster,
lock_owner=utils.get_lock_owner(),
remote_dir=f"test/{command}/artifacts",
local_dir=f"../{test_artifacts_dirname}", # copy to the main artifact directory
mute_stdout=True,
)

if failed and config.project.get_config("multi_run.stop_on_error"):
break

jump_ci_artifacts = env.ARTIFACT_DIR / "jump-ci-artifacts"
jump_ci_artifacts.mkdir(parents=True, exist_ok=True)
Expand Down

0 comments on commit 18a83a8

Please sign in to comment.