Skip to content

Commit

Permalink
Adding tron run id to pod labels for k8s
Browse files Browse the repository at this point in the history
  • Loading branch information
wilmer05 committed Feb 12, 2024
1 parent c0d3a0a commit 2c88f6c
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 2 deletions.
23 changes: 23 additions & 0 deletions tests/bin/action_runner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,26 @@ def test_build_environment_too_long_run_id(self):
TRON_RUN_NUM="10",
TRON_ACTION="bar.baz",
)


class TestBuildLabels:
    """Checks for ``action_runner.build_labels`` across run id shapes."""

    def test_build_labels(self):
        """A four-part run id yields its third component as the run number."""
        result = action_runner.build_labels("MASTER.foo.10.bar")

        assert result == {"TRON_RUN_NUM": "10"}

    def test_build_labels_invalid_run_id(self):
        """A run id with no dots falls back to the UNKNOWN placeholder."""
        result = action_runner.build_labels("asdf")

        assert result == {"TRON_RUN_NUM": "UNKNOWN"}

    def test_build_labels_too_long_run_id(self):
        """Extra trailing components do not change the extracted run number."""
        result = action_runner.build_labels("MASTER.foo.10.bar.baz")

        assert result == {"TRON_RUN_NUM": "10"}
3 changes: 3 additions & 0 deletions tests/core/actionrun_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1802,6 +1802,9 @@ def mock_k8s_action_run(self):
"TRON_RUN_NUM": "42",
"TRON_ACTION": "mock_action_name",
},
labels={
"TRON_RUN_NUM": "42",
},
)

return KubernetesActionRun(
Expand Down
18 changes: 18 additions & 0 deletions tron/bin/action_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@ def build_environment(run_id, original_env=None):
return new_env


def build_labels(run_id, original_labels=None):
    """Return a copy of ``original_labels`` with ``TRON_RUN_NUM`` set.

    The run number is taken from the third dot-separated component of
    ``run_id`` (e.g. ``"MASTER.foo.10.bar"`` gives ``"10"``). When
    ``run_id`` has fewer than three components the label is set to
    ``"UNKNOWN"`` instead of raising.

    Args:
        run_id: Dotted run identifier string.
        original_labels: Optional mapping of existing labels. It is copied,
            never mutated; an existing ``TRON_RUN_NUM`` entry is overwritten.

    Returns:
        A new dict holding the original labels plus ``TRON_RUN_NUM``.
    """
    pieces = run_id.split(".", maxsplit=3)
    # An unparseable run id must not abort the caller, so fall back to a
    # placeholder value rather than failing.
    run_number = pieces[2] if len(pieces) > 2 else "UNKNOWN"

    labels = {} if original_labels is None else dict(original_labels)
    labels["TRON_RUN_NUM"] = run_number

    logging.debug(labels)
    return labels


def run_proc(output_path, command, run_id, proc):
logging.warning(f"{run_id} running as pid {proc.pid}")
status_file = StatusFile(os.path.join(output_path, STATUS_FILE))
Expand Down
5 changes: 3 additions & 2 deletions tron/core/actionrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from tron.actioncommand import NoActionRunnerFactory
from tron.actioncommand import SubprocessActionRunnerFactory
from tron.bin.action_runner import build_environment # type: ignore # mypy can't find library stub
from tron.bin.action_runner import build_labels
from tron.config.config_utils import StringFormatter
from tron.config.schema import ExecutorTypes
from tron.core import action
Expand Down Expand Up @@ -1170,7 +1171,7 @@ def submit_command(self, attempt: ActionRunAttempt) -> Optional[KubernetesTask]:
cap_drop=attempt.command_config.cap_drop,
node_selectors=attempt.command_config.node_selectors,
node_affinities=attempt.command_config.node_affinities,
pod_labels=attempt.command_config.labels,
pod_labels=build_labels(run_id=self.id, original_labels=attempt.command_config.labels),
pod_annotations=attempt.command_config.annotations,
service_account_name=attempt.command_config.service_account_name,
ports=attempt.command_config.ports,
Expand Down Expand Up @@ -1244,7 +1245,7 @@ def recover(self) -> Optional[KubernetesTask]:
task_id=last_attempt.kubernetes_task_id,
node_selectors=last_attempt.command_config.node_selectors,
node_affinities=last_attempt.command_config.node_affinities,
pod_labels=last_attempt.command_config.labels,
pod_labels=build_labels(run_id=self.id, original_labels=last_attempt.command_config.labels),
pod_annotations=last_attempt.command_config.annotations,
service_account_name=last_attempt.command_config.service_account_name,
ports=last_attempt.command_config.ports,
Expand Down

0 comments on commit 2c88f6c

Please sign in to comment.