Skip to content

Commit

Permalink
Disable log printing for GPU and set GCS output using --env (#228)
Browse files: browse the repository at this point in the history
  • Loading branch information
jonb377 authored Mar 29, 2024
1 parent 599a116 commit 12aa063
Showing 1 changed file with 11 additions and 10 deletions.
21 changes: 11 additions & 10 deletions xlml/utils/xpk.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ def run_workload(
):
"""Run workload through xpk tool."""

run_cmds = f"export {metric_config.SshEnvVars.GCS_OUTPUT.name}={gcs_path}; {run_cmds}"

with tempfile.TemporaryDirectory() as tmpdir:
cmds = (
"set -xue",
Expand All @@ -71,6 +69,7 @@ def run_workload(
f" --command='{run_cmds}' --device-type={accelerator_type}"
f" --num-slices={num_slices} --docker-image={docker_image}"
f" --project={cluster_project} --zone={zone}"
f" --env {metric_config.SshEnvVars.GCS_OUTPUT.name}={gcs_path}"
),
)
hook = SubprocessHook()
Expand Down Expand Up @@ -142,14 +141,16 @@ def wait_for_workload_completion(
elif pod.status.phase in ["Unknown"]:
raise RuntimeError(f"Bad pod phase: {pod.status.phase}")
finally:
# Print the logs of the last pod checked - either the first failed pod or
# the last successful one.
logs = core_api.read_namespaced_pod_log(
name=pod.metadata.name, namespace=pod.metadata.namespace
)
logging.info(f"Logs for pod {pod.metadata.name}:")
for line in logs.split("\n"):
logging.info(line)
# TODO(jonbolin): log printing for GPUs, which have multiple containers
if len(pod.spec.containers) == 1:
# Print the logs of the last pod checked - either the first failed pod or
# the last successful one.
logs = core_api.read_namespaced_pod_log(
name=pod.metadata.name, namespace=pod.metadata.namespace
)
logging.info(f"Logs for pod {pod.metadata.name}:")
for line in logs.split("\n"):
logging.info(line)
url = WORKLOAD_URL_FORMAT.format(
region=region,
cluster=cluster_name,
Expand Down

0 comments on commit 12aa063

Please sign in to comment.