Skip to content

Commit e35a174

Browse files
address self-review comment
1 parent 304cc98 commit e35a174

File tree

3 files changed

+9
-9
lines changed

3 files changed

+9
-9
lines changed

perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
sys.path.append("../")
2424
from utils.utils import get_memory, get_cpu, standard_timestamp, is_mash_installed
2525

26-
_LOCAL_LOGS_LOCATION = "../../bin/dlio-logs"
26+
_LOCAL_LOGS_LOCATION = "../../bin/dlio-logs/logs"
2727

2828
record = {
2929
"pod_name": "",
@@ -56,7 +56,7 @@ def downloadDlioOutputs(dlioWorkloads: set, instanceId: str):
5656
"-r",
5757
"--no-user-output-enabled", # do not print names of files being copied
5858
f"gs://{dlioWorkload.bucket}/logs/{instanceId}",
59-
_LOCAL_LOGS_LOCATION + "/logs",
59+
_LOCAL_LOGS_LOCATION,
6060
],
6161
capture_output=False,
6262
text=True,
@@ -100,7 +100,7 @@ def downloadDlioOutputs(dlioWorkloads: set, instanceId: str):
100100
args = parser.parse_args()
101101

102102
try:
103-
os.makedirs(_LOCAL_LOGS_LOCATION + "/logs")
103+
os.makedirs(_LOCAL_LOGS_LOCATION)
104104
except FileExistsError:
105105
pass
106106

@@ -125,9 +125,7 @@ def downloadDlioOutputs(dlioWorkloads: set, instanceId: str):
125125
if not mash_installed:
126126
print("Mash is not installed, will skip parsing CPU and memory usage.")
127127

128-
for root, _, files in os.walk(
129-
_LOCAL_LOGS_LOCATION + "/logs/" + args.instance_id
130-
):
128+
for root, _, files in os.walk(_LOCAL_LOGS_LOCATION + "/" + args.instance_id):
131129
if files:
132130
print(f"Parsing directory {root} ...")
133131
per_epoch_stats_file = root + "/per_epoch_stats.json"

perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def run_command(command: str):
3434
print(result.stderr)
3535

3636

37-
def createHelmInstallCommands(dlioWorkloads: list, instanceId: str):
37+
def createHelmInstallCommands(dlioWorkloads: set, instanceId: str):
3838
"""Create helm install commands for the given set of dlioWorkload objects."""
3939
helm_commands = []
4040
for dlioWorkload in dlioWorkloads:

perfmetrics/scripts/testing_on_gke/examples/dlio/unet3d-loading-test/templates/dlio-tester.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ spec:
7373
sleep 300
7474
{{ end }}
7575
76+
outputDir=/logs/{{ .Values.instanceId }}/{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }}/{{ .Values.scenario }}
77+
7678
echo "Testing {{ .Values.scenario }}"
7779
mpirun -np 8 dlio_benchmark workload=unet3d_a100 \
7880
++workload.train.epochs=4 \
@@ -84,11 +86,11 @@ spec:
8486
++workload.reader.batch_size={{ .Values.dlio.batchSize }} \
8587
++workload.dataset.record_length={{ .Values.dlio.recordLength }} \
8688
++workload.reader.read_threads={{ .Values.dlio.readThreads }} \
87-
++workload.output.folder=/logs/{{ .Values.instanceId }}/{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }}/{{ .Values.scenario }}
89+
++workload.output.folder=${outputDir}
8890
8991
# dump the gcsfuse-mount-configuration to a file in output-directory.
9092
{{ if eq .Values.scenario "gcsfuse-generic"}}
91-
echo "{{ .Values.gcsfuse.mountOptions }}" > /logs/{{ .Values.instanceId }}/{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }}/{{ .Values.scenario }}/gcsfuse_mount_options
93+
echo "{{ .Values.gcsfuse.mountOptions }}" > ${outputDir}/gcsfuse_mount_options
9294
{{ end }}
9395
9496
gsutil -m cp -R /logs/{{ .Values.instanceId }} gs://{{ .Values.bucketName }}/logs/{{ .Values.instanceId }}/$(date +"%Y-%m-%d-%H-%M")

0 commit comments

Comments
 (0)