Skip to content

Commit

Permalink
mlperf
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name committed Jun 8, 2023
1 parent b5539ea commit 42eab04
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 9 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,19 @@ jobs:
MILABENCH_DASH: "no"

steps:
- uses: actions/checkout@v3

- uses: conda-incubator/setup-miniconda@v2
with:
auto-activate-base: false
python-version: 3.9
miniconda-version: "latest"
activate-environment: test

- name: clean
run: |
python -c "import shutil; shutil.rmtree('.')"
- uses: actions/checkout@v3

- name: Pytorch Sanity
run: |
if [[ "${{ matrix.arch }}" == "rocm" ]]; then
Expand Down
7 changes: 7 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[submodule "benchmarks/mlperf/apex"]
path = benchmarks/mlperf/apex
url = https://github.com/NVIDIA/apex.git

[submodule "benchmarks/mlperf/training_results_v2.1"]
path = benchmarks/mlperf/training_results_v2.1
url = https://github.com/mlcommons/training_results_v2.1.git
12 changes: 6 additions & 6 deletions benchmarks/huggingface/bench/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .synth import SyntheticData, generators



def is_tf32_allowed(args):
return "tf32" in args.precision

Expand Down Expand Up @@ -56,11 +57,12 @@ def __init__(self, args):

example = next(iter(self.loader))
example = {k: x.to(self.device) for k, x in example.items()}

# print({k: x.shape for k, x in example.items()})


model = ModelWrapper(info.model).to(self.device)
model = torch.jit.trace(model, example)

jit = False
if jit:
model = torch.jit.trace(model, example)

self.model = model
self.optimizer = optim.Adam(self.model.parameters(), lr=args.lr)
Expand All @@ -87,8 +89,6 @@ def train(self):
"train", self.loader, report_batch=True, batch_size=self.batch_size
):
data = {k: v.to(self.device) for k, v in data.items()}

template = {k: (v.shape, v.dtype) for k, v in data.items()}
self.step(data)


Expand Down
1 change: 1 addition & 0 deletions benchmarks/mlperf/apex
Submodule apex added at 05091d
10 changes: 10 additions & 0 deletions benchmarks/mlperf/benchfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from milabench.pack import Package


class MLPerfBenchmark(Package):
    """Milabench package definition for the MLPerf benchmark.

    Declares where the benchmark's dependencies and entry point live;
    milabench's `Package` machinery reads these class attributes to
    install requirements and launch the benchmark.
    """

    # Pip-style requirements file installed into the benchmark's venv.
    base_requirements = "requirements.in"
    # Script executed (relative to this benchmark's directory) when the
    # benchmark runs.
    main_script = "main.py"


# Entry point milabench looks up when loading this benchfile.
__pack__ = MLPerfBenchmark

13 changes: 13 additions & 0 deletions benchmarks/mlperf/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

import sys
import os


FOLDER = os.path.dirname(__file__)
BENCH = "training_results_v2.1/NVIDIA/benchmarks/bert/implementations/pytorch-preview"

print(sys.path)
sys.path.append(os.path.join(FOLDER, BENCH))
print(sys.path)

import run_squad
4 changes: 4 additions & 0 deletions benchmarks/mlperf/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
git+https://github.com/NVIDIA/mlperf-common.git
git+https://github.com/NVIDIA/apex.git
git+https://github.com/mlcommons/logging.git
boto3
1 change: 1 addition & 0 deletions benchmarks/mlperf/training_results_v2.1
Submodule training_results_v2.1 added at 158189
12 changes: 11 additions & 1 deletion config/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ _defaults:
gpu_load_threshold: 0.5
gpu_mem_threshold: 0.5


# MLPerf benchmark entry (see ../benchmarks/mlperf).
mlperf:
  inherits: _defaults
  # Location of the benchmark's benchfile/package definition.
  definition: ../benchmarks/mlperf
  group: mlperf
  # Shares the "torch" install group with the other torch benchmarks.
  install_group: torch
  plan:
    # per_gpu: presumably one benchmark process per visible GPU —
    # confirm against milabench's plan documentation.
    method: per_gpu

_torchvision:
inherits: _defaults
definition: ../benchmarks/torchvision
Expand Down Expand Up @@ -174,7 +184,7 @@ _bert-base:
- precision-showcase
argv:
--model: "Bert"
--batch-size: 32
--batch-size: 48
voir:
options:
stop: 30
Expand Down

0 comments on commit 42eab04

Please sign in to comment.