Merge pull request #99 from mlcommons/dev

Merge with dev
mlcommons · Jan 3, 2025 · bd5ed77 · bd5ed77
2 parents 298f35a + 62ed33d
commit bd5ed77
Show file tree

Hide file tree

Showing 65 changed files with 4,928 additions and 99 deletions.
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
@@ -3,7 +3,7 @@ name: "Code formatting"
 on:
   push:
     branches:
-    - "**"
+      - "**"
 
 env:
   python_version: "3.9"
@@ -12,16 +12,17 @@ jobs:
   format-code:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout code
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
-          ssh-key: ${{ secrets.DEPLOY_KEY }}
+
       - name: Set up Python ${{ env.python_version }}
         uses: actions/setup-python@v3
         with:
           python-version: ${{ env.python_version }}
 
-      - name: Format modified python files
+      - name: Format modified Python files
         env:
           filter: ${{ github.event.before }}
         run: |
@@ -48,14 +49,14 @@ jobs:
             fi
           done
 
-      - name: Commit and Push
+      - name: Commit and push changes
         run: |
           HAS_CHANGES=$(git diff --staged --name-only)
           if [ ${#HAS_CHANGES} -gt 0 ]; then
             git config --global user.name mlcommons-bot
             git config --global user.email "mlcommons-bot@users.noreply.github.com"
             # Commit changes
             git commit -m '[Automated Commit] Format Codebase'
-            git push
-      
-          fi 
+            # Use the PAT to push changes
+            git push 
+          fi
diff --git a/.github/workflows/run-individual-script-tests.yml b/.github/workflows/run-individual-script-tests.yml
@@ -3,10 +3,10 @@ name: Individual CM script Tests
 
 on:
   pull_request:
-    branches: [ "main", "mlperf-inference", "dev" ]
+    branches: [ "main", "dev" ]
     paths:
       - 'script/**_cm.json'
-      - 'script/**_cm.yml'
+      - 'script/**_cm.yaml'
 
 jobs:
   run-script-tests:
@@ -34,4 +34,4 @@ jobs:
         done
         python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm"
         cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-        DOCKER_CM_REPO=${{ github.event.pull_request.head.repo.html_url }} DOCKER_CM_REPO_BRANCH=${{ github.event.pull_request.head.ref }} TEST_INPUT_INDEX=${{ matrix.test-input-index }} python3 tests/script/process_tests.py ${{ steps.getfile.outputs.files }}
+        DOCKER_CM_REPO=${{ github.event.pull_request.head.repo.html_url }} DOCKER_CM_REPO_BRANCH=${{ github.event.pull_request.head.ref }} TEST_INPUT_INDEX=${{ matrix.test-input-index }} python3 script/test-cm-core/src/script/process_tests.py ${{ steps.getfile.outputs.files }}
diff --git a/.github/workflows/test-mlperf-inference-abtf-poc.yml b/.github/workflows/test-mlperf-inference-abtf-poc.yml
@@ -1,10 +1,10 @@
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
-name: MLPerf inference ABTF POC Test
+name: MLPerf Automotive POC Test
 
 on:
   pull_request:
-    branches: [ "main", "mlperf-inference" ]
+    branches: [ "main", "dev" ]
     paths:
       - '.github/workflows/test-mlperf-inference-abtf-poc.yml'
       - '**'
@@ -55,7 +55,7 @@ jobs:
       run: |
         pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm"
         cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-        cm pull repo mlcommons@cm4abtf --branch=poc
+        #cm pull repo mlcommons@cm4abtf --branch=poc
   
     - name: Install Docker on macos
       if: runner.os == 'macOS-deactivated'

diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml
@@ -31,5 +31,5 @@ jobs:
         git config --global credential.helper store
         huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential
         cm pull repo
-        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1
+        cm run script --tags=run-mlperf,inference,_submission,_short --adr.inference-src.tags=_branch.dev --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1
         cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions
diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml
@@ -64,4 +64,4 @@ jobs:
         git config --global credential.https://github.com.helper "!gh auth git-credential"
         git config --global credential.https://gist.github.com.helper ""
         git config --global credential.https://gist.github.com.helper "!gh auth git-credential"
-        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet
diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml
@@ -64,4 +64,4 @@ jobs:
         git config --global credential.https://github.com.helper "!gh auth git-credential"
         git config --global credential.https://gist.github.com.helper ""
         git config --global credential.https://gist.github.com.helper "!gh auth git-credential"
-        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet
diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml
@@ -31,7 +31,7 @@ jobs:
         cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
     - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }}
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --pull_changes=yes --pull_inference_changes=yes  --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet  -v --target_qps=1
+        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes  --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet  -v --target_qps=1
     - name: Push Results
       if: github.repository_owner == 'gateoverflow'
       env:
@@ -45,4 +45,4 @@ jobs:
         git config --global credential.https://github.com.helper "!gh auth git-credential"
         git config --global credential.https://gist.github.com.helper ""
         git config --global credential.https://gist.github.com.helper "!gh auth git-credential"
-        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet
diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml
@@ -22,4 +22,4 @@ jobs:
         python3 -m pip install cm4mlops
         cm pull repo
         cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }}  --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions  --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
-        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "08 01 * * */3" #to be adjusted
+    - cron: "58 23 * * *" #to be adjusted
 
 jobs:
   run_nvidia:
@@ -17,20 +17,31 @@ jobs:
       strategy:
         fail-fast: false
         matrix:
-          system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9" ] 
+          # system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9", "mlc-server" ]
+          system: [ "mlc-server" ] 
           python-version: [ "3.12" ]
           model: [ "resnet50",  "retinanet",  "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ]
           exclude:
            - model: gptj-99.9
 
       steps:
       - name: Test MLPerf Inference NVIDIA ${{ matrix.model }}
+        env:
+          gpu_name: rtx_4090
         run: |
           # Set hw_name based on matrix.system
           if [ "${{ matrix.system }}" = "GO-spr" ]; then
             hw_name="RTX4090x2"
+            gpu_name=rtx_4090
+            docker_string=" --docker"
+          elif [ "${{ matrix.system }}" = "mlc-server" ]; then
+            hw_name="H100x8"
+            gpu_name=h100
+            docker_string=" "
           else
             hw_name="RTX4090x1"
+            gpu_name=rtx_4090
+            docker_string=" --docker"
           fi
 
           if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
@@ -40,6 +51,6 @@ jobs:
           pip install --upgrade cm4mlops
           cm pull repo
           
-          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed  --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean  --docker --quiet
+          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed  --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean  $docker_string --quiet
 
           cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py
@@ -1902,6 +1902,9 @@ def docker(i):
 
     noregenerate_docker_file = i.get('docker_noregenerate', False)
     norecreate_docker_image = i.get('docker_norecreate', True)
+    recreate_docker_image = i.get('docker_recreate', False)
+    if recreate_docker_image:  # force recreate
+        norecreate_docker_image = False
 
     if i.get('docker_skip_build', False):
         noregenerate_docker_file = True
@@ -1974,8 +1977,6 @@ def docker(i):
             env['CM_DOCKER_CACHE'] = docker_cache
 
     image_repo = i.get('docker_image_repo', '')
-    if image_repo == '':
-        image_repo = 'local'
 
     # Host system needs to have docker
     r = self_module.cmind.access({'action': 'run',
@@ -2169,7 +2170,7 @@ def docker(i):
 
         # env keys corresponding to container mounts are explicitly passed to
         # the container run cmd
-        container_env_string = ''
+        container_env = {}
         for index in range(len(mounts)):
             mount = mounts[index]
             # Since windows may have 2 :, we search from the right
@@ -2211,7 +2212,6 @@ def docker(i):
                         new_container_mount, new_container_mount_env = get_container_path(
                             env[tmp_value])
                         container_env_key = new_container_mount_env
-                        # container_env_string += " --env.{}={} ".format(tmp_value, new_container_mount_env)
                     else:  # we skip those mounts
                         mounts[index] = None
                         skip = True
@@ -2223,8 +2223,7 @@ def docker(i):
                 continue
             mounts[index] = new_host_mount + ":" + new_container_mount
             if host_env_key:
-                container_env_string += " --env.{}={} ".format(
-                    host_env_key, container_env_key)
+                container_env[host_env_key] = container_env_key
 
                 for v in docker_input_mapping:
                     if docker_input_mapping[v] == host_env_key:
@@ -2255,10 +2254,16 @@ def docker(i):
         for key in proxy_keys:
             if os.environ.get(key, '') != '':
                 value = os.environ[key]
-                container_env_string += " --env.{}={} ".format(key, value)
+                container_env[key] = value
                 env['+ CM_DOCKER_BUILD_ARGS'].append(
                     "{}={}".format(key, value))
 
+        if container_env:
+            if not i_run_cmd.get('env'):
+                i_run_cmd['env'] = container_env
+            else:
+                i_run_cmd['env'] = {**i_run_cmd['env'], **container_env}
+
         docker_use_host_group_id = i.get(
             'docker_use_host_group_id',
             docker_settings.get('use_host_group_id'))
@@ -2400,8 +2405,7 @@ def docker(i):
                                    'docker_run_cmd_prefix': i.get('docker_run_cmd_prefix', '')})
         if r['return'] > 0:
             return r
-        run_cmd = r['run_cmd_string'] + ' ' + \
-            container_env_string + ' --docker_run_deps '
+        run_cmd = r['run_cmd_string'] + ' ' + ' --docker_run_deps '
 
         env['CM_RUN_STATE_DOCKER'] = True
 
@@ -2432,10 +2436,8 @@ def docker(i):
                            'docker_os_version': docker_os_version,
                            'cm_repo': cm_repo,
                            'env': env,
-                           'image_repo': image_repo,
                            'interactive': interactive,
                            'mounts': mounts,
-                           'image_name': image_name,
                            #                            'image_tag': script_alias,
                            'image_tag_extra': image_tag_extra,
                            'detached': detached,
@@ -2452,6 +2454,12 @@ def docker(i):
                            }
                            }
 
+        if image_repo:
+            cm_docker_input['image_repo'] = image_repo
+
+        if image_name:
+            cm_docker_input['image_name'] = image_name
+
         if all_gpus:
             cm_docker_input['all_gpus'] = True
 

diff --git a/script/app-mlperf-automotive-mlcommons-python/README-extra.md b/script/app-mlperf-automotive-mlcommons-python/README-extra.md
@@ -0,0 +1 @@
+# CM script