Update post-23.07 release #6103

Merged · 7 commits · Jul 28, 2023
2 changes: 1 addition & 1 deletion Dockerfile.sdk
@@ -29,7 +29,7 @@
#

# Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.06-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.07-py3-min

ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_COMMON_REPO_TAG=main
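The bumped base can also be supplied at build time rather than by editing the file; a minimal sketch, assuming the repo root as build context (the output tag is illustrative):

```bash
# Build the client SDK image on top of the 23.07 minimal base
docker build -f Dockerfile.sdk \
    --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.07-py3-min \
    -t tritonserver_sdk:23.07 .
```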
2 changes: 1 addition & 1 deletion Dockerfile.win10.min
@@ -153,7 +153,7 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
#
# Installing CUDNN
#
-ARG CUDNN_VERSION=8.9.1.23
+ARG CUDNN_VERSION=8.9.3.28
ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
ARG CUDNN_SOURCE=${CUDNN_ZIP}

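Since CUDNN_ZIP is derived from CUDNN_VERSION, the bump also changes which archive the Windows image fetches; a quick sketch of the expansion (shell used only to illustrate the substitution):

```bash
# The ARG substitution above resolves the archive name as follows
CUDNN_VERSION=8.9.3.28
echo "cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip"
# -> cudnn-windows-x86_64-8.9.3.28_cuda12-archive.zip
```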
10 changes: 5 additions & 5 deletions README.md
@@ -32,8 +32,8 @@

**LATEST RELEASE: You are currently on the main branch which tracks
under-development progress towards the next release. The current release is
-version [2.35.0](https://github.com/triton-inference-server/server/tree/r23.06)
-and corresponds to the 23.06 container release on
+version [2.36.0](https://github.com/triton-inference-server/server/tree/r23.07)
+and corresponds to the 23.07 container release on
[NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**

----
@@ -88,16 +88,16 @@ Inference Server with the

```bash
# Step 1: Create the example model repository
-git clone -b r23.06 https://github.com/triton-inference-server/server.git
+git clone -b r23.07 https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh

# Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.07-py3 tritonserver --model-repository=/models

# Step 3: Sending an Inference Request
# In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.06-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.07-py3-sdk
/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg

# Inference should return the following
6 changes: 3 additions & 3 deletions build.py
@@ -69,12 +69,12 @@
TRITON_VERSION_MAP = {
    "2.37.0dev": (
        "23.08dev",  # triton container
-        "23.06",  # upstream container
-        "1.15.0",  # ORT
+        "23.07",  # upstream container
+        "1.15.1",  # ORT
        "2023.0.0",  # ORT OpenVINO
        "2023.0.0",  # Standalone OpenVINO
        "2.4.7",  # DCGM version
-        "py310_23.1.0-1",  # Conda version
+        "py310_23.1.0-1",  # Conda version.
    )
}

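To sanity-check the new pins after pulling this change, the map entry can be inspected in place; for example:

```bash
# Print the 2.37.0dev entry, including the 23.07 upstream and ORT 1.15.1 pins
grep -A 9 '"2.37.0dev"' build.py
```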
2 changes: 1 addition & 1 deletion deploy/aws/values.yaml
@@ -27,7 +27,7 @@
replicaCount: 1

image:
-  imageName: nvcr.io/nvidia/tritonserver:23.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.07-py3
pullPolicy: IfNotPresent
modelRepositoryPath: s3://triton-inference-server-repository/model_repository
numGpus: 1
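The same image.imageName key is what a Helm override targets, so an existing deployment can be rolled forward without editing the chart; a sketch, assuming the chart is installed from this repo's deploy/aws directory (the release name is illustrative):

```bash
# Install (or upgrade) against the 23.07 release image
helm install triton-server deploy/aws \
    --set image.imageName=nvcr.io/nvidia/tritonserver:23.07-py3
```

The fleetcommand and gcp charts below expose the same key, so the identical override applies there.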
2 changes: 1 addition & 1 deletion deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@

apiVersion: v1
# appVersion is the Triton version; update when changing release
appVersion: "2.35.0"
appVersion: "2.36.0"
description: Triton Inference Server (Fleet Command)
name: triton-inference-server
# version is the Chart version; update when changing anything in the chart
6 changes: 3 additions & 3 deletions deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
replicaCount: 1

image:
-  imageName: nvcr.io/nvidia/tritonserver:23.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.07-py3
pullPolicy: IfNotPresent
numGpus: 1
serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
# Model Control Mode (Optional, default: none)
#
# To set model control mode, uncomment and configure below
-# See https://github.com/triton-inference-server/server/blob/r23.06/docs/model_management.md
+# See https://github.com/triton-inference-server/server/blob/r23.07/docs/model_management.md
# for more details
#- --model-control-mode=explicit|poll|none
#
# Additional server args
#
-# see https://github.com/triton-inference-server/server/blob/r23.06/README.md
+# see https://github.com/triton-inference-server/server/blob/r23.07/README.md
# for more details

service:
2 changes: 1 addition & 1 deletion deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
replicaCount: 1

image:
-  imageName: nvcr.io/nvidia/tritonserver:23.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.07-py3
pullPolicy: IfNotPresent
modelRepositoryPath: gs://triton-inference-server-repository/model_repository
numGpus: 1
@@ -33,7 +33,7 @@ metadata:
namespace: default
spec:
containers:
-  - image: nvcr.io/nvidia/tritonserver:23.06-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:23.07-py3-sdk
imagePullPolicy: Always
name: nv-triton-client
securityContext:
4 changes: 2 additions & 2 deletions deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -28,8 +28,8 @@
export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
export APP_NAME=tritonserver
export MAJOR_VERSION=2.33
-export MINOR_VERSION=2.35.0
-export NGC_VERSION=23.06-py3
+export MINOR_VERSION=2.36.0
+export NGC_VERSION=23.07-py3

docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION

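With the bumped exports, republishing the deployer is a matter of re-running the script against your GCP project; a sketch (the project ID is hypothetical, and gcloud/registry auth is assumed to be configured):

```bash
gcloud config set project my-gcp-project   # hypothetical project ID
bash deploy/gke-marketplace-app/server-deployer/build_and_push.sh
```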
@@ -28,4 +28,4 @@ apiVersion: v1
appVersion: "2.33"
description: Triton Inference Server
name: triton-inference-server
-version: 2.35.0
+version: 2.36.0
@@ -32,13 +32,13 @@ tritonProtocol: HTTP
# HPA GPU utilization autoscaling target
HPATargetAverageValue: 85
modelRepositoryPath: gs://triton_sample_models/23_04
-publishedVersion: '2.35.0'
+publishedVersion: '2.36.0'
gcpMarketplace: true

image:
registry: gcr.io
repository: nvidia-ngc-public/tritonserver
-  tag: 23.06-py3
+  tag: 23.07-py3
pullPolicy: IfNotPresent
# modify the model repository here to match your GCP storage bucket
numGpus: 1
@@ -27,7 +27,7 @@
x-google-marketplace:
schemaVersion: v2
applicationApiVersion: v1beta1
-publishedVersion: '2.35.0'
+publishedVersion: '2.36.0'
publishedVersionMetadata:
releaseNote: >-
Initial release.
2 changes: 1 addition & 1 deletion deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
x-google-marketplace:
schemaVersion: v2
applicationApiVersion: v1beta1
-publishedVersion: '2.35.0'
+publishedVersion: '2.36.0'
publishedVersionMetadata:
releaseNote: >-
Initial release.