diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 5d7f409e8f..d5891accb7 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.06-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.07-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
index ee9393de80..3340f2d840 100644
--- a/Dockerfile.win10.min
+++ b/Dockerfile.win10.min
@@ -153,7 +153,7 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing CUDNN
 #
-ARG CUDNN_VERSION=8.9.1.23
+ARG CUDNN_VERSION=8.9.3.28
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
 ARG CUDNN_SOURCE=${CUDNN_ZIP}
diff --git a/README.md b/README.md
index 526f39695f..03bb690384 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,8 @@
 
 **LATEST RELEASE: You are currently on the main branch which tracks
 under-development progress towards the next release. The current release is
-version [2.35.0](https://github.com/triton-inference-server/server/tree/r23.06)
-and corresponds to the 23.06 container release on
+version [2.36.0](https://github.com/triton-inference-server/server/tree/r23.07)
+and corresponds to the 23.07 container release on
 [NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**
 
 ----
@@ -88,16 +88,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository
-git clone -b r23.06 https://github.com/triton-inference-server/server.git
+git clone -b r23.07 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.07-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.06-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.07-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
diff --git a/build.py b/build.py
index 1bf9d8ac10..f9c97060d7 100755
--- a/build.py
+++ b/build.py
@@ -69,12 +69,12 @@
 
 TRITON_VERSION_MAP = {
     "2.37.0dev": (
         "23.08dev",  # triton container
-        "23.06",  # upstream container
-        "1.15.0",  # ORT
+        "23.07",  # upstream container
+        "1.15.1",  # ORT
         "2023.0.0",  # ORT OpenVINO
         "2023.0.0",  # Standalone OpenVINO
         "2.4.7",  # DCGM version
-        "py310_23.1.0-1",  # Conda version
+        "py310_23.1.0-1",  # Conda version.
     )
 }
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index 7fd1ca4e48..85bb00f08d 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index ac9245f618..aaf34629e7 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.35.0"
+appVersion: "2.36.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index c304d9de1f..bc75ef6423 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.07-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
     # Model Control Mode (Optional, default: none)
    #
    # To set model control mode, uncomment and configure below
-    # See https://github.com/triton-inference-server/server/blob/r23.06/docs/model_management.md
+    # See https://github.com/triton-inference-server/server/blob/r23.07/docs/model_management.md
    # for more details
    #- --model-control-mode=explicit|poll|none
    #
    # Additional server args
    #
-    # see https://github.com/triton-inference-server/server/blob/r23.06/README.md
+    # see https://github.com/triton-inference-server/server/blob/r23.07/README.md
    # for more details
 
 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index 96eb213a6c..7533b5f693 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.06-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.07-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index dfbc458556..9893a9b920 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:23.06-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:23.07-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index 64292409c8..958af2f709 100755
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -28,8 +28,8 @@
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
 export MAJOR_VERSION=2.33
-export MINOR_VERSION=2.35.0
-export NGC_VERSION=23.06-py3
+export MINOR_VERSION=2.36.0
+export NGC_VERSION=23.07-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index dc68798d4e..f448a96ca7 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -28,4 +28,4 @@ apiVersion: v1
 appVersion: "2.33"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.35.0
+version: 2.36.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index 6a7dc39772..ee7638414a 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -32,13 +32,13 @@ tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
 modelRepositoryPath: gs://triton_sample_models/23_04
-publishedVersion: '2.35.0'
+publishedVersion: '2.36.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 23.06-py3
+  tag: 23.07-py3
   pullPolicy: IfNotPresent
   # modify the model repository here to match your GCP storage bucket
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index 1a51f17a8f..0c4e79b7ed 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.35.0'
+  publishedVersion: '2.36.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index 4da79a389a..ddf67a6e90 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.35.0'
+  publishedVersion: '2.36.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.