Llama3 on EC2 inf2.48xlarge
aarora79 committed Jul 12, 2024
1 parent 0789bab commit c931d40
Showing 4 changed files with 13 additions and 9 deletions.
11 changes: 5 additions & 6 deletions README.md
@@ -10,7 +10,7 @@
* [Llama3 on Amazon SageMaker ](#llama3-on-amazon-sagemaker)
* [Full list of benchmarked models](#full-list-of-benchmarked-models)
* [New in this release](#new-in-this-release)
* [v1.0.49](#v1049)
* [v1.0.50](#v1050)
* [Description](#description)
* [Workflow for `FMBench`](#workflow-for-fmbench)
* [Getting started](#getting-started)
@@ -105,6 +105,10 @@ Llama3 is now available on SageMaker (read [blog post](https://aws.amazon.com/bl

## New in this release

## v1.0.50
1. Config file for `Llama3-8b` on Amazon EC2 `inf2.48xlarge`.
1. Update to the new version of DJL LMI (0.28.0).

### v1.0.49
1. Streaming support for Amazon SageMaker and Amazon Bedrock.
1. Per-token latency metrics such as time to first token (TTFT) and mean time per-output token (TPOT).
@@ -115,11 +119,6 @@ Llama3 is now available on SageMaker (read [blog post](https://aws.amazon.com/bl
1. `Phi-3-mini-4k-instruct` configuration file.
1. Tokenizer and misc. bug fixes.
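
The TTFT and TPOT metrics listed in the v1.0.49 notes are standard per-token latency definitions. A minimal sketch of how they could be computed from streamed token arrival times (the function and variable names here are illustrative, not `FMBench` internals):

```python
def per_token_latency(request_start: float, token_times: list[float]) -> tuple[float, float]:
    """Compute TTFT and TPOT from a request start time and per-token arrival times.

    Assumes at least two tokens were streamed back.
    """
    # Time to first token (TTFT): gap between sending the request
    # and receiving the first streamed token.
    ttft = token_times[0] - request_start
    # Mean time per output token (TPOT): average gap between
    # successive tokens after the first one.
    tpot = (token_times[-1] - token_times[0]) / (len(token_times) - 1)
    return ttft, tpot
```

For a request sent at t=0.0 whose tokens arrive at t=1.0, 2.0, and 3.0, this yields a TTFT of 1.0 and a TPOT of 1.0.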

### v1.0.47
1. Run `FMBench` as a Docker container.
1. Bug fixes for GovCloud support.
1. Updated README for EKS cluster creation.


[Release history](./release_history.md)

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fmbench"
version = "1.0.49"
version = "1.0.50"
description = "Benchmark performance of **any Foundation Model (FM)** deployed on **any AWS Generative AI service**, be it **Amazon SageMaker**, **Amazon Bedrock**, **Amazon EKS**, or **Amazon EC2**. The FMs can be deployed on these platforms either directly through `FMBench`, or, if they are already deployed, benchmarked through the **Bring your own endpoint** mode supported by `FMBench`."
authors = ["Amit Arora <aroraai@amazon.com>", "Madhur prashant <Madhurpt@amazon.com>"]
readme = "README.md"
5 changes: 5 additions & 0 deletions release_history.md
@@ -1,3 +1,8 @@
### v1.0.47
1. Run `FMBench` as a Docker container.
1. Bug fixes for GovCloud support.
1. Updated README for EKS cluster creation.

### v1.0.46
1. Native model deployment support for EC2 and EKS (i.e. you can now deploy and benchmark models on EC2 and EKS).
1. FMBench is now available in GovCloud.
4 changes: 2 additions & 2 deletions src/fmbench/configs/llama3/8b/config-llama3-8b-inf2-g5.yml
@@ -162,7 +162,7 @@ experiments:
model_name: llama3-8b-inf2.24xl
ep_name: llama-3-8b-instruct-inf2-24xl
instance_type: "ml.inf2.24xlarge"
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.27.0-neuronx-sdk2.18.1'
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.28.0-neuronx-sdk2.18.2'
deploy: yes
instance_count: 1
# FMBench comes packaged with multiple deployment scripts, such as scripts for JumpStart
@@ -219,7 +219,7 @@ experiments:
model_name: llama3-8b-instruct-g5.12xl
ep_name: llama-3-8b-instruct-g5-12xl
instance_type: "ml.g5.12xlarge"
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.27.0-deepspeed0.12.6-cu121'
image_uri: '763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.28.0-lmi10.0.0-cu124'
deploy: yes
instance_count: 1
deployment_script: jumpstart.py
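
The `image_uri` values in the config above leave `{region}` as a placeholder, presumably resolved at deploy time to the target AWS region. A minimal sketch of that substitution (the template string is copied from the updated config; the helper function is illustrative, not `FMBench` code):

```python
# DJL LMI inference container template as it appears in the config file;
# {region} is left for the deployment step to fill in.
IMAGE_URI_TEMPLATE = (
    "763104351884.dkr.ecr.{region}.amazonaws.com/"
    "djl-inference:0.28.0-neuronx-sdk2.18.2"
)

def resolve_image_uri(region: str) -> str:
    """Fill the {region} placeholder with the target AWS region."""
    return IMAGE_URI_TEMPLATE.format(region=region)

print(resolve_image_uri("us-east-1"))
```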
