support custom /tmp directory, Mistral on AWS Chips

aws-samples · Aug 22, 2024 · f6f5080 · f6f5080
1 parent f0234e1
commit f6f5080
Show file tree

Hide file tree

Showing 6 changed files with 22 additions and 14 deletions.
diff --git a/README.md b/README.md
@@ -60,7 +60,7 @@ Llama3 is now available on SageMaker (read [blog post](https://aws.amazon.com/bl
 |:--------------------------------|:-------|:--------------|:---------------------|:---------------|:-------------|:-------------|:-----------------------------|:--------------------------------|
 | **Anthropic Claude-3 Sonnet**   |        |               |                     |                |              |              | ✅                           | ✅                               |
 | **Anthropic Claude-3 Haiku**    |        |               |                     |                |              |              | ✅                           |                                    |
-| **Mistral-7b-instruct**          |        |               | ✅                   |                | ✅            | ✅           | ✅                           |                                    |
+| **Mistral-7b-instruct**          |        |      ✅          | ✅                   |                | ✅            | ✅           | ✅                           |                                    |
 | **Mistral-7b-AWQ**               |        |               |                     |                |              | ✅           |                             |                                    |
 | **Mixtral-8x7b-instruct**       |        |               |                     |                |              |              | ✅                           |                                    |
 | **Llama3.1-8b instruct**         |        | ✅           |        ✅          | ✅             |              |              | ✅                           |                                    |
@@ -81,6 +81,10 @@ Llama3 is now available on SageMaker (read [blog post](https://aws.amazon.com/bl
 
 ## New in this release
 
+## 2.0.3
+
+1. Support for an EFA directory for benchmarking on EC2.
+
 ## 2.0.2
 
 1. Code cleanup, minor bug fixes and report improvements.
@@ -89,12 +93,6 @@ Llama3 is now available on SageMaker (read [blog post](https://aws.amazon.com/bl
 
 1. 🚨 Model evaluations done by a **Panel of LLM Evaluators[[1]](#1)** 🚨
 
-## v1.0.52
-
-1. Compile for AWS Chips (Trainium, Inferentia) and deploy to SageMaker directly through `FMBench`.
-1. `Llama3.1-8b` and `Llama3.1-70b` config files for AWS Chips (Trainium, Inferentia).
-1. Misc. bug fixes.
-
 
 [Release history](./release_history.md)
 

diff --git a/docs/benchmarking.md b/docs/benchmarking.md
@@ -8,7 +8,7 @@
 |:--------------------------------|:-------|:--------------|:---------------------|:---------------|:-------------|:-------------|:-----------------------------|:--------------------------------|
 | **Anthropic Claude-3 Sonnet**   |        |               |                     |                |              |              | ✅                           | ✅                               |
 | **Anthropic Claude-3 Haiku**    |        |               |                     |                |              |              | ✅                           |                                    |
-| **Mistral-7b-instruct**          |        |               | ✅                   |                | ✅            | ✅           | ✅                           |                                    |
+| **Mistral-7b-instruct**          |        |      ✅         | ✅                   |                | ✅            | ✅           | ✅                           |                                    |
 | **Mistral-7b-AWQ**               |        |               |                     |                |              | ✅           |                             |                                    |
 | **Mixtral-8x7b-instruct**       |        |               |                     |                |              |              | ✅                           |                                    |
 | **Llama3.1-8b instruct**         |        | ✅           |                     | ✅             |              |              | ✅                           |                                    |
@@ -25,4 +25,4 @@
 | **AI21 J2 Ultra**                |        |               |                     |                |              |              | ✅                           |                                    |
 | **Gemma-2b**                     |        |               | ✅                   |                |              |              |                             |                                    |
 | **Phi-3-mini-4k-instruct**       |        |               | ✅                   |                |              |              |                             |                                    |
-| **distilbert-base-uncased**      |        |               | ✅                   |                |              |              |                             |                                    |
+| **distilbert-base-uncased**      |        |               | ✅                   |                |              |              |                             |                                    |
diff --git a/docs/benchmarking_on_ec2.md b/docs/benchmarking_on_ec2.md
@@ -36,23 +36,23 @@ The steps for deploying the model on your EC2 instance are described below.
         curl -s https://raw.githubusercontent.com/aws-samples/foundation-model-benchmarking-tool/main/copy_s3_content.sh | sh
         ```
 
-    1. To download the model files from HuggingFace, create a `hf_token.txt` file in the `/tmp/fmbench-read/scripts/` directory containing the Hugging Face token you would like to use. In the command below replace the `hf_yourtokenstring` with your hugging Face token.
+    1. To download the model files from HuggingFace, create a `hf_token.txt` file in the `/tmp/fmbench-read/scripts/` directory containing the Hugging Face token you would like to use. In the command below replace the `hf_yourtokenstring` with your Hugging Face token.
 
         ```{.bash}
         echo hf_yourtokenstring > /tmp/fmbench-read/scripts/hf_token.txt
         ```
 
-    1. Run `FMBench` with a packaged or a custom config file. **_This step will also deploy the model on the EC2 instance_**. The `--write-bucket` parameter value is just a placeholder and an actual S3 bucket is not required
+    1. Run `FMBench` with a packaged or a custom config file. **_This step will also deploy the model on the EC2 instance_**. The `--write-bucket` parameter value is just a placeholder and an actual S3 bucket is not required. **_Skip to the next step if benchmarking for AWS Chips_**. You could set the `--tmp-dir` flag to an EFA path instead of `/tmp` if using a shared path for storing config files and reports.
 
         ```{.bash}
-        fmbench --config-file /tmp/fmbench-read/configs/llama3/8b/config-ec2-llama3-8b.yml --local-mode yes --write-bucket placeholder > fmbench.log 2>&1
+        fmbench --config-file /tmp/fmbench-read/configs/llama3/8b/config-ec2-llama3-8b.yml --local-mode yes --write-bucket placeholder --tmp-dir /tmp > fmbench.log 2>&1
         ```
 
     1. For example, to run `FMBench` on a `llama3-8b-Instruct` model on an `inf2.48xlarge` instance, run the
     command below. The config file for this example can be viewed [here](src/fmbench/configs/llama3/8b/config-ec2-llama3-8b-inf2-48xl.yml).
 
         ```{.bash}
-        fmbench --config-file /tmp/fmbench-read/configs/llama3/8b/config-ec2-llama3-8b-inf2-48xl.yml --local-mode yes --write-bucket placeholder > fmbench.log 2>&1
+        fmbench --config-file /tmp/fmbench-read/configs/llama3/8b/config-ec2-llama3-8b-inf2-48xl.yml --local-mode yes --write-bucket placeholder --tmp-dir /tmp > fmbench.log 2>&1
         ```
 
     1. Open a new Terminal and navigate to the `foundation-model-benchmarking-tool` directory and do a `tail` on `fmbench.log` to see a live log of the run.

diff --git a/docs/releases.md b/docs/releases.md
@@ -1,5 +1,9 @@
 # Releases
 
+## 2.0.3
+
+1. Support for an EFA directory for benchmarking on EC2.
+
 ## 2.0.2
 
 1. Code cleanup, minor bug fixes and report improvements.

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fmbench"
-version = "2.0.2"
+version = "2.0.3"
 description ="Benchmark performance of **any Foundation Model (FM)** deployed on **any AWS Generative AI service**, be it **Amazon SageMaker**, **Amazon Bedrock**, **Amazon EKS**, or **Amazon EC2**. The FMs could be deployed on these platforms either directly through `FMbench`, or, if they are already deployed then also they could be benchmarked through the **Bring your own endpoint** mode supported by `FMBench`."
 authors = ["Amit Arora <aroraai@amazon.com>", "Madhur prashant <Madhurpt@amazon.com>"]
 readme = "README.md"

diff --git a/release_history.md b/release_history.md
@@ -1,3 +1,9 @@
+## v1.0.52
+
+1. Compile for AWS Chips (Trainium, Inferentia) and deploy to SageMaker directly through `FMBench`.
+1. `Llama3.1-8b` and `Llama3.1-70b` config files for AWS Chips (Trainium, Inferentia).
+1. Misc. bug fixes.
+
 ## v1.0.51
 
 1. `FMBench` has a [website](https://aws-samples.github.io/foundation-model-benchmarking-tool/index.html) now. Rework the README file to make it lightweight.