diff --git a/bin/generate_truss_examples.py b/bin/generate_truss_examples.py
index defa1ca1b..defe29f42 100644
--- a/bin/generate_truss_examples.py
+++ b/bin/generate_truss_examples.py
@@ -10,14 +10,13 @@
```
"""
import enum
-import itertools
import json
import os
import shutil
import subprocess
import sys
from pathlib import Path
-from typing import Iterator, List, Optional, Tuple
+from typing import List, Optional, Tuple
import yaml
@@ -209,6 +208,14 @@ def _generate_truss_example(truss_directory: str):
description: "{doc_information["description"]}"
---
"""
+
+ path_in_examples_repo = "/".join(Path(truss_directory).parts[1:])
+    # Assumed markup: a Mintlify <Card> that links the generated page back to the
+    # example's directory in the truss-examples GitHub repo.
+    link_to_github = f"""
+    <Card title="View on Github" icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/{path_in_examples_repo}">
+    </Card>
+    """
files_to_scrape = doc_information["files"]
full_content, code_blocks = zip(
@@ -222,7 +229,7 @@ def _generate_truss_example(truss_directory: str):
file_content = "\n".join(full_content) + _generate_request_example_block(
full_code_block
)
- example_content = f"""{header}\n{file_content}"""
+ example_content = f"""{header}\n{link_to_github}\n{file_content}"""
path_to_example = Path(example_destination)
path_to_example.parent.mkdir(parents=True, exist_ok=True)
@@ -246,17 +253,6 @@ def _format_group_name(group_name: str) -> str:
return lowercase_name[0].upper() + lowercase_name[1:]
-def _toc_section(
- example_group_name: str, example_group: Iterator[Tuple[str, ...]]
-) -> dict:
- return {
- "group": _format_group_name(example_group_name),
- "pages": [
- f"examples/{example[0]}/{example[1]}" for example in list(example_group)
- ],
- }
-
-
def update_toc(example_dirs: List[str]):
"""
Update the table of contents in the docs mint.json config.
@@ -273,21 +269,12 @@ def update_toc(example_dirs: List[str]):
examples_section = [item for item in navigation if item["group"] == "Examples"][0]
- # Group together by the parent directory. ie:
- #
- # * 3_llms/llm
- # * 3_llms/llm-streaming
- #
- # will be grouped together with they key "3_llms". This allows us to have proper
- # nesting in the table of contents.
- grouped_examples = itertools.groupby(
- sorted(transformed_example_paths, key=lambda example: example[0]),
- key=lambda example: example[0],
- )
-
+    # Emit a flat list of example pages, sorted by their parent directory (group) name.
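+    # e.g. ("3_LLMs", "llm-with-streaming") -> "examples/3_LLMs/llm-with-streaming"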
examples_section["pages"] = [
- _toc_section(example_group_name, example_group)
- for example_group_name, example_group in grouped_examples
+ f"examples/{example_path[0]}/{example_path[1]}"
+ for example_path in sorted(
+ transformed_example_paths, key=lambda example: example[0]
+ )
]
serialized_mint_config = json.dumps(mint_config, indent=2)
diff --git a/docs/examples/1_introduction/getting-started-bert.mdx b/docs/examples/1_introduction/getting-started-bert.mdx
index d434134de..5a1033d01 100644
--- a/docs/examples/1_introduction/getting-started-bert.mdx
+++ b/docs/examples/1_introduction/getting-started-bert.mdx
@@ -3,6 +3,12 @@ title: "Getting Started: Text Classification"
description: "Building your first Truss"
---
+
+
+
+
In this example, we go through building your first Truss model. We'll be using the HuggingFace transformers
library to build a text classification model that can detect sentiment of text.
@@ -63,7 +69,9 @@ such as the name, and the Python version to build with.
```yaml config.yaml
model_name: bert
python_version: py310
-model_metadata: {}
+model_metadata:
+ example_model_input: { "text": "Hello my name is {MASK}" }
+
```
### Set up python requirements
@@ -135,7 +143,9 @@ class Model:
```yaml config.yaml
model_name: bert
python_version: py310
-model_metadata: {}
+model_metadata:
+ example_model_input: { "text": "Hello my name is {MASK}" }
+
requirements:
- torch==2.0.1
diff --git a/docs/examples/2_image_classification/clip.mdx b/docs/examples/2_image_classification/clip.mdx
new file mode 100644
index 000000000..85d803fba
--- /dev/null
+++ b/docs/examples/2_image_classification/clip.mdx
@@ -0,0 +1,181 @@
+---
+title: "Image Classification with CLIP"
+description: "Deploy a CLIP model to classify images"
+---
+
+
+
+
+
+In this example, we create a Truss that uses [CLIP](https://openai.com/research/clip) to classify images
+against a set of pre-defined labels. The input to this Truss is an image, and the output is a classification.
+
+One important thing to note about this example is that since the inputs are images, we need some
+mechanism for downloading the image. To accomplish this, we have the user pass a downloadable URL to
+the Truss, and in the Truss code, download the image. To do this efficiently, we make use of the
+`preprocess` method in Truss.
+
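+As a rough standalone sketch of the end-to-end flow (outside of Truss, assuming the `transformers`,
+`pillow`, and `torch` requirements from the config below are installed), the download-then-classify
+step looks roughly like this:
+
+```python
+# Standalone sketch: download an image, preprocess it against preset labels with CLIP's
+# processor, and turn the logits into per-label probabilities.
+import requests
+from PIL import Image
+from transformers import CLIPModel, CLIPProcessor
+
+CHECKPOINT = "openai/clip-vit-base-patch32"
+model = CLIPModel.from_pretrained(CHECKPOINT)
+processor = CLIPProcessor.from_pretrained(CHECKPOINT)
+
+url = "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg?auto=compress&cs=tinysrgb&w=1600"
+image = Image.open(requests.get(url, stream=True).raw)
+inputs = processor(
+    text=["a photo of a cat", "a photo of a dog"],
+    images=image,
+    return_tensors="pt",
+    padding=True,
+)
+probabilities = model(**inputs).logits_per_image.softmax(dim=1).tolist()
+print(probabilities)  # one probability per preset label
+```
+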
+# Set up imports and constants
+
+For our CLIP Truss, we will be using the Hugging Face transformers library, as well as
+`pillow` for image processing.
+
+```python model/model.py
+import requests
+from typing import Dict
+from PIL import Image
+from transformers import CLIPProcessor, CLIPModel
+
+```
+This is the CLIP model from Hugging Face that we will use for this example.
+
+```python model/model.py
+CHECKPOINT = "openai/clip-vit-base-patch32"
+
+```
+# Define the Truss
+
+In the `load` method, we load in the pretrained CLIP model from the
+Hugging Face checkpoint specified above.
+
+```python model/model.py
+class Model:
+ def __init__(self, **kwargs) -> None:
+ self._processor = None
+ self._model = None
+
+ def load(self):
+ """
+ Loads the CLIP model and processor checkpoints.
+ """
+ self._model = CLIPModel.from_pretrained(CHECKPOINT)
+ self._processor = CLIPProcessor.from_pretrained(CHECKPOINT)
+
+```
+In the `preprocess` method, we download the image from the URL and preprocess it.
+This method is part of the Truss model class, and is designed to be used for any logic
+involving IO, like downloading an image in this case.
+
+It is called before the `predict` method in a separate thread, and is not subject to the same
+concurrency limits as `predict`, so many preprocess calls can run in parallel.
+This keeps `predict` from being unnecessarily blocked on IO-bound
+tasks and helps improve the throughput of the Truss. See our [guide to concurrency](../guides/concurrency)
+for more info.
+
+```python model/model.py
+ def preprocess(self, request: Dict) -> Dict:
+
+ image = Image.open(requests.get(request.pop("url"), stream=True).raw)
+ request["inputs"] = self._processor(
+ text=["a photo of a cat", "a photo of a dog"], # Define preset labels to use
+ images=image,
+ return_tensors="pt",
+ padding=True
+ )
+ return request
+
+```
+The `predict` method performs the actual inference, and outputs a probability associated
+with each of the labels defined earlier.
+
+```python model/model.py
+ def predict(self, request: Dict) -> Dict:
+ """
+ This performs the actual classification. The predict method is subject to
+ the predict concurrency constraints.
+ """
+ outputs = self._model(**request["inputs"])
+ logits_per_image = outputs.logits_per_image
+ return logits_per_image.softmax(dim=1).tolist()
+```
+
+# Set up the config.yaml
+
+The main section that needs to be filled out
+to run CLIP is the `requirements` section, where we need
+to include `transformers` (for the model and processor) and `pillow`
+(for image processing).
+
+```yaml config.yaml
+model_name: clip-example
+requirements:
+- transformers==4.32.0
+- pillow==10.0.0
+- torch==2.0.1
+model_metadata:
+ example_model_input: {"url": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg?auto=compress&cs=tinysrgb&w=1600"}
+resources:
+ cpu: "3"
+ memory: 14Gi
+ use_gpu: true
+ accelerator: A10G
+```
+# Deploy the model
+
+Deploy the CLIP model like you would other Trusses, with:
+```bash
+$ truss push
+```
+You can then invoke the model with:
+```bash
+$ truss predict -d '{"url": "https://source.unsplash.com/gKXKBY-C-Dk/300x300"}' --published
+```
+
+
+```python model/model.py
+import requests
+from typing import Dict
+from PIL import Image
+from transformers import CLIPProcessor, CLIPModel
+
+CHECKPOINT = "openai/clip-vit-base-patch32"
+
+class Model:
+ def __init__(self, **kwargs) -> None:
+ self._processor = None
+ self._model = None
+
+ def load(self):
+ """
+ Loads the CLIP model and processor checkpoints.
+ """
+ self._model = CLIPModel.from_pretrained(CHECKPOINT)
+ self._processor = CLIPProcessor.from_pretrained(CHECKPOINT)
+
+ def preprocess(self, request: Dict) -> Dict:
+
+ image = Image.open(requests.get(request.pop("url"), stream=True).raw)
+ request["inputs"] = self._processor(
+ text=["a photo of a cat", "a photo of a dog"], # Define preset labels to use
+ images=image,
+ return_tensors="pt",
+ padding=True
+ )
+ return request
+
+ def predict(self, request: Dict) -> Dict:
+ """
+ This performs the actual classification. The predict method is subject to
+ the predict concurrency constraints.
+ """
+ outputs = self._model(**request["inputs"])
+ logits_per_image = outputs.logits_per_image
+ return logits_per_image.softmax(dim=1).tolist()
+```
+```yaml config.yaml
+model_name: clip-example
+requirements:
+- transformers==4.32.0
+- pillow==10.0.0
+- torch==2.0.1
+model_metadata:
+ example_model_input: {"url": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg?auto=compress&cs=tinysrgb&w=1600"}
+resources:
+ cpu: "3"
+ memory: 14Gi
+ use_gpu: true
+ accelerator: A10G
+```
+
diff --git a/docs/examples/3_LLMs/llm-with-streaming.mdx b/docs/examples/3_LLMs/llm-with-streaming.mdx
index 8b3c27764..589cd548d 100644
--- a/docs/examples/3_LLMs/llm-with-streaming.mdx
+++ b/docs/examples/3_LLMs/llm-with-streaming.mdx
@@ -3,6 +3,12 @@ title: "LLM with Streaming"
description: "Building an LLM with streaming output"
---
+
+
+
+
In this example, we go through a Truss that serves an LLM, and streams the output to the client.
# Why Streaming?
@@ -143,11 +149,13 @@ and a few other related libraries.
```yaml config.yaml
model_name: "LLM with Streaming"
+model_metadata:
+ example_model_input: {"prompt": "what is the meaning of life"}
requirements:
- torch==2.0.1
- peft==0.4.0
- scipy==1.11.1
-- sentencepiece==1.11.1
+- sentencepiece==0.1.99
- accelerate==0.21.0
- bitsandbytes==0.41.1
- einops==0.6.1
@@ -235,11 +243,13 @@ class Model:
```
```yaml config.yaml
model_name: "LLM with Streaming"
+model_metadata:
+ example_model_input: {"prompt": "what is the meaning of life"}
requirements:
- torch==2.0.1
- peft==0.4.0
- scipy==1.11.1
-- sentencepiece==1.11.1
+- sentencepiece==0.1.99
- accelerate==0.21.0
- bitsandbytes==0.41.1
- einops==0.6.1
diff --git a/docs/examples/6_high_performance/tgi.mdx b/docs/examples/6_high_performance/tgi.mdx
index 20f225980..0ed0974d4 100644
--- a/docs/examples/6_high_performance/tgi.mdx
+++ b/docs/examples/6_high_performance/tgi.mdx
@@ -3,6 +3,12 @@ title: "High Performance LLMs with TGI"
description: "Deploy a language model with TGI"
---
+
+
+
+
[TGI](https://github.com/huggingface/text-generation-inference/tree/main) is a model server optimized for
language models. In this example, we put together a Truss that serves the model Falcon 7B using TGI.
@@ -24,7 +30,7 @@ The endpoint argument has two options:
Select the model that you'd like to use with TGI
```yaml config.yaml
- model: tiiuae/falcon-7b
+ model_id: tiiuae/falcon-7b
```
The `model_server` parameter allows you to specify a supported backend (in this example, TGI)
@@ -45,7 +51,8 @@ The remaining config options listed are standard Truss Config options.
```yaml config.yaml
environment_variables: {}
external_package_dirs: []
-model_metadata: {}
+model_metadata:
+ example_model_input: {"inputs": "what is the meaning of life"}
model_name: Falcon-TGI
python_version: py39
requirements: []
@@ -73,13 +80,14 @@ $ truss predict -d '{"inputs": "What is a large language model?", "parameters":
build:
arguments:
endpoint: generate_stream
- model: tiiuae/falcon-7b
+ model_id: tiiuae/falcon-7b
model_server: TGI
runtime:
predict_concurrency: 128
environment_variables: {}
external_package_dirs: []
-model_metadata: {}
+model_metadata:
+ example_model_input: {"inputs": "what is the meaning of life"}
model_name: Falcon-TGI
python_version: py39
requirements: []
diff --git a/docs/examples/6_high_performance/vllm.mdx b/docs/examples/6_high_performance/vllm.mdx
index 2b8f77953..9a44bc95e 100644
--- a/docs/examples/6_high_performance/vllm.mdx
+++ b/docs/examples/6_high_performance/vllm.mdx
@@ -3,6 +3,12 @@ title: "High Performance LLMs with vLLM"
description: "Deploy a language model with vLLM"
---
+
+
+
+
[vLLM](https://github.com/vllm-project/vllm) is a Python-based package that optimizes the Attention
layer in Transformer models. By better allocating memory used during the attention computation,
vLLM can reduce the memory footprint of a model and significantly improve inference speed. Truss
diff --git a/docs/examples/7_misc/private-huggingface-model.mdx b/docs/examples/7_misc/private-huggingface-model.mdx
index a0c7c82d6..935aff24a 100644
--- a/docs/examples/7_misc/private-huggingface-model.mdx
+++ b/docs/examples/7_misc/private-huggingface-model.mdx
@@ -3,6 +3,12 @@ title: "Private Hugging Face Model"
description: "Load a model that requires authentication with Hugging Face"
---
+
+
+
+
In this example, we build a Truss that uses a model that
requires Hugging Face authentication. The steps for loading a model
from Hugging Face are:
diff --git a/docs/examples/7_misc/system-packages.mdx b/docs/examples/7_misc/system-packages.mdx
index e451e8c68..3c66a7c21 100644
--- a/docs/examples/7_misc/system-packages.mdx
+++ b/docs/examples/7_misc/system-packages.mdx
@@ -3,6 +3,12 @@ title: "Model with system packages"
description: "Deploy a model with both Python and system dependencies"
---
+
+
+
+
In this example, we build a Truss with a model that requires specific system packages.
To add system packages to your Truss, you can add a `system_packages` key to your config.yaml file,
@@ -52,7 +58,8 @@ and `system_packages` sections.
```yaml config.yaml
environment_variables: {}
external_package_dirs: []
-model_metadata: {}
+model_metadata:
+ example_model_input: {"url": "https://templates.invoicehome.com/invoice-template-us-neat-750px.png", "prompt": "What is the invoice number?"}
model_name: LayoutLM Document QA
python_version: py39
```
@@ -112,7 +119,8 @@ class Model:
```yaml config.yaml
environment_variables: {}
external_package_dirs: []
-model_metadata: {}
+model_metadata:
+ example_model_input: {"url": "https://templates.invoicehome.com/invoice-template-us-neat-750px.png", "prompt": "What is the invoice number?"}
model_name: LayoutLM Document QA
python_version: py39
requirements:
diff --git a/docs/mint.json b/docs/mint.json
index 91dff4e0c..3676bd72b 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -55,32 +55,13 @@
{
"group": "Examples",
"pages": [
- {
- "group": "Introduction",
- "pages": [
- "examples/1_introduction/getting-started-bert"
- ]
- },
- {
- "group": "LLMs",
- "pages": [
- "examples/3_LLMs/llm-with-streaming"
- ]
- },
- {
- "group": "High performance",
- "pages": [
- "examples/6_high_performance/vllm",
- "examples/6_high_performance/tgi"
- ]
- },
- {
- "group": "Misc",
- "pages": [
- "examples/7_misc/private-huggingface-model",
- "examples/7_misc/system-packages"
- ]
- }
+ "examples/1_introduction/getting-started-bert",
+ "examples/2_image_classification/clip",
+ "examples/3_LLMs/llm-with-streaming",
+ "examples/6_high_performance/vllm",
+ "examples/6_high_performance/tgi",
+ "examples/7_misc/private-huggingface-model",
+ "examples/7_misc/system-packages"
]
},
{