diff --git a/bin/generate_truss_examples.py b/bin/generate_truss_examples.py
index defa1ca1b..defe29f42 100644
--- a/bin/generate_truss_examples.py
+++ b/bin/generate_truss_examples.py
@@ -10,14 +10,13 @@
 ```
 """
 import enum
-import itertools
 import json
 import os
 import shutil
 import subprocess
 import sys
 from pathlib import Path
-from typing import Iterator, List, Optional, Tuple
+from typing import List, Optional, Tuple
 
 import yaml
 
@@ -209,6 +208,14 @@ def _generate_truss_example(truss_directory: str):
 description: "{doc_information["description"]}"
 ---
 """
+
+    path_in_examples_repo = "/".join(Path(truss_directory).parts[1:])
+    link_to_github = f"""
+
+
+    """
 
     files_to_scrape = doc_information["files"]
 
     full_content, code_blocks = zip(
@@ -222,7 +229,7 @@ def _generate_truss_example(truss_directory: str):
     file_content = "\n".join(full_content) + _generate_request_example_block(
         full_code_block
     )
-    example_content = f"""{header}\n{file_content}"""
+    example_content = f"""{header}\n{link_to_github}\n{file_content}"""
 
     path_to_example = Path(example_destination)
     path_to_example.parent.mkdir(parents=True, exist_ok=True)
@@ -246,17 +253,6 @@ def _format_group_name(group_name: str) -> str:
     return lowercase_name[0].upper() + lowercase_name[1:]
 
 
-def _toc_section(
-    example_group_name: str, example_group: Iterator[Tuple[str, ...]]
-) -> dict:
-    return {
-        "group": _format_group_name(example_group_name),
-        "pages": [
-            f"examples/{example[0]}/{example[1]}" for example in list(example_group)
-        ],
-    }
-
-
 def update_toc(example_dirs: List[str]):
     """
     Update the table of contents in the README.md file.
@@ -273,21 +269,12 @@ def update_toc(example_dirs: List[str]):
 
     examples_section = [item for item in navigation if item["group"] == "Examples"][0]
 
-    # Group together by the parent directory. ie:
-    #
-    # * 3_llms/llm
-    # * 3_llms/llm-streaming
-    #
-    # will be grouped together with they key "3_llms". This allows us to have proper
-    # nesting in the table of contents.
-    grouped_examples = itertools.groupby(
-        sorted(transformed_example_paths, key=lambda example: example[0]),
-        key=lambda example: example[0],
-    )
-
+    # Sort examples by the group name
     examples_section["pages"] = [
-        _toc_section(example_group_name, example_group)
-        for example_group_name, example_group in grouped_examples
+        f"examples/{example_path[0]}/{example_path[1]}"
+        for example_path in sorted(
+            transformed_example_paths, key=lambda example: example[0]
+        )
     ]
 
     serialized_mint_config = json.dumps(mint_config, indent=2)
diff --git a/docs/examples/1_introduction/getting-started-bert.mdx b/docs/examples/1_introduction/getting-started-bert.mdx
index d434134de..5a1033d01 100644
--- a/docs/examples/1_introduction/getting-started-bert.mdx
+++ b/docs/examples/1_introduction/getting-started-bert.mdx
@@ -3,6 +3,12 @@ title: "Getting Started: Text Classification"
 description: "Building your first Truss"
 ---
+
+
+
+
 In this example, we go through building your first Truss model. We'll be using the HuggingFace
 transformers library to build a text classification model that can detect sentiment of text.
 
@@ -63,7 +69,9 @@
 such as the name, and the Python version to build with.
 ```yaml config.yaml
 model_name: bert
 python_version: py310
-model_metadata: {}
+model_metadata:
+  example_model_input: { "text": "Hello my name is {MASK}" }
+
 ```
 
 ### Set up python requirements
@@ -135,7 +143,9 @@ class Model:
 ```yaml config.yaml
 model_name: bert
 python_version: py310
-model_metadata: {}
+model_metadata:
+  example_model_input: { "text": "Hello my name is {MASK}" }
+
 requirements:
 - torch==2.0.1
diff --git a/docs/examples/2_image_classification/clip.mdx b/docs/examples/2_image_classification/clip.mdx
new file mode 100644
index 000000000..85d803fba
--- /dev/null
+++ b/docs/examples/2_image_classification/clip.mdx
@@ -0,0 +1,181 @@
+---
+title: "Image Classification with CLIP"
+description: "Deploy a CLIP model to classify images"
+---
+
+
+
+
+In this example, we create a Truss that uses [CLIP](https://openai.com/research/clip) to classify images,
+using some pre-defined labels. The input to this Truss will be an image, and the output will be a classification.
+
+One of the major things to note about this example is that since the inputs are images, we need
+some mechanism for downloading the image. To accomplish this, we have the user pass a downloadable URL to
+the Truss, and in the Truss code, download the image. To do this efficiently, we will make use of the
+`preprocess` method in Truss.
+
+# Set up imports and constants
+
+For our CLIP Truss, we will be using the Hugging Face transformers library, as well as
+`pillow` for image processing.
+
+```python model/model.py
+import requests
+from typing import Dict
+from PIL import Image
+from transformers import CLIPProcessor, CLIPModel
+
+```
+This is the CLIP model from Hugging Face that we will use for this example.
+
+```python model/model.py
+CHECKPOINT = "openai/clip-vit-base-patch32"
+
+```
+# Define the Truss
+
+In the `load` method, we load the pretrained CLIP model from the
+Hugging Face checkpoint specified above.
+
+```python model/model.py
+class Model:
+    def __init__(self, **kwargs) -> None:
+        self._processor = None
+        self._model = None
+
+    def load(self):
+        """
+        Loads the CLIP model and processor checkpoints.
+        """
+        self._model = CLIPModel.from_pretrained(CHECKPOINT)
+        self._processor = CLIPProcessor.from_pretrained(CHECKPOINT)
+
+```
+In the `preprocess` method, we download the image from the URL and preprocess it.
+This method is a part of the Truss class, and is designed to be used for any logic
+involving IO, like in this case, downloading an image.
+
+It is called before the predict method in a separate thread, and is not subject to the same
+concurrency limits as the predict method, so it can be called many times in parallel.
+This keeps the predict method from being unnecessarily blocked on IO-bound
+tasks, and helps improve the throughput of the Truss. See our [guide to concurrency](../guides/concurrency)
+for more info.
+
+```python model/model.py
+    def preprocess(self, request: Dict) -> Dict:
+
+        image = Image.open(requests.get(request.pop("url"), stream=True).raw)
+        request["inputs"] = self._processor(
+            text=["a photo of a cat", "a photo of a dog"],  # Define preset labels to use
+            images=image,
+            return_tensors="pt",
+            padding=True
+        )
+        return request
+
+```
+The `predict` method performs the actual inference, and outputs a probability associated
+with each of the labels defined earlier.
+
+```python model/model.py
+    def predict(self, request: Dict) -> Dict:
+        """
+        This performs the actual classification. The predict method is subject to
+        the predict concurrency constraints.
+        """
+        outputs = self._model(**request["inputs"])
+        logits_per_image = outputs.logits_per_image
+        return logits_per_image.softmax(dim=1).tolist()
+```
+
+# Set up the config.yaml
+
+The main section that needs to be filled out
+to run CLIP is the `requirements` section, where we need
+to include `transformers` for the model pipeline and `pillow`
+for image processing.
+
+```yaml config.yaml
+model_name: clip-example
+requirements:
+- transformers==4.32.0
+- pillow==10.0.0
+- torch==2.0.1
+model_metadata:
+  example_model_input: {"url": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg?auto=compress&cs=tinysrgb&w=1600"}
+resources:
+  cpu: "3"
+  memory: 14Gi
+  use_gpu: true
+  accelerator: A10G
+```
+# Deploy the model
+
+Deploy the CLIP model like you would other Trusses, with:
+```bash
+$ truss push
+```
+You can then invoke the model with:
+```bash
+$ truss predict -d '{"url": "https://source.unsplash.com/gKXKBY-C-Dk/300x300"}' --published
+```
+
+
+```python model/model.py
+import requests
+from typing import Dict
+from PIL import Image
+from transformers import CLIPProcessor, CLIPModel
+
+CHECKPOINT = "openai/clip-vit-base-patch32"
+
+class Model:
+    def __init__(self, **kwargs) -> None:
+        self._processor = None
+        self._model = None
+
+    def load(self):
+        """
+        Loads the CLIP model and processor checkpoints.
+        """
+        self._model = CLIPModel.from_pretrained(CHECKPOINT)
+        self._processor = CLIPProcessor.from_pretrained(CHECKPOINT)
+
+    def preprocess(self, request: Dict) -> Dict:
+
+        image = Image.open(requests.get(request.pop("url"), stream=True).raw)
+        request["inputs"] = self._processor(
+            text=["a photo of a cat", "a photo of a dog"],  # Define preset labels to use
+            images=image,
+            return_tensors="pt",
+            padding=True
+        )
+        return request
+
+    def predict(self, request: Dict) -> Dict:
+        """
+        This performs the actual classification. The predict method is subject to
+        the predict concurrency constraints.
+        """
+        outputs = self._model(**request["inputs"])
+        logits_per_image = outputs.logits_per_image
+        return logits_per_image.softmax(dim=1).tolist()
+```
+```yaml config.yaml
+model_name: clip-example
+requirements:
+- transformers==4.32.0
+- pillow==10.0.0
+- torch==2.0.1
+model_metadata:
+  example_model_input: {"url": "https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg?auto=compress&cs=tinysrgb&w=1600"}
+resources:
+  cpu: "3"
+  memory: 14Gi
+  use_gpu: true
+  accelerator: A10G
+```
+
diff --git a/docs/examples/3_LLMs/llm-with-streaming.mdx b/docs/examples/3_LLMs/llm-with-streaming.mdx
index 8b3c27764..589cd548d 100644
--- a/docs/examples/3_LLMs/llm-with-streaming.mdx
+++ b/docs/examples/3_LLMs/llm-with-streaming.mdx
@@ -3,6 +3,12 @@ title: "LLM with Streaming"
 description: "Building an LLM with streaming output"
 ---
+
+
+
+
 In this example, we go through a Truss that serves an LLM, and streams the output to the client.
 
 # Why Streaming?
@@ -143,11 +149,13 @@
 and a few other related libraries.
 ```yaml config.yaml
 model_name: "LLM with Streaming"
+model_metadata:
+  example_model_input: {"prompt": "what is the meaning of life"}
 requirements:
 - torch==2.0.1
 - peft==0.4.0
 - scipy==1.11.1
-- sentencepiece==1.11.1
+- sentencepiece==0.1.99
 - accelerate==0.21.0
 - bitsandbytes==0.41.1
 - einops==0.6.1
@@ -235,11 +243,13 @@ class Model:
 ```
 ```yaml config.yaml
 model_name: "LLM with Streaming"
+model_metadata:
+  example_model_input: {"prompt": "what is the meaning of life"}
 requirements:
 - torch==2.0.1
 - peft==0.4.0
 - scipy==1.11.1
-- sentencepiece==1.11.1
+- sentencepiece==0.1.99
 - accelerate==0.21.0
 - bitsandbytes==0.41.1
 - einops==0.6.1
diff --git a/docs/examples/6_high_performance/tgi.mdx b/docs/examples/6_high_performance/tgi.mdx
index 20f225980..0ed0974d4 100644
--- a/docs/examples/6_high_performance/tgi.mdx
+++ b/docs/examples/6_high_performance/tgi.mdx
@@ -3,6 +3,12 @@ title: "High Performance LLMs with TGI"
 description: "Deploy a language model with TGI"
 ---
+
+
+
+
 [TGI](https://github.com/huggingface/text-generation-inference/tree/main) is a model server optimized for language models.
 
 In this example, we put together a Truss that serves the model Falcon 7B using TGI.
@@ -24,7 +30,7 @@ The endpoint argument has two options:
 
 Select the model that you'd like to use with TGI
 
 ```yaml config.yaml
-  model: tiiuae/falcon-7b
+  model_id: tiiuae/falcon-7b
 ```
 
 The `model_server` parameter allows you to specify a supported backend (in this example, TGI)
@@ -45,7 +51,8 @@ The remaining config options listed are standard Truss Config options.
 ```yaml config.yaml
 environment_variables: {}
 external_package_dirs: []
-model_metadata: {}
+model_metadata:
+  example_model_input: {"inputs": "what is the meaning of life"}
 model_name: Falcon-TGI
 python_version: py39
 requirements: []
@@ -73,13 +80,14 @@ $ truss predict -d '{"inputs": "What is a large language model?", "parameters":
 build:
   arguments:
     endpoint: generate_stream
-    model: tiiuae/falcon-7b
+    model_id: tiiuae/falcon-7b
     model_server: TGI
   runtime:
     predict_concurrency: 128
 environment_variables: {}
 external_package_dirs: []
-model_metadata: {}
+model_metadata:
+  example_model_input: {"inputs": "what is the meaning of life"}
 model_name: Falcon-TGI
 python_version: py39
 requirements: []
diff --git a/docs/examples/6_high_performance/vllm.mdx b/docs/examples/6_high_performance/vllm.mdx
index 2b8f77953..9a44bc95e 100644
--- a/docs/examples/6_high_performance/vllm.mdx
+++ b/docs/examples/6_high_performance/vllm.mdx
@@ -3,6 +3,12 @@ title: "High Performance LLMs with vLLM"
 description: "Deploy a language model with vLLM"
 ---
+
+
+
+
 [vLLM](https://github.com/vllm-project/vllm) is a Python-based package that optimizes the Attention
 layer in Transformer models. By better allocating memory used during the attention computation,
 vLLM can reduce the memory footprint of a model and significantly improve inference speed. Truss
diff --git a/docs/examples/7_misc/private-huggingface-model.mdx b/docs/examples/7_misc/private-huggingface-model.mdx
index a0c7c82d6..935aff24a 100644
--- a/docs/examples/7_misc/private-huggingface-model.mdx
+++ b/docs/examples/7_misc/private-huggingface-model.mdx
@@ -3,6 +3,12 @@ title: "Private Hugging Face Model"
 description: "Load a model that requires authentication with Hugging Face"
 ---
+
+
+
+
 In this example, we build a Truss that uses a model that requires Hugging Face authentication.
 The steps for loading a model from Hugging Face are:
diff --git a/docs/examples/7_misc/system-packages.mdx b/docs/examples/7_misc/system-packages.mdx
index e451e8c68..3c66a7c21 100644
--- a/docs/examples/7_misc/system-packages.mdx
+++ b/docs/examples/7_misc/system-packages.mdx
@@ -3,6 +3,12 @@ title: "Model with system packages"
 description: "Deploy a model with both Python and system dependencies"
 ---
+
+
+
+
 In this example, we build a Truss with a model that requires specific system packages.
 
 To add system packages to your Truss, you can add a `system_packages` key to your config.yaml file,
@@ -52,7 +58,8 @@ and `system_packages` sections.
 ```yaml config.yaml
 environment_variables: {}
 external_package_dirs: []
-model_metadata: {}
+model_metadata:
+  example_model_input: {"url": "https://templates.invoicehome.com/invoice-template-us-neat-750px.png", "prompt": "What is the invoice number?"}
 model_name: LayoutLM Document QA
 python_version: py39
 ```
@@ -112,7 +119,8 @@ class Model:
 ```yaml config.yaml
 environment_variables: {}
 external_package_dirs: []
-model_metadata: {}
+model_metadata:
+  example_model_input: {"url": "https://templates.invoicehome.com/invoice-template-us-neat-750px.png", "prompt": "What is the invoice number?"}
 model_name: LayoutLM Document QA
 python_version: py39
 requirements:
diff --git a/docs/mint.json b/docs/mint.json
index 91dff4e0c..3676bd72b 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -55,32 +55,13 @@
     {
       "group": "Examples",
      "pages": [
-        {
-          "group": "Introduction",
-          "pages": [
-            "examples/1_introduction/getting-started-bert"
-          ]
-        },
-        {
-          "group": "LLMs",
-          "pages": [
-            "examples/3_LLMs/llm-with-streaming"
-          ]
-        },
-        {
-          "group": "High performance",
-          "pages": [
-            "examples/6_high_performance/vllm",
-            "examples/6_high_performance/tgi"
-          ]
-        },
-        {
-          "group": "Misc",
-          "pages": [
-            "examples/7_misc/private-huggingface-model",
-            "examples/7_misc/system-packages"
-          ]
-        }
+        "examples/1_introduction/getting-started-bert",
+        "examples/2_image_classification/clip",
+        "examples/3_LLMs/llm-with-streaming",
+        "examples/6_high_performance/vllm",
+        "examples/6_high_performance/tgi",
+        "examples/7_misc/private-huggingface-model",
+        "examples/7_misc/system-packages"
      ]
    },
    {
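
The flat `pages` list in the mint.json hunk above is what the rewritten comprehension in `bin/generate_truss_examples.py` now produces once the nested `_toc_section` grouping is removed. A minimal, runnable sketch of that behavior (the sample tuples below are hypothetical stand-ins for `transformed_example_paths`):

```python
# Sketch of the flattened-TOC logic from update_toc in bin/generate_truss_examples.py.
# The tuples are illustrative; the real script derives them from the example
# directories, e.g. "3_LLMs/llm-with-streaming" -> ("3_LLMs", "llm-with-streaming").
transformed_example_paths = [
    ("3_LLMs", "llm-with-streaming"),
    ("1_introduction", "getting-started-bert"),
    ("2_image_classification", "clip"),
]

pages = [
    f"examples/{example_path[0]}/{example_path[1]}"
    for example_path in sorted(transformed_example_paths, key=lambda example: example[0])
]
print(pages)
# ['examples/1_introduction/getting-started-bert',
#  'examples/2_image_classification/clip',
#  'examples/3_LLMs/llm-with-streaming']
```

Sorting on the group directory name keeps the pages in numeric-prefix order, so the "Examples" group in mint.json becomes a single sorted list of page paths instead of nested sub-groups.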