From 712a887dec070c9a32c7b3d184275528ea4b1e03 Mon Sep 17 00:00:00 2001
From: guschmue
Date: Thu, 16 May 2024 10:25:29 -0700
Subject: [PATCH] add olive instructions to readme

---
 js/chat/README.md | 15 +++++++++------
 js/chat/main.js   |  5 -----
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/js/chat/README.md b/js/chat/README.md
index ff4d1dd6..0b9e2d7f 100644
--- a/js/chat/README.md
+++ b/js/chat/README.md
@@ -40,12 +40,15 @@ npm run dev
 
 This will build the project and start a dev server. Point your browser to http://localhost:8080/.
 
-### The ONNX Model
+### The Phi3 ONNX Model
 
-The model used in this project is hosted on [Hugging Face](https://huggingface.co/schmuell/phi3-int4). It was created using the [onnx model builder](https://github.com/microsoft/onnxruntime-genai/tree/main/src/python/py/models).
+The model used in this example is hosted on [Hugging Face](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx-web). It is slightly different from the ONNX model for CUDA or CPU:
+1. The model output 'logits' is kept as float32 (even for float16 models) since JavaScript does not support float16.
+2. Our WebGPU implementation uses the custom MultiHeadAttention operator instead of Group Query Attention.
+3. Phi3 is larger than 2GB and we need to use external data files. To keep them cacheable in the browser,
+   both model.onnx and model.onnx.data are kept under 2GB.
 
-You can create the model with
+The model was created using the [ONNX genai model builder](https://github.com/microsoft/onnxruntime-genai/tree/main/src/python/py/models).
 
-```sh
-python builder.py -m microsoft/Phi-3-mini-4k-instruct -o $your_output -p int4 -e web
-```
+If you would like to create the model yourself, you can use [Olive](https://github.com/microsoft/Olive/).
+An example of how to create the model for ONNX Runtime Web with Olive can be found [here](https://github.com/microsoft/Olive/tree/main/examples/phi3).
diff --git a/js/chat/main.js b/js/chat/main.js
index d2bd3d23..10d7599c 100644
--- a/js/chat/main.js
+++ b/js/chat/main.js
@@ -4,12 +4,7 @@ import { marked } from 'marked';
 
 const MODELS = {
-    "tinyllama": { name: "tinyllama", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-int4", file: "decoder_model_merged" },
-    "tinyllama_fp16": { name: "tinyllama-fp16", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-fp16", externaldata: true, file: "decoder_model_merged" },
-    "phi2": { name: "phi2", path: "schmuell/phi2-int4", file: "decoder_model_merged" },
     "phi3": { name: "phi3", path: "schmuell/phi3-int4", externaldata: true },
-    "phi3-1": { name: "phi3-1", path: "schmuell/phi3-1", externaldata: true },
-    "stablelm": { name: "stablelm", path: "schmuell/stablelm-2-zephyr-1_6b-int4", file: "decoder_model_merged" },
 }
 
 const preCannedQueries = {
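
For context on the `externaldata: true` flag on the surviving `phi3` entry, here is a minimal sketch (not part of the patch) of how a two-file model of this kind might be loaded with onnxruntime-web. The `loadPhi3` helper, the fetched file names, and the `base` URL parameter are assumptions for illustration; the project's actual loading code may differ.

```js
import * as ort from 'onnxruntime-web/webgpu';

// Hypothetical loader: fetch model.onnx and its external weights file,
// then create a WebGPU inference session.
async function loadPhi3(base) {
    const [model, data] = await Promise.all([
        fetch(`${base}/model.onnx`).then((r) => r.arrayBuffer()),
        fetch(`${base}/model.onnx.data`).then((r) => r.arrayBuffer()),
    ]);
    return ort.InferenceSession.create(new Uint8Array(model), {
        executionProviders: ['webgpu'],
        // model.onnx references its weights in model.onnx.data; both files
        // stay under 2GB so the browser cache can hold them (README point 3).
        externalData: [{ data: new Uint8Array(data), path: 'model.onnx.data' }],
    });
}
```

Keeping both files under the 2GB limit is what makes the external-data split worthwhile in the browser: each file can be fetched once and served from cache on later visits.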