From 5276e8b4a83da97e07e64d99eb97892c40519e3f Mon Sep 17 00:00:00 2001
From: slam
Date: Mon, 15 Apr 2024 18:55:36 +0800
Subject: [PATCH] feat(BE-190): As a user, I want to be able to use the ollama-client

---
 ollama-client/api.md                          | 1040 +++++++++++++++++
 ollama-client/build.gradle.kts                |   14 +
 ollama-client/jvm/.gitkeep                    |    0
 .../ollama-client-core/build.gradle.kts       |   51 +
 .../kotlin/com/tddworks/ollama/api/Ollama.kt  |   12 +
 .../tddworks/ollama/api/chat/OllamaChatApi.kt |    8 +
 .../ollama/api/chat/OllamaChatRequest.kt      |   24 +
 .../ollama/api/chat/OllamaChatResponse.kt     |   55 +
 .../api/chat/internal/DefaultOllamaChatApi.kt |   45 +
 .../ollama/api/chat/internal/JsonLenient.kt   |   23 +
 .../api/chat/internal/json/OllamaModule.kt    |   32 +
 .../com/tddworks/ollama/api/MockHttpClient.kt |   43 +
 .../api/internal/DefaultOllamaChatApiITest.kt |   73 ++
 .../api/internal/DefaultOllamaChatApiTest.kt  |  152 +++
 14 files changed, 1572 insertions(+)
 create mode 100644 ollama-client/api.md
 create mode 100644 ollama-client/build.gradle.kts
 create mode 100644 ollama-client/jvm/.gitkeep
 create mode 100644 ollama-client/ollama-client-core/build.gradle.kts
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt
 create mode 100644 ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt
 create mode 100644 ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt
 create mode 100644 ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt

diff --git a/ollama-client/api.md b/ollama-client/api.md
new file mode 100644
index 0000000..aba605f
--- /dev/null
+++ b/ollama-client/api.md
@@ -0,0 +1,1040 @@
+# API
+
+## Endpoints
+
+- [Generate a completion](#generate-a-completion)
+- [Generate a chat completion](#generate-a-chat-completion)
+- [Create a Model](#create-a-model)
+- [List Local Models](#list-local-models)
+- [Show Model Information](#show-model-information)
+- [Copy a Model](#copy-a-model)
+- [Delete a Model](#delete-a-model)
+- [Pull a Model](#pull-a-model)
+- [Push a Model](#push-a-model)
+- [Generate Embeddings](#generate-embeddings)
+
+## Conventions
+
+### Model names
+
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+
+### Durations
+
+All durations are returned in nanoseconds.
+
+### Streaming responses
+
+Certain endpoints stream responses as JSON objects and can optionally return non-streamed responses.
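+
+The Kotlin client added by this patch mirrors this split for the chat endpoint: `OllamaChatApi.stream()` exposes the streamed JSON objects as a `Flow`, while `OllamaChatApi.request()` performs a single non-streamed call. A minimal sketch, wired the same way as the integration test in this patch (that the shared `createHttpClient` helper resolves `"localhost"` to Ollama's default port is an assumption of this sketch):
+
+```kotlin
+import com.tddworks.common.network.api.ktor.internal.DefaultHttpRequester
+import com.tddworks.common.network.api.ktor.internal.createHttpClient
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.internal.DefaultOllamaChatApi
+import com.tddworks.ollama.api.chat.internal.JsonLenient
+import kotlinx.coroutines.runBlocking
+
+fun main() = runBlocking {
+    val chatApi = DefaultOllamaChatApi(
+        requester = DefaultHttpRequester(
+            createHttpClient(url = { "localhost" }, json = JsonLenient)
+        )
+    )
+
+    val request = OllamaChatRequest(
+        model = "llama2",
+        messages = listOf(OllamaChatMessage(role = "user", content = "why is the sky blue?"))
+    )
+
+    // Streamed: one OllamaChatResponse per JSON object in the stream.
+    chatApi.stream(request).collect { print(it.message?.content.orEmpty()) }
+
+    // Non-streamed: the statistics-bearing final object comes back in one reply.
+    println(chatApi.request(request).totalDuration)
+}
+```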
+
+## Generate a completion
+
+```shell
+POST /api/generate
+```
+
+Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
+
+### Parameters
+
+- `model`: (required) the [model name](#model-names)
+- `prompt`: the prompt to generate a response for
+- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
+
+Advanced parameters (optional):
+
+- `format`: the format to return a response in. Currently the only accepted value is `json`
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `system`: system message (overrides what is defined in the `Modelfile`)
+- `template`: the prompt template to use (overrides what is defined in the `Modelfile`)
+- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
+- `raw`: if `true` no formatting will be applied to the prompt. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+#### JSON mode
+
+Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
+
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
+
+### Examples
+
+#### Generate request (Streaming)
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "Why is the sky blue?"
+}'
+```
+
+##### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "response": "The",
+  "done": false
+}
+```
+
+The final response in the stream also includes additional data about the generation:
+
+- `total_duration`: time spent generating the response
+- `load_duration`: time spent in nanoseconds loading the model
+- `prompt_eval_count`: number of tokens in the prompt
+- `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
+- `eval_count`: number of tokens in the response
+- `eval_duration`: time in nanoseconds spent generating the response
+- `context`: an encoding of the conversation used in this response; this can be sent in the next request to keep a conversational memory
+- `response`: empty if the response was streamed; if not streamed, this will contain the full response
+
+To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration` and multiply by `10^9` (durations are reported in nanoseconds).
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 10706818083,
+  "load_duration": 6338219291,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 130079000,
+  "eval_count": 259,
+  "eval_duration": 4232710000
+}
+```
+
+#### Request (No streaming)
+
+##### Request
+
+A response can be received in one reply when streaming is off.
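+
+The client in this patch only wraps `/api/chat` so far, so the following is a rough sketch of a non-streamed `/api/generate` call made with plain Ktor (already a dependency of `ollama-client-core`); the model, prompt, and port are taken from the curl example that follows:
+
+```kotlin
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import io.ktor.http.*
+import kotlinx.coroutines.runBlocking
+
+fun main() = runBlocking {
+    val client = HttpClient(CIO)
+    // With "stream": false the whole completion arrives as a single JSON object.
+    val response = client.post("http://localhost:11434/api/generate") {
+        contentType(ContentType.Application.Json)
+        setBody("""{"model": "llama2", "prompt": "Why is the sky blue?", "stream": false}""")
+    }
+    println(response.bodyAsText())
+    client.close()
+}
+```
+
+The equivalent request with curl: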
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "Why is the sky blue?",
+  "stream": false
+}'
+```
+
+##### Response
+
+If `stream` is set to `false`, the response will be a single JSON object:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "The sky is blue because it is the color of the sky.",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 5043500667,
+  "load_duration": 5025959,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 325953000,
+  "eval_count": 290,
+  "eval_duration": 4709213000
+}
+```
+
+#### Request (JSON mode)
+
+> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "What color is the sky at different times of the day? Respond using JSON",
+  "format": "json",
+  "stream": false
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-11-09T21:07:55.186497Z",
+  "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 4648158584,
+  "load_duration": 4071084,
+  "prompt_eval_count": 36,
+  "prompt_eval_duration": 439038000,
+  "eval_count": 180,
+  "eval_duration": 4196918000
+}
+```
+
+The value of `response` will be a string containing JSON similar to:
+
+```json
+{
+  "morning": {
+    "color": "blue"
+  },
+  "noon": {
+    "color": "blue-gray"
+  },
+  "afternoon": {
+    "color": "warm gray"
+  },
+  "evening": {
+    "color": "orange"
+  }
+}
+```
+
+#### Request (with images)
+
+To submit images to multimodal models such as `llava` or `bakllava`, provide a list of base64-encoded `images`:
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llava",
+  "prompt":"What is in this picture?",
+  "stream": false,
+  "images":
["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPd
urUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"] +}' +``` + +#### Response + +``` +{ + "model": "llava", + "created_at": "2023-11-03T15:36:02.583064Z", + "response": "A happy cartoon character, which is cute and cheerful.", + "done": true, + "context": [1, 2, 3], + "total_duration": 2938432250, + "load_duration": 2559292, + "prompt_eval_count": 1, + "prompt_eval_duration": 2195557000, + "eval_count": 44, + "eval_duration": 736432000 +} +``` + +#### Request (Raw Mode) + +In some cases, you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable templating. Also note that raw mode will not return a context. + +##### Request + +```shell +curl http://localhost:11434/api/generate -d '{ + "model": "mistral", + "prompt": "[INST] why is the sky blue? [/INST]", + "raw": true, + "stream": false +}' +``` + +#### Request (Reproducible outputs) + +For reproducible outputs, set `temperature` to 0 and `seed` to a number: + +##### Request + +```shell +curl http://localhost:11434/api/generate -d '{ + "model": "mistral", + "prompt": "Why is the sky blue?", + "options": { + "seed": 123, + "temperature": 0 + } +}' +``` + +##### Response + +```json +{ + "model": "mistral", + "created_at": "2023-11-03T15:36:02.583064Z", + "response": " The sky appears blue because of a phenomenon called Rayleigh scattering.", + "done": true, + "total_duration": 8493852375, + "load_duration": 6589624375, + "prompt_eval_count": 14, + "prompt_eval_duration": 119039000, + "eval_count": 110, + "eval_duration": 1779061000 +} +``` + +#### Generate request (With options) + +If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override. 
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "Why is the sky blue?",
+  "stream": false,
+  "options": {
+    "num_keep": 5,
+    "seed": 42,
+    "num_predict": 100,
+    "top_k": 20,
+    "top_p": 0.9,
+    "tfs_z": 0.5,
+    "typical_p": 0.7,
+    "repeat_last_n": 33,
+    "temperature": 0.8,
+    "repeat_penalty": 1.2,
+    "presence_penalty": 1.5,
+    "frequency_penalty": 1.0,
+    "mirostat": 1,
+    "mirostat_tau": 0.8,
+    "mirostat_eta": 0.6,
+    "penalize_newline": true,
+    "stop": ["\n", "user:"],
+    "numa": false,
+    "num_ctx": 1024,
+    "num_batch": 2,
+    "num_gqa": 1,
+    "num_gpu": 1,
+    "main_gpu": 0,
+    "low_vram": false,
+    "f16_kv": true,
+    "vocab_only": false,
+    "use_mmap": true,
+    "use_mlock": false,
+    "rope_frequency_base": 1.1,
+    "rope_frequency_scale": 0.8,
+    "num_thread": 8
+  }
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "The sky is blue because it is the color of the sky.",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 4935886791,
+  "load_duration": 534986708,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 107345000,
+  "eval_count": 237,
+  "eval_duration": 4289432000
+}
+```
+
+#### Load a model
+
+If an empty prompt is provided, the model will be loaded into memory.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2"
+}'
+```
+
+##### Response
+
+A single JSON object is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-12-18T19:52:07.071755Z",
+  "response": "",
+  "done": true
+}
+```
+
+## Generate a chat completion
+
+```shell
+POST /api/chat
+```
+
+Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using `"stream": false`. The final response object will include statistics and additional data from the request.
+
+### Parameters
+
+- `model`: (required) the [model name](#model-names)
+- `messages`: the messages of the chat; these can be used to keep a chat memory
+
+The `message` object has the following fields:
+
+- `role`: the role of the message, either `system`, `user` or `assistant`
+- `content`: the content of the message
+- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
+
+Advanced parameters (optional):
+
+- `format`: the format to return a response in. Currently the only accepted value is `json`
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+### Examples
+
+#### Chat request (Streaming)
+
+##### Request
+
+Send a chat message with a streaming response.
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    }
+  ]
+}'
+```
+
+##### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "message": {
+    "role": "assistant",
+    "content": "The",
+    "images": null
+  },
+  "done": false
+}
+```
+
+Final response:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "done": true,
+  "total_duration": 4883583458,
+  "load_duration": 1334875,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 342546000,
+  "eval_count": 282,
+  "eval_duration": 4535599000
+}
+```
+
+#### Chat request (No streaming)
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    }
+  ],
+  "stream": false
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "registry.ollama.ai/library/llama2:latest",
+  "created_at": "2023-12-12T14:13:43.416799Z",
+  "message": {
+    "role": "assistant",
+    "content": "Hello! How are you today?"
+  },
+  "done": true,
+  "total_duration": 5191566416,
+  "load_duration": 2154458,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 383809000,
+  "eval_count": 298,
+  "eval_duration": 4799921000
+}
+```
+
+#### Chat request (With History)
+
+Send a chat message with a conversation history. You can use this same approach to start the conversation using multi-shot or chain-of-thought prompting.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    },
+    {
+      "role": "assistant",
+      "content": "due to rayleigh scattering."
+    },
+    {
+      "role": "user",
+      "content": "how is that different than mie scattering?"
+    }
+  ]
+}'
+```
+
+##### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "message": {
+    "role": "assistant",
+    "content": "The"
+  },
+  "done": false
+}
+```
+
+Final response:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "done": true,
+  "total_duration": 8113331500,
+  "load_duration": 6396458,
+  "prompt_eval_count": 61,
+  "prompt_eval_duration": 398801000,
+  "eval_count": 468,
+  "eval_duration": 7701267000
+}
+```
+
+#### Chat request (with images)
+
+##### Request
+
+Send a chat message with an image attached.
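+
+As a sketch, the same payload can be built with the request types added in this patch (`OllamaChatRequest`/`OllamaChatMessage`); the base64 image data is truncated here for readability:
+
+```kotlin
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+
+// Mirrors the curl request below; pass base64-encoded image data in `images`.
+val request = OllamaChatRequest(
+    model = "llava",
+    messages = listOf(
+        OllamaChatMessage(
+            role = "user",
+            content = "what is in this image?",
+            images = listOf("iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+V...")
+        )
+    )
+)
+```
+
+The raw HTTP request: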
+ +```shell +curl http://localhost:11434/api/chat -d '{ + "model": "llava", + "messages": [ + { + "role": "user", + "content": "what is in this image?", + "images": ["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA
+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"] + } + ] +}' +``` + +##### Response + +```json +{ + "model": "llava", + "created_at": "2023-12-13T22:42:50.203334Z", + "message": { + "role": "assistant", + "content": " The image features a cute, little pig with an angry facial expression. It's wearing a heart on its shirt and is waving in the air. This scene appears to be part of a drawing or sketching project.", + "images": null + }, + "done": true, + "total_duration": 1668506709, + "load_duration": 1986209, + "prompt_eval_count": 26, + "prompt_eval_duration": 359682000, + "eval_count": 83, + "eval_duration": 1303285000 +} +``` + +#### Chat request (Reproducible outputs) + +##### Request + +```shell +curl http://localhost:11434/api/chat -d '{ + "model": "llama2", + "messages": [ + { + "role": "user", + "content": "Hello!" + } + ], + "options": { + "seed": 101, + "temperature": 0 + } +}' +``` + +##### Response + +```json +{ + "model": "registry.ollama.ai/library/llama2:latest", + "created_at": "2023-12-12T14:13:43.416799Z", + "message": { + "role": "assistant", + "content": "Hello! How are you today?" + }, + "done": true, + "total_duration": 5191566416, + "load_duration": 2154458, + "prompt_eval_count": 26, + "prompt_eval_duration": 383809000, + "eval_count": 298, + "eval_duration": 4799921000 +} +``` + +## Create a Model + +```shell +POST /api/create +``` + +Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using [Create a Blob](#create-a-blob) and the value to the path indicated in the response. 
+
+### Parameters
+
+- `name`: name of the model to create
+- `modelfile` (optional): contents of the Modelfile
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+- `path` (optional): path to the Modelfile
+
+### Examples
+
+#### Create a new model
+
+Create a new model from a `Modelfile`.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/create -d '{
+  "name": "mario",
+  "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
+}'
+```
+
+##### Response
+
+A stream of JSON objects. Notice that the final JSON object shows a `"status": "success"`.
+
+```json
+{"status":"reading model metadata"}
+{"status":"creating system layer"}
+{"status":"using already created layer sha256:22f7f8ef5f4c791c1b03d7eb414399294764d7cc82c7e94aa81a1feb80a983a2"}
+{"status":"using already created layer sha256:8c17c2ebb0ea011be9981cc3922db8ca8fa61e828c5d3f44cb6ae342bf80460b"}
+{"status":"using already created layer sha256:7c23fb36d80141c4ab8cdbb61ee4790102ebd2bf7aeff414453177d4f2110e5d"}
+{"status":"using already created layer sha256:2e0493f67d0c8c9c68a8aeacdf6a38a2151cb3c4c1d42accf296e19810527988"}
+{"status":"using already created layer sha256:2759286baa875dc22de5394b4a925701b1896a7e3f8e53275c36f75a877a82c9"}
+{"status":"writing layer sha256:df30045fe90f0d750db82a058109cecd6d4de9c90a3d75b19c09e5f64580bb42"}
+{"status":"writing layer sha256:f18a68eb09bf925bb1b669490407c1b1251c5db98dc4d3d81f3088498ea55690"}
+{"status":"writing manifest"}
+{"status":"success"}
+```
+
+### Check if a Blob Exists
+
+```shell
+HEAD /api/blobs/:digest
+```
+
+Ensures that the file blob used for a `FROM` or `ADAPTER` field exists on the server. This checks your Ollama server, not Ollama.ai.
+
+#### Query Parameters
+
+- `digest`: the SHA256 digest of the blob
+
+#### Examples
+
+##### Request
+
+```shell
+curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
+```
+
+##### Response
+
+Returns 200 OK if the blob exists, or 404 Not Found if it does not.
+
+### Create a Blob
+
+```shell
+POST /api/blobs/:digest
+```
+
+Create a blob from a file on the server. Returns the server file path.
+
+#### Query Parameters
+
+- `digest`: the expected SHA256 digest of the file
+
+#### Examples
+
+##### Request
+
+```shell
+curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
+```
+
+##### Response
+
+Returns 201 Created if the blob was successfully created, or 400 Bad Request if the digest is not as expected.
+
+## List Local Models
+
+```shell
+GET /api/tags
+```
+
+List models that are available locally.
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/tags
+```
+
+#### Response
+
+A single JSON object will be returned.
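+
+This endpoint is not wrapped by the Kotlin client in this patch yet; as a plain-Ktor sketch, the same listing could be fetched with:
+
+```kotlin
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import kotlinx.coroutines.runBlocking
+
+fun main() = runBlocking {
+    val client = HttpClient(CIO)
+    // GET /api/tags lists the locally available models as a single JSON object.
+    println(client.get("http://localhost:11434/api/tags").bodyAsText())
+    client.close()
+}
+```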
+
+```json
+{
+  "models": [
+    {
+      "name": "codellama:13b",
+      "modified_at": "2023-11-04T14:56:49.277302595-07:00",
+      "size": 7365960935,
+      "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
+      "details": {
+        "format": "gguf",
+        "family": "llama",
+        "families": null,
+        "parameter_size": "13B",
+        "quantization_level": "Q4_0"
+      }
+    },
+    {
+      "name": "llama2:latest",
+      "modified_at": "2023-12-07T09:32:18.757212583-08:00",
+      "size": 3825819519,
+      "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
+      "details": {
+        "format": "gguf",
+        "family": "llama",
+        "families": null,
+        "parameter_size": "7B",
+        "quantization_level": "Q4_0"
+      }
+    }
+  ]
+}
+```
+
+## Show Model Information
+
+```shell
+POST /api/show
+```
+
+Show information about a model including details, modelfile, template, parameters, license, and system prompt.
+
+### Parameters
+
+- `name`: name of the model to show
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/show -d '{
+  "name": "llama2"
+}'
+```
+
+#### Response
+
+```json
+{
+  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSSISTANT:\"",
+  "parameters": "num_ctx 4096\nstop \u003c/s\u003e\nstop USER:\nstop ASSSISTANT:",
+  "template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: ",
+  "details": {
+    "format": "gguf",
+    "family": "llama",
+    "families": ["llama", "clip"],
+    "parameter_size": "7B",
+    "quantization_level": "Q4_0"
+  }
+}
+```
+
+## Copy a Model
+
+```shell
+POST /api/copy
+```
+
+Copy a model. Creates a model with another name from an existing model.
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/copy -d '{
+  "source": "llama2",
+  "destination": "llama2-backup"
+}'
+```
+
+#### Response
+
+Returns a 200 OK if successful, or a 404 Not Found if the source model doesn't exist.
+
+## Delete a Model
+
+```shell
+DELETE /api/delete
+```
+
+Delete a model and its data.
+
+### Parameters
+
+- `name`: model name to delete
+
+### Examples
+
+#### Request
+
+```shell
+curl -X DELETE http://localhost:11434/api/delete -d '{
+  "name": "llama2:13b"
+}'
+```
+
+#### Response
+
+Returns a 200 OK if successful, or a 404 Not Found if the model to be deleted doesn't exist.
+
+## Pull a Model
+
+```shell
+POST /api/pull
+```
+
+Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.
+
+### Parameters
+
+- `name`: name of the model to pull
+- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/pull -d '{
+  "name": "llama2"
+}'
+```
+
+#### Response
+
+If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
+
+The first object is the manifest:
+
+```json
+{
+  "status": "pulling manifest"
+}
+```
+
+Then there is a series of downloading responses.
+The `completed` key may not be included until a download has completed. The number of files to be downloaded depends on the number of layers specified in the manifest.
+
+```json
+{
+  "status": "downloading digestname",
+  "digest": "digestname",
+  "total": 2142590208,
+  "completed": 241970
+}
+```
+
+After all the files are downloaded, the final responses are:
+
+```json
+{
+  "status": "verifying sha256 digest"
+}
+{
+  "status": "writing manifest"
+}
+{
+  "status": "removing any unused layers"
+}
+{
+  "status": "success"
+}
+```
+
+If `stream` is set to `false`, then the response is a single JSON object:
+
+```json
+{
+  "status": "success"
+}
+```
+
+## Push a Model
+
+```shell
+POST /api/push
+```
+
+Upload a model to a model library. Requires registering for ollama.ai and adding a public key first.
+
+### Parameters
+
+- `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
+- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/push -d '{
+  "name": "mattw/pygmalion:latest"
+}'
+```
+
+#### Response
+
+If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
+
+```json
+{ "status": "retrieving manifest" }
+```
+
+and then:
+
+```json
+{
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
+}
+```
+
+Then there is a series of uploading responses:
+
+```json
+{
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
+}
+```
+
+Finally, when the upload is complete:
+
+```json
+{"status":"pushing manifest"}
+{"status":"success"}
+```
+
+If `stream` is set to `false`, then the response is a single JSON object:
+
+```json
+{ "status": "success" }
+```
+
+## Generate Embeddings
+
+```shell
+POST /api/embeddings
+```
+
+Generate embeddings from a model.
+
+### Parameters
+
+- `model`: name of model to generate embeddings from
+- `prompt`: text to generate embeddings for
+
+Advanced parameters:
+
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/embeddings -d '{
+  "model": "all-minilm",
+  "prompt": "Here is an article about llamas..."
+}'
+```
+
+#### Response
+
+```json
+{
+  "embedding": [
+    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
+    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
+  ]
+}
+```
diff --git a/ollama-client/build.gradle.kts b/ollama-client/build.gradle.kts
new file mode 100644
index 0000000..fa51bd7
--- /dev/null
+++ b/ollama-client/build.gradle.kts
@@ -0,0 +1,14 @@
+plugins {
+    `maven-publish`
+}
+
+kotlin {
+    jvm()
+    sourceSets {
+        commonMain {
+            dependencies {
+                api(projects.ollamaClient.ollamaClientCore)
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/jvm/.gitkeep b/ollama-client/jvm/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/ollama-client/ollama-client-core/build.gradle.kts b/ollama-client/ollama-client-core/build.gradle.kts
new file mode 100644
index 0000000..ec91f97
--- /dev/null
+++ b/ollama-client/ollama-client-core/build.gradle.kts
@@ -0,0 +1,51 @@
+plugins {
+    alias(libs.plugins.kotlinx.serialization)
+    alias(libs.plugins.kover)
+    `maven-publish`
+}
+
+kotlin {
+    jvm()
+    macosArm64()
+    macosX64()
+
+    sourceSets {
+        commonMain.dependencies {
+            implementation(libs.kotlinx.coroutines.core)
+            api(libs.kotlinx.serialization.json)
+            api(libs.bundles.ktor.client)
+            api(projects.common)
+        }
+
+        commonTest.dependencies {
+            implementation(libs.ktor.client.mock)
+            api(projects.common)
+        }
+
+        macosMain.dependencies {
+            api(libs.ktor.client.darwin)
+        }
+
+        jvmMain.dependencies {
+            api(libs.ktor.client.cio)
+        }
+
+        jvmTest.dependencies {
+            implementation(project.dependencies.platform(libs.junit.bom))
+            implementation(libs.bundles.jvm.test)
+            implementation(libs.kotlinx.coroutines.test)
+            implementation(libs.koin.test)
+            implementation(libs.koin.test.junit5)
+            implementation(libs.app.cash.turbine)
+            implementation("com.tngtech.archunit:archunit-junit5:1.1.0")
+            implementation("org.reflections:reflections:0.10.2")
+        }
+    }
+}
+
+tasks {
+    named<Test>("jvmTest") {
+        useJUnitPlatform()
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt
new file mode 100644
index 0000000..08ee94c
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt
@@ -0,0 +1,12 @@
+package com.tddworks.ollama.api
+
+/**
+ * @author hanrw
+ * @date 2024/4/14 17:32
+ */
+class Ollama {
+    companion object {
+        const val BASE_URL = "https://ollama.com"
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt
new file mode 100644
index 0000000..85617f0
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt
@@ -0,0 +1,8 @@
+package com.tddworks.ollama.api.chat
+
+import kotlinx.coroutines.flow.Flow
+
+interface OllamaChatApi {
+    suspend fun stream(request: OllamaChatRequest): Flow<OllamaChatResponse>
+    suspend fun request(request: OllamaChatRequest): OllamaChatResponse
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt
new file mode 100644
index 0000000..5a6acec
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt
@@ -0,0 +1,24 @@
+package com.tddworks.ollama.api.chat
+
+import com.tddworks.common.network.api.StreamableRequest
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+
+@Serializable
+data class OllamaChatRequest(
+    @SerialName("model") val model: String,
+    @SerialName("messages") val messages: List<OllamaChatMessage>,
+    @SerialName("format") val format: String? = null,
+//    @SerialName("options") val options: Map? = null,
+//    @SerialName("stream") val stream: Boolean? = null,
+    @SerialName("keep_alive") val keepAlive: String? = null,
+) : StreamableRequest
+
+
+@Serializable
+data class OllamaChatMessage(
+    @SerialName("role") val role: String,
+    @SerialName("content") val content: String,
+    @SerialName("images") val images: List<String>? = null,
+)
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt
new file mode 100644
index 0000000..c28c56a
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt
@@ -0,0 +1,55 @@
+package com.tddworks.ollama.api.chat
+
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+/**
+ * {
+ *   "model": "llama2",
+ *   "created_at": "2023-08-04T08:52:19.385406455-07:00",
+ *   "message": {
+ *     "role": "assistant",
+ *     "content": "The"
+ *   },
+ *   "done": false
+ * }
+ */
+@Serializable
+data class OllamaChatResponse(
+    @SerialName("model") val model: String,
+    @SerialName("created_at") val createdAt: String,
+    @SerialName("message") val message: OllamaChatMessage? = null,
+    @SerialName("done") val done: Boolean?,
+    @SerialName("total_duration") val totalDuration: Long? = null,
+    @SerialName("load_duration") val loadDuration: Long? = null,
+    @SerialName("prompt_eval_count") val promptEvalCount: Int? = null,
+    @SerialName("prompt_eval_duration") val promptEvalDuration: Long? = null,
+    @SerialName("eval_count") val evalCount: Int? = null,
+    @SerialName("eval_duration") val evalDuration: Long? = null,
+)
+
+/**
+ * {
+ *   "model": "llama2",
+ *   "created_at": "2023-08-04T19:22:45.499127Z",
+ *   "done": true,
+ *   "total_duration": 8113331500,
+ *   "load_duration": 6396458,
+ *   "prompt_eval_count": 61,
+ *   "prompt_eval_duration": 398801000,
+ *   "eval_count": 468,
+ *   "eval_duration": 7701267000
+ * }
+ */
+@Serializable
+data class FinalOllamaChatResponse(
+    @SerialName("model") val model: String,
+    @SerialName("created_at") val createdAt: String,
+    @SerialName("done") val done: Boolean?,
+    @SerialName("total_duration") val totalDuration: Long?,
+    @SerialName("load_duration") val loadDuration: Long?,
+    @SerialName("prompt_eval_count") val promptEvalCount: Int?,
+    @SerialName("prompt_eval_duration") val promptEvalDuration: Long?,
+    @SerialName("eval_count") val evalCount: Int?,
+    @SerialName("eval_duration") val evalDuration: Long?,
+)
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt
new file mode 100644
index 0000000..13d6b8a
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt
@@ -0,0 +1,45 @@
+package com.tddworks.ollama.api.chat.internal
+
+import com.tddworks.common.network.api.ktor.api.HttpRequester
+import com.tddworks.common.network.api.ktor.api.performRequest
+import com.tddworks.common.network.api.ktor.api.streamRequest
+import com.tddworks.ollama.api.chat.OllamaChatApi
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.OllamaChatResponse
+import io.ktor.client.request.*
+import io.ktor.http.*
+import kotlinx.coroutines.flow.Flow
+import kotlinx.serialization.json.Json
+
+class DefaultOllamaChatApi(
+    private val requester: HttpRequester,
+    private val jsonLenient: Json = JsonLenient,
+) : OllamaChatApi {
+    override suspend fun stream(request: OllamaChatRequest): Flow<OllamaChatResponse> {
+        return requester.streamRequest {
+            method = HttpMethod.Post
+            url(path = CHAT_API_PATH)
+            setBody(request.asStreamRequest(jsonLenient))
+            contentType(ContentType.Application.Json)
+            accept(ContentType.Text.EventStream)
+            headers {
+                append(HttpHeaders.CacheControl, "no-cache")
+                append(HttpHeaders.Connection, "keep-alive")
+            }
+        }
+    }
+
+    override suspend fun request(request: OllamaChatRequest): OllamaChatResponse {
+        return requester.performRequest {
+            method = HttpMethod.Post
+            url(path = CHAT_API_PATH)
+            setBody(request)
+            contentType(ContentType.Application.Json)
+        }
+    }
+
+    companion object {
+        const val CHAT_API_PATH = "/api/chat"
+    }
+}
+
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt
new file mode 100644
index 0000000..cef7574
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt
@@ -0,0 +1,23 @@
+package com.tddworks.ollama.api.chat.internal
+
+import com.tddworks.ollama.api.chat.internal.json.ollamaModule
+import kotlinx.serialization.json.Json
+
+
+/**
+ * Represents a JSON object that allows for leniency and ignores unknown keys.
+ *
+ * @property isLenient Removes the JSON specification restriction (RFC-4627) and makes the parser more liberal toward malformed input.
+ *   In lenient mode, quoted boolean literals and unquoted string literals are allowed.
+ *   Its relaxations may be expanded in the future, making the lenient parser even more permissive to invalid input, replacing such values with defaults.
+ *   `false` by default.
+ * @property ignoreUnknownKeys Specifies whether encounters of unknown properties in the input JSON should be ignored instead of throwing a SerializationException. `false` by default.
+ */
+val JsonLenient = Json {
+    isLenient = true
+    ignoreUnknownKeys = true
+    // https://github.com/Kotlin/kotlinx.serialization/blob/master/docs/json.md#class-discriminator-for-polymorphism
+    classDiscriminator = "#class"
+    serializersModule = ollamaModule
+    encodeDefaults = true
+    explicitNulls = false
+}
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt
new file mode 100644
index 0000000..4b606a8
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt
@@ -0,0 +1,32 @@
+package com.tddworks.ollama.api.chat.internal.json
+
+import com.tddworks.common.network.api.StreamableRequest
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import kotlinx.serialization.modules.SerializersModule
+import kotlinx.serialization.modules.polymorphic
+
+/**
+ * The `SerializersModule` that defines the serialization and deserialization
+ * rules for the `StreamableRequest` class and its subclasses.
+ */
+val ollamaModule = SerializersModule {
+    polymorphic(StreamableRequest::class) {
+        // Serialize/deserialize OllamaChatRequest as a StreamableRequest subclass.
+        subclass(OllamaChatRequest::class, OllamaChatRequest.serializer())
+        // Fall back to the OllamaChatRequest serializer when the input carries
+        // no class discriminator.
+        defaultDeserializer { OllamaChatRequest.serializer() }
+    }
+}
+
diff --git a/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt
new file mode 100644
index 0000000..677a17d
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt
@@ -0,0 +1,43 @@
+package com.tddworks.ollama.api
+
+
+import com.tddworks.common.network.api.ktor.internal.JsonLenient
+import io.ktor.client.*
+import io.ktor.client.engine.mock.*
+import io.ktor.client.plugins.*
+import io.ktor.client.plugins.contentnegotiation.*
+import io.ktor.client.request.*
+import io.ktor.http.*
+import io.ktor.serialization.kotlinx.*
+
+/**
+ * See https://ktor.io/docs/http-client-testing.html#usage
+ */
+fun mockHttpClient(mockResponse: String) = HttpClient(MockEngine) {
+
+    val headers = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString()))
+
+    install(ContentNegotiation) {
+        register(ContentType.Application.Json, KotlinxSerializationConverter(JsonLenient))
+    }
+
+    engine {
+        addHandler { request ->
+            if (request.url.encodedPath == "/api/chat") {
+                respond(mockResponse, HttpStatusCode.OK, headers)
+            } else {
+                error("Unhandled ${request.url.encodedPath}")
+            }
+        }
+    }
+
+    defaultRequest {
+        url {
+            protocol = URLProtocol.HTTPS
+            // Any host works here: MockEngine intercepts every request before it leaves the client.
+            host = "localhost"
+        }
+
+        header(HttpHeaders.ContentType, ContentType.Application.Json)
+        contentType(ContentType.Application.Json)
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt
new file mode 100644
index 0000000..f855832
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt
@@ -0,0 +1,73 @@
+package com.tddworks.ollama.api.internal
+
+import com.tddworks.common.network.api.ktor.internal.DefaultHttpRequester
+import com.tddworks.common.network.api.ktor.internal.createHttpClient
+import com.tddworks.di.initKoin
+import com.tddworks.ollama.api.Ollama
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.internal.DefaultOllamaChatApi
+import com.tddworks.ollama.api.chat.internal.JsonLenient
+import kotlinx.coroutines.test.runTest
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.BeforeEach
+import org.junit.jupiter.api.Test
+import org.koin.test.junit5.AutoCloseKoinTest
+
+class DefaultOllamaChatApiITest : AutoCloseKoinTest() {
+
+    @BeforeEach
+    fun setUp() {
+        initKoin()
+    }
+
+    @Test
+    fun `should return correct base url`() {
+        assertEquals("https://ollama.com", Ollama.BASE_URL)
+    }
+
+    @Test
+    fun `should return stream response`() = runTest {
+        val ollamaChatApi = DefaultOllamaChatApi(
+            requester = DefaultHttpRequester(
+                createHttpClient(
+                    url = { "localhost" },
+                    json = JsonLenient,
+                )
+            )
+        )
+
+        ollamaChatApi.stream(
+            OllamaChatRequest(
+                model = "llama2",
+                messages = listOf(
+                    OllamaChatMessage(
+                        role = "user",
+                        content = "hello"
+                    )
+                )
+            )
+        ).collect {
+            println("stream response: $it")
+        }
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt
new file mode 100644
index 0000000..e6b43e4
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt
@@ -0,0 +1,152 @@
+import com.tddworks.common.network.api.ktor.internal.DefaultHttpRequester
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.OllamaChatResponse
+import com.tddworks.ollama.api.chat.internal.DefaultOllamaChatApi
+import com.tddworks.ollama.api.chat.internal.JsonLenient
+import com.tddworks.ollama.api.mockHttpClient
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.flow.toList
+import kotlinx.coroutines.test.*
+import kotlinx.serialization.json.Json
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.extension.AfterEachCallback
+import org.junit.jupiter.api.extension.BeforeEachCallback
+import org.junit.jupiter.api.extension.ExtensionContext
+import org.junit.jupiter.api.extension.RegisterExtension
+import org.koin.dsl.module
+import org.koin.test.KoinTest
+import org.koin.test.junit5.KoinTestExtension
+
+class TestKoinCoroutineExtension(private val testDispatcher: TestDispatcher = StandardTestDispatcher()) :
+    BeforeEachCallback, AfterEachCallback {
+    override fun beforeEach(context: ExtensionContext?) {
+        Dispatchers.setMain(testDispatcher)
+    }
+
+    override fun afterEach(context: ExtensionContext?) {
+        Dispatchers.resetMain()
+    }
+}
+
+class DefaultOllamaChatApiTest : KoinTest {
+    @JvmField
+    @RegisterExtension
+    // Sets the main dispatcher to a test dispatcher. UnconfinedTestDispatcher
+    // launches coroutines eagerly, giving the same scheduling behavior as in a
+    // real app/production.
+    val testKoinCoroutineExtension = TestKoinCoroutineExtension(UnconfinedTestDispatcher())
+
+    // Required by kotlin/com/tddworks/common/network/api/ktor/api/Stream.kt, which
+    // looks up Json via getInstance():
+    // fun json(): Json {
+    //     return getInstance()
+    // }
+    @JvmField
+    @RegisterExtension
+    val koinTestExtension = KoinTestExtension.create {
+        modules(
+            module {
+                single { JsonLenient }
+            })
+    }
+
+    @Test
+    fun `should return stream of JSON response`() = runTest {
+        // Given
+        val request = OllamaChatRequest(
+            model = "llama2",
+            messages = listOf(
+                OllamaChatMessage(
+                    role = "user",
+                    content = "why is the sky blue?"
+                )
+            )
+        )
+
+        val api = DefaultOllamaChatApi(
+            DefaultHttpRequester(
+                httpClient = mockHttpClient("data: { \"model\": \"llama2\", \"created_at\": \"2023-08-04T08:52:19.385406455-07:00\", \"message\": { \"role\": \"assistant\", \"content\": \"The\", \"images\": null }, \"done\": false }")
+            )
+        )
+
+        // When
+        val responses = api.stream(request).toList()
+
+        // Then
+        assertEquals(
+            listOf(
+                OllamaChatResponse(
+                    model = "llama2",
+                    createdAt = "2023-08-04T08:52:19.385406455-07:00",
+                    message = OllamaChatMessage(
+                        role = "assistant",
+                        content = "The"
+                    ),
+                    done = false
+                )
+            ), responses
+        )
+    }
+
+    @Test
+    fun `should return single JSON response`() = runTest {
+        // Given
+        val request = OllamaChatRequest(
+            model = "llama2",
+            messages = listOf(
+                OllamaChatMessage(
+                    role = "user",
+                    content = "why is the sky blue?"
+                )
+            )
+        )
+
+        val api = DefaultOllamaChatApi(
+            DefaultHttpRequester(
+                httpClient = mockHttpClient(
+                    """
+                    {
+                      "model": "llama2",
+                      "created_at": "2023-12-12T14:13:43.416799Z",
+                      "message": {
+                        "role": "assistant",
+                        "content": "Hello! How are you today?"
+                      },
+                      "done": true,
+                      "total_duration": 5191566416,
+                      "load_duration": 2154458,
+                      "prompt_eval_count": 26,
+                      "prompt_eval_duration": 383809000,
+                      "eval_count": 298,
+                      "eval_duration": 4799921000
+                    }
+                    """.trimIndent()
+                )
+            )
+        )
+
+        // When
+        val response = api.request(request)
+
+        // Then
+        assertEquals(
+            OllamaChatResponse(
+                model = "llama2",
+                createdAt = "2023-12-12T14:13:43.416799Z",
+                message = OllamaChatMessage(
+                    role = "assistant",
+                    content = "Hello! How are you today?"
+                ),
+                done = true,
+                totalDuration = 5191566416,
+                loadDuration = 2154458,
+                promptEvalCount = 26,
+                promptEvalDuration = 383809000,
+                evalCount = 298,
+                evalDuration = 4799921000
+            ), response
+        )
+    }
+}