From 5276e8b4a83da97e07e64d99eb97892c40519e3f Mon Sep 17 00:00:00 2001
From: slam
Date: Mon, 15 Apr 2024 18:55:36 +0800
Subject: [PATCH] feat(BE-190): As a user, I want to be able to use the ollama-client

---
 ollama-client/api.md                          | 1040 +++++++++++++++++
 ollama-client/build.gradle.kts                |   14 +
 ollama-client/jvm/.gitkeep                    |    0
 .../ollama-client-core/build.gradle.kts       |   51 +
 .../kotlin/com/tddworks/ollama/api/Ollama.kt  |   12 +
 .../tddworks/ollama/api/chat/OllamaChatApi.kt |    8 +
 .../ollama/api/chat/OllamaChatRequest.kt      |   24 +
 .../ollama/api/chat/OllamaChatResponse.kt     |   55 +
 .../api/chat/internal/DefaultOllamaChatApi.kt |   45 +
 .../ollama/api/chat/internal/JsonLenient.kt   |   23 +
 .../api/chat/internal/json/OllamaModule.kt    |   32 +
 .../com/tddworks/ollama/api/MockHttpClient.kt |   43 +
 .../api/internal/DefaultOllamaChatApiITest.kt |   73 ++
 .../api/internal/DefaultOllamaChatApiTest.kt  |  152 +++
 14 files changed, 1572 insertions(+)
 create mode 100644 ollama-client/api.md
 create mode 100644 ollama-client/build.gradle.kts
 create mode 100644 ollama-client/jvm/.gitkeep
 create mode 100644 ollama-client/ollama-client-core/build.gradle.kts
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt
 create mode 100644 ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt
 create mode 100644 ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt
 create mode 100644 ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt
 create mode 100644 ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt

diff --git a/ollama-client/api.md b/ollama-client/api.md
new file mode 100644
index 0000000..aba605f
--- /dev/null
+++ b/ollama-client/api.md
@@ -0,0 +1,1040 @@
+# API
+
+## Endpoints
+
+- [Generate a completion](#generate-a-completion)
+- [Generate a chat completion](#generate-a-chat-completion)
+- [Create a Model](#create-a-model)
+- [List Local Models](#list-local-models)
+- [Show Model Information](#show-model-information)
+- [Copy a Model](#copy-a-model)
+- [Delete a Model](#delete-a-model)
+- [Pull a Model](#pull-a-model)
+- [Push a Model](#push-a-model)
+- [Generate Embeddings](#generate-embeddings)
+
+## Conventions
+
+### Model names
+
+Model names follow a `model:tag` format, where `model` can have an optional namespace such as `example/model`. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
+
+### Durations
+
+All durations are returned in nanoseconds.
+
+### Streaming responses
+
+Certain endpoints stream responses as JSON objects and can optionally return non-streamed responses.
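+
+The Kotlin client added by this patch mirrors this split for the chat endpoint: `OllamaChatApi.stream()` exposes the streamed JSON objects as a `Flow`, while `OllamaChatApi.request()` performs a single non-streamed call. A minimal sketch, wired the same way as the integration test in this patch (that the shared `createHttpClient` helper resolves `"localhost"` to Ollama's default port is an assumption of this sketch):
+
+```kotlin
+import com.tddworks.common.network.api.ktor.internal.DefaultHttpRequester
+import com.tddworks.common.network.api.ktor.internal.createHttpClient
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.internal.DefaultOllamaChatApi
+import com.tddworks.ollama.api.chat.internal.JsonLenient
+import kotlinx.coroutines.runBlocking
+
+fun main() = runBlocking {
+    val chatApi = DefaultOllamaChatApi(
+        requester = DefaultHttpRequester(
+            createHttpClient(url = { "localhost" }, json = JsonLenient)
+        )
+    )
+
+    val request = OllamaChatRequest(
+        model = "llama2",
+        messages = listOf(OllamaChatMessage(role = "user", content = "why is the sky blue?"))
+    )
+
+    // Streamed: one OllamaChatResponse per JSON object in the stream.
+    chatApi.stream(request).collect { print(it.message?.content.orEmpty()) }
+
+    // Non-streamed: the statistics-bearing final object comes back in one reply.
+    println(chatApi.request(request).totalDuration)
+}
+```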
+
+## Generate a completion
+
+```shell
+POST /api/generate
+```
+
+Generate a response for a given prompt with a provided model. This is a streaming endpoint, so there will be a series of responses. The final response object will include statistics and additional data from the request.
+
+### Parameters
+
+- `model`: (required) the [model name](#model-names)
+- `prompt`: the prompt to generate a response for
+- `images`: (optional) a list of base64-encoded images (for multimodal models such as `llava`)
+
+Advanced parameters (optional):
+
+- `format`: the format to return a response in. Currently the only accepted value is `json`
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `system`: system message (overrides what is defined in the `Modelfile`)
+- `template`: the prompt template to use (overrides what is defined in the `Modelfile`)
+- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
+- `raw`: if `true` no formatting will be applied to the prompt. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+#### JSON mode
+
+Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as a valid JSON object. See the JSON mode [example](#request-json-mode) below.
+
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.
+
+### Examples
+
+#### Generate request (Streaming)
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "Why is the sky blue?"
+}'
+```
+
+##### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "response": "The",
+  "done": false
+}
+```
+
+The final response in the stream also includes additional data about the generation:
+
+- `total_duration`: time spent generating the response
+- `load_duration`: time spent in nanoseconds loading the model
+- `prompt_eval_count`: number of tokens in the prompt
+- `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
+- `eval_count`: number of tokens in the response
+- `eval_duration`: time in nanoseconds spent generating the response
+- `context`: an encoding of the conversation used in this response; this can be sent in the next request to keep a conversational memory
+- `response`: empty if the response was streamed; if not streamed, this will contain the full response
+
+To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration` and multiply by `10^9` (durations are reported in nanoseconds).
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 10706818083,
+  "load_duration": 6338219291,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 130079000,
+  "eval_count": 259,
+  "eval_duration": 4232710000
+}
+```
+
+#### Request (No streaming)
+
+##### Request
+
+A response can be received in one reply when streaming is off.
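+
+The client in this patch only wraps `/api/chat` so far, so the following is a rough sketch of a non-streamed `/api/generate` call made with plain Ktor (already a dependency of `ollama-client-core`); the model, prompt, and port are taken from the curl example that follows:
+
+```kotlin
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import io.ktor.http.*
+import kotlinx.coroutines.runBlocking
+
+fun main() = runBlocking {
+    val client = HttpClient(CIO)
+    // With "stream": false the whole completion arrives as a single JSON object.
+    val response = client.post("http://localhost:11434/api/generate") {
+        contentType(ContentType.Application.Json)
+        setBody("""{"model": "llama2", "prompt": "Why is the sky blue?", "stream": false}""")
+    }
+    println(response.bodyAsText())
+    client.close()
+}
+```
+
+The equivalent request with curl: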
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "Why is the sky blue?",
+  "stream": false
+}'
+```
+
+##### Response
+
+If `stream` is set to `false`, the response will be a single JSON object:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "The sky is blue because it is the color of the sky.",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 5043500667,
+  "load_duration": 5025959,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 325953000,
+  "eval_count": 290,
+  "eval_duration": 4709213000
+}
+```
+
+#### Request (JSON mode)
+
+> When `format` is set to `json`, the output will always be a well-formed JSON object. It's important to also instruct the model to respond in JSON.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "What color is the sky at different times of the day? Respond using JSON",
+  "format": "json",
+  "stream": false
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-11-09T21:07:55.186497Z",
+  "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 4648158584,
+  "load_duration": 4071084,
+  "prompt_eval_count": 36,
+  "prompt_eval_duration": 439038000,
+  "eval_count": 180,
+  "eval_duration": 4196918000
+}
+```
+
+The value of `response` will be a string containing JSON similar to:
+
+```json
+{
+  "morning": {
+    "color": "blue"
+  },
+  "noon": {
+    "color": "blue-gray"
+  },
+  "afternoon": {
+    "color": "warm gray"
+  },
+  "evening": {
+    "color": "orange"
+  }
+}
+```
+
+#### Request (with images)
+
+To submit images to multimodal models such as `llava` or `bakllava`, provide a list of base64-encoded `images`:
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llava",
+  "prompt":"What is in this picture?",
+  "stream": false,
+  "images":
["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPd
urUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"] +}' +``` + +#### Response + +``` +{ + "model": "llava", + "created_at": "2023-11-03T15:36:02.583064Z", + "response": "A happy cartoon character, which is cute and cheerful.", + "done": true, + "context": [1, 2, 3], + "total_duration": 2938432250, + "load_duration": 2559292, + "prompt_eval_count": 1, + "prompt_eval_duration": 2195557000, + "eval_count": 44, + "eval_duration": 736432000 +} +``` + +#### Request (Raw Mode) + +In some cases, you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable templating. Also note that raw mode will not return a context. + +##### Request + +```shell +curl http://localhost:11434/api/generate -d '{ + "model": "mistral", + "prompt": "[INST] why is the sky blue? [/INST]", + "raw": true, + "stream": false +}' +``` + +#### Request (Reproducible outputs) + +For reproducible outputs, set `temperature` to 0 and `seed` to a number: + +##### Request + +```shell +curl http://localhost:11434/api/generate -d '{ + "model": "mistral", + "prompt": "Why is the sky blue?", + "options": { + "seed": 123, + "temperature": 0 + } +}' +``` + +##### Response + +```json +{ + "model": "mistral", + "created_at": "2023-11-03T15:36:02.583064Z", + "response": " The sky appears blue because of a phenomenon called Rayleigh scattering.", + "done": true, + "total_duration": 8493852375, + "load_duration": 6589624375, + "prompt_eval_count": 14, + "prompt_eval_duration": 119039000, + "eval_count": 110, + "eval_duration": 1779061000 +} +``` + +#### Generate request (With options) + +If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override. 
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "Why is the sky blue?",
+  "stream": false,
+  "options": {
+    "num_keep": 5,
+    "seed": 42,
+    "num_predict": 100,
+    "top_k": 20,
+    "top_p": 0.9,
+    "tfs_z": 0.5,
+    "typical_p": 0.7,
+    "repeat_last_n": 33,
+    "temperature": 0.8,
+    "repeat_penalty": 1.2,
+    "presence_penalty": 1.5,
+    "frequency_penalty": 1.0,
+    "mirostat": 1,
+    "mirostat_tau": 0.8,
+    "mirostat_eta": 0.6,
+    "penalize_newline": true,
+    "stop": ["\n", "user:"],
+    "numa": false,
+    "num_ctx": 1024,
+    "num_batch": 2,
+    "num_gqa": 1,
+    "num_gpu": 1,
+    "main_gpu": 0,
+    "low_vram": false,
+    "f16_kv": true,
+    "vocab_only": false,
+    "use_mmap": true,
+    "use_mlock": false,
+    "rope_frequency_base": 1.1,
+    "rope_frequency_scale": 0.8,
+    "num_thread": 8
+  }
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "The sky is blue because it is the color of the sky.",
+  "done": true,
+  "context": [1, 2, 3],
+  "total_duration": 4935886791,
+  "load_duration": 534986708,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 107345000,
+  "eval_count": 237,
+  "eval_duration": 4289432000
+}
+```
+
+#### Load a model
+
+If an empty prompt is provided, the model will be loaded into memory.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2"
+}'
+```
+
+##### Response
+
+A single JSON object is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-12-18T19:52:07.071755Z",
+  "response": "",
+  "done": true
+}
+```
+
+## Generate a chat completion
+
+```shell
+POST /api/chat
+```
+
+Generate the next message in a chat with a provided model. This is a streaming endpoint, so there will be a series of responses. Streaming can be disabled using `"stream": false`. The final response object will include statistics and additional data from the request.
+
+### Parameters
+
+- `model`: (required) the [model name](#model-names)
+- `messages`: the messages of the chat; these can be used to keep a chat memory
+
+The `message` object has the following fields:
+
+- `role`: the role of the message, either `system`, `user` or `assistant`
+- `content`: the content of the message
+- `images` (optional): a list of images to include in the message (for multimodal models such as `llava`)
+
+Advanced parameters (optional):
+
+- `format`: the format to return a response in. Currently the only accepted value is `json`
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+### Examples
+
+#### Chat request (Streaming)
+
+##### Request
+
+Send a chat message with a streaming response.
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    }
+  ]
+}'
+```
+
+##### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "message": {
+    "role": "assistant",
+    "content": "The",
+    "images": null
+  },
+  "done": false
+}
+```
+
+Final response:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "done": true,
+  "total_duration": 4883583458,
+  "load_duration": 1334875,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 342546000,
+  "eval_count": 282,
+  "eval_duration": 4535599000
+}
+```
+
+#### Chat request (No streaming)
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    }
+  ],
+  "stream": false
+}'
+```
+
+##### Response
+
+```json
+{
+  "model": "registry.ollama.ai/library/llama2:latest",
+  "created_at": "2023-12-12T14:13:43.416799Z",
+  "message": {
+    "role": "assistant",
+    "content": "Hello! How are you today?"
+  },
+  "done": true,
+  "total_duration": 5191566416,
+  "load_duration": 2154458,
+  "prompt_eval_count": 26,
+  "prompt_eval_duration": 383809000,
+  "eval_count": 298,
+  "eval_duration": 4799921000
+}
+```
+
+#### Chat request (With History)
+
+Send a chat message with a conversation history. You can use this same approach to start the conversation using multi-shot or chain-of-thought prompting.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/chat -d '{
+  "model": "llama2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "why is the sky blue?"
+    },
+    {
+      "role": "assistant",
+      "content": "due to rayleigh scattering."
+    },
+    {
+      "role": "user",
+      "content": "how is that different than mie scattering?"
+    }
+  ]
+}'
+```
+
+##### Response
+
+A stream of JSON objects is returned:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T08:52:19.385406455-07:00",
+  "message": {
+    "role": "assistant",
+    "content": "The"
+  },
+  "done": false
+}
+```
+
+Final response:
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-08-04T19:22:45.499127Z",
+  "done": true,
+  "total_duration": 8113331500,
+  "load_duration": 6396458,
+  "prompt_eval_count": 61,
+  "prompt_eval_duration": 398801000,
+  "eval_count": 468,
+  "eval_duration": 7701267000
+}
+```
+
+#### Chat request (with images)
+
+##### Request
+
+Send a chat message with an image attached.
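+
+As a sketch, the same payload can be built with the request types added in this patch (`OllamaChatRequest`/`OllamaChatMessage`); the base64 image data is truncated here for readability:
+
+```kotlin
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+
+// Mirrors the curl request below; pass base64-encoded image data in `images`.
+val request = OllamaChatRequest(
+    model = "llava",
+    messages = listOf(
+        OllamaChatMessage(
+            role = "user",
+            content = "what is in this image?",
+            images = listOf("iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+V...")
+        )
+    )
+)
+```
+
+The raw HTTP request: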
+ +```shell +curl http://localhost:11434/api/chat -d '{ + "model": "llava", + "messages": [ + { + "role": "user", + "content": "what is in this image?", + "images": ["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA
+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"] + } + ] +}' +``` + +##### Response + +```json +{ + "model": "llava", + "created_at": "2023-12-13T22:42:50.203334Z", + "message": { + "role": "assistant", + "content": " The image features a cute, little pig with an angry facial expression. It's wearing a heart on its shirt and is waving in the air. This scene appears to be part of a drawing or sketching project.", + "images": null + }, + "done": true, + "total_duration": 1668506709, + "load_duration": 1986209, + "prompt_eval_count": 26, + "prompt_eval_duration": 359682000, + "eval_count": 83, + "eval_duration": 1303285000 +} +``` + +#### Chat request (Reproducible outputs) + +##### Request + +```shell +curl http://localhost:11434/api/chat -d '{ + "model": "llama2", + "messages": [ + { + "role": "user", + "content": "Hello!" + } + ], + "options": { + "seed": 101, + "temperature": 0 + } +}' +``` + +##### Response + +```json +{ + "model": "registry.ollama.ai/library/llama2:latest", + "created_at": "2023-12-12T14:13:43.416799Z", + "message": { + "role": "assistant", + "content": "Hello! How are you today?" + }, + "done": true, + "total_duration": 5191566416, + "load_duration": 2154458, + "prompt_eval_count": 26, + "prompt_eval_duration": 383809000, + "eval_count": 298, + "eval_duration": 4799921000 +} +``` + +## Create a Model + +```shell +POST /api/create +``` + +Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation must also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using [Create a Blob](#create-a-blob) and the value to the path indicated in the response. 
+
+### Parameters
+
+- `name`: name of the model to create
+- `modelfile` (optional): contents of the Modelfile
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+- `path` (optional): path to the Modelfile
+
+### Examples
+
+#### Create a new model
+
+Create a new model from a `Modelfile`.
+
+##### Request
+
+```shell
+curl http://localhost:11434/api/create -d '{
+  "name": "mario",
+  "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
+}'
+```
+
+##### Response
+
+A stream of JSON objects. Notice that the final JSON object shows a `"status": "success"`.
+
+```json
+{"status":"reading model metadata"}
+{"status":"creating system layer"}
+{"status":"using already created layer sha256:22f7f8ef5f4c791c1b03d7eb414399294764d7cc82c7e94aa81a1feb80a983a2"}
+{"status":"using already created layer sha256:8c17c2ebb0ea011be9981cc3922db8ca8fa61e828c5d3f44cb6ae342bf80460b"}
+{"status":"using already created layer sha256:7c23fb36d80141c4ab8cdbb61ee4790102ebd2bf7aeff414453177d4f2110e5d"}
+{"status":"using already created layer sha256:2e0493f67d0c8c9c68a8aeacdf6a38a2151cb3c4c1d42accf296e19810527988"}
+{"status":"using already created layer sha256:2759286baa875dc22de5394b4a925701b1896a7e3f8e53275c36f75a877a82c9"}
+{"status":"writing layer sha256:df30045fe90f0d750db82a058109cecd6d4de9c90a3d75b19c09e5f64580bb42"}
+{"status":"writing layer sha256:f18a68eb09bf925bb1b669490407c1b1251c5db98dc4d3d81f3088498ea55690"}
+{"status":"writing manifest"}
+{"status":"success"}
+```
+
+### Check if a Blob Exists
+
+```shell
+HEAD /api/blobs/:digest
+```
+
+Ensures that the file blob used for a `FROM` or `ADAPTER` field exists on the server. This checks your Ollama server, not Ollama.ai.
+
+#### Query Parameters
+
+- `digest`: the SHA256 digest of the blob
+
+#### Examples
+
+##### Request
+
+```shell
+curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
+```
+
+##### Response
+
+Returns 200 OK if the blob exists, or 404 Not Found if it does not.
+
+### Create a Blob
+
+```shell
+POST /api/blobs/:digest
+```
+
+Create a blob from a file on the server. Returns the server file path.
+
+#### Query Parameters
+
+- `digest`: the expected SHA256 digest of the file
+
+#### Examples
+
+##### Request
+
+```shell
+curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
+```
+
+##### Response
+
+Returns 201 Created if the blob was successfully created, or 400 Bad Request if the digest is not as expected.
+
+## List Local Models
+
+```shell
+GET /api/tags
+```
+
+List models that are available locally.
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/tags
+```
+
+#### Response
+
+A single JSON object will be returned.
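+
+This endpoint is not wrapped by the Kotlin client in this patch yet; as a plain-Ktor sketch, the same listing could be fetched with:
+
+```kotlin
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import kotlinx.coroutines.runBlocking
+
+fun main() = runBlocking {
+    val client = HttpClient(CIO)
+    // GET /api/tags lists the locally available models as a single JSON object.
+    println(client.get("http://localhost:11434/api/tags").bodyAsText())
+    client.close()
+}
+```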
+
+```json
+{
+  "models": [
+    {
+      "name": "codellama:13b",
+      "modified_at": "2023-11-04T14:56:49.277302595-07:00",
+      "size": 7365960935,
+      "digest": "9f438cb9cd581fc025612d27f7c1a6669ff83a8bb0ed86c94fcf4c5440555697",
+      "details": {
+        "format": "gguf",
+        "family": "llama",
+        "families": null,
+        "parameter_size": "13B",
+        "quantization_level": "Q4_0"
+      }
+    },
+    {
+      "name": "llama2:latest",
+      "modified_at": "2023-12-07T09:32:18.757212583-08:00",
+      "size": 3825819519,
+      "digest": "fe938a131f40e6f6d40083c9f0f430a515233eb2edaa6d72eb85c50d64f2300e",
+      "details": {
+        "format": "gguf",
+        "family": "llama",
+        "families": null,
+        "parameter_size": "7B",
+        "quantization_level": "Q4_0"
+      }
+    }
+  ]
+}
+```
+
+## Show Model Information
+
+```shell
+POST /api/show
+```
+
+Show information about a model including details, modelfile, template, parameters, license, and system prompt.
+
+### Parameters
+
+- `name`: name of the model to show
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/show -d '{
+  "name": "llama2"
+}'
+```
+
+#### Response
+
+```json
+{
+  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSSISTANT:\"",
+  "parameters": "num_ctx 4096\nstop \u003c/s\u003e\nstop USER:\nstop ASSSISTANT:",
+  "template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: ",
+  "details": {
+    "format": "gguf",
+    "family": "llama",
+    "families": ["llama", "clip"],
+    "parameter_size": "7B",
+    "quantization_level": "Q4_0"
+  }
+}
+```
+
+## Copy a Model
+
+```shell
+POST /api/copy
+```
+
+Copy a model. Creates a model with another name from an existing model.
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/copy -d '{
+  "source": "llama2",
+  "destination": "llama2-backup"
+}'
+```
+
+#### Response
+
+Returns a 200 OK if successful, or a 404 Not Found if the source model doesn't exist.
+
+## Delete a Model
+
+```shell
+DELETE /api/delete
+```
+
+Delete a model and its data.
+
+### Parameters
+
+- `name`: model name to delete
+
+### Examples
+
+#### Request
+
+```shell
+curl -X DELETE http://localhost:11434/api/delete -d '{
+  "name": "llama2:13b"
+}'
+```
+
+#### Response
+
+Returns a 200 OK if successful, or a 404 Not Found if the model to be deleted doesn't exist.
+
+## Pull a Model
+
+```shell
+POST /api/pull
+```
+
+Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.
+
+### Parameters
+
+- `name`: name of the model to pull
+- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/pull -d '{
+  "name": "llama2"
+}'
+```
+
+#### Response
+
+If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
+
+The first object is the manifest:
+
+```json
+{
+  "status": "pulling manifest"
+}
+```
+
+Then there is a series of downloading responses.
+The `completed` key may not be included until a download has completed. The number of files to be downloaded depends on the number of layers specified in the manifest.
+
+```json
+{
+  "status": "downloading digestname",
+  "digest": "digestname",
+  "total": 2142590208,
+  "completed": 241970
+}
+```
+
+After all the files are downloaded, the final responses are:
+
+```json
+{
+  "status": "verifying sha256 digest"
+}
+{
+  "status": "writing manifest"
+}
+{
+  "status": "removing any unused layers"
+}
+{
+  "status": "success"
+}
+```
+
+If `stream` is set to `false`, then the response is a single JSON object:
+
+```json
+{
+  "status": "success"
+}
+```
+
+## Push a Model
+
+```shell
+POST /api/push
+```
+
+Upload a model to a model library. Requires registering for ollama.ai and adding a public key first.
+
+### Parameters
+
+- `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
+- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/push -d '{
+  "name": "mattw/pygmalion:latest"
+}'
+```
+
+#### Response
+
+If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
+
+```json
+{ "status": "retrieving manifest" }
+```
+
+and then:
+
+```json
+{
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
+}
+```
+
+Then there is a series of uploading responses:
+
+```json
+{
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
+}
+```
+
+Finally, when the upload is complete:
+
+```json
+{"status":"pushing manifest"}
+{"status":"success"}
+```
+
+If `stream` is set to `false`, then the response is a single JSON object:
+
+```json
+{ "status": "success" }
+```
+
+## Generate Embeddings
+
+```shell
+POST /api/embeddings
+```
+
+Generate embeddings from a model.
+
+### Parameters
+
+- `model`: name of model to generate embeddings from
+- `prompt`: text to generate embeddings for
+
+Advanced parameters:
+
+- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
+- `keep_alive`: controls how long the model will stay loaded into memory following the request (default: `5m`)
+
+### Examples
+
+#### Request
+
+```shell
+curl http://localhost:11434/api/embeddings -d '{
+  "model": "all-minilm",
+  "prompt": "Here is an article about llamas..."
+}'
+```
+
+#### Response
+
+```json
+{
+  "embedding": [
+    0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
+    0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
+  ]
+}
+```
diff --git a/ollama-client/build.gradle.kts b/ollama-client/build.gradle.kts
new file mode 100644
index 0000000..fa51bd7
--- /dev/null
+++ b/ollama-client/build.gradle.kts
@@ -0,0 +1,14 @@
+plugins {
+    `maven-publish`
+}
+
+kotlin {
+    jvm()
+    sourceSets {
+        commonMain {
+            dependencies {
+                api(projects.ollamaClient.ollamaClientCore)
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/jvm/.gitkeep b/ollama-client/jvm/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/ollama-client/ollama-client-core/build.gradle.kts b/ollama-client/ollama-client-core/build.gradle.kts
new file mode 100644
index 0000000..ec91f97
--- /dev/null
+++ b/ollama-client/ollama-client-core/build.gradle.kts
@@ -0,0 +1,51 @@
+plugins {
+    alias(libs.plugins.kotlinx.serialization)
+    alias(libs.plugins.kover)
+    `maven-publish`
+}
+
+kotlin {
+    jvm()
+    macosArm64()
+    macosX64()
+
+    sourceSets {
+        commonMain.dependencies {
+            implementation(libs.kotlinx.coroutines.core)
+            api(libs.kotlinx.serialization.json)
+            api(libs.bundles.ktor.client)
+            api(projects.common)
+        }
+
+        commonTest.dependencies {
+            implementation(libs.ktor.client.mock)
+            api(projects.common)
+        }
+
+        macosMain.dependencies {
+            api(libs.ktor.client.darwin)
+        }
+
+        jvmMain.dependencies {
+            api(libs.ktor.client.cio)
+        }
+
+        jvmTest.dependencies {
+            implementation(project.dependencies.platform(libs.junit.bom))
+            implementation(libs.bundles.jvm.test)
+            implementation(libs.kotlinx.coroutines.test)
+            implementation(libs.koin.test)
+            implementation(libs.koin.test.junit5)
+            implementation(libs.app.cash.turbine)
+            implementation("com.tngtech.archunit:archunit-junit5:1.1.0")
+            implementation("org.reflections:reflections:0.10.2")
+        }
+    }
+}
+
+tasks {
+    named<Test>("jvmTest") {
+        useJUnitPlatform()
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt
new file mode 100644
index 0000000..08ee94c
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/Ollama.kt
@@ -0,0 +1,12 @@
+package com.tddworks.ollama.api
+
+/**
+ * @author hanrw
+ * @date 2024/4/14 17:32
+ */
+class Ollama {
+    companion object {
+        const val BASE_URL = "https://ollama.com"
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt
new file mode 100644
index 0000000..85617f0
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatApi.kt
@@ -0,0 +1,8 @@
+package com.tddworks.ollama.api.chat
+
+import kotlinx.coroutines.flow.Flow
+
+interface OllamaChatApi {
+    suspend fun stream(request: OllamaChatRequest): Flow<OllamaChatResponse>
+    suspend fun request(request: OllamaChatRequest): OllamaChatResponse
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt
new file mode 100644
index 0000000..5a6acec
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatRequest.kt
@@ -0,0 +1,24 @@
+package com.tddworks.ollama.api.chat
+
+import com.tddworks.common.network.api.StreamableRequest
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+
+@Serializable
+data class OllamaChatRequest(
+    @SerialName("model") val model: String,
+    @SerialName("messages") val messages: List<OllamaChatMessage>,
+    @SerialName("format") val format: String? = null,
+//    @SerialName("options") val options: Map? = null,
+//    @SerialName("stream") val stream: Boolean? = null,
+    @SerialName("keep_alive") val keepAlive: String? = null,
+) : StreamableRequest
+
+
+@Serializable
+data class OllamaChatMessage(
+    @SerialName("role") val role: String,
+    @SerialName("content") val content: String,
+    @SerialName("images") val images: List<String>? = null,
+)
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt
new file mode 100644
index 0000000..c28c56a
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/OllamaChatResponse.kt
@@ -0,0 +1,55 @@
+package com.tddworks.ollama.api.chat
+
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+/**
+ * {
+ *   "model": "llama2",
+ *   "created_at": "2023-08-04T08:52:19.385406455-07:00",
+ *   "message": {
+ *     "role": "assistant",
+ *     "content": "The"
+ *   },
+ *   "done": false
+ * }
+ */
+@Serializable
+data class OllamaChatResponse(
+    @SerialName("model") val model: String,
+    @SerialName("created_at") val createdAt: String,
+    @SerialName("message") val message: OllamaChatMessage? = null,
+    @SerialName("done") val done: Boolean?,
+    @SerialName("total_duration") val totalDuration: Long? = null,
+    @SerialName("load_duration") val loadDuration: Long? = null,
+    @SerialName("prompt_eval_count") val promptEvalCount: Int? = null,
+    @SerialName("prompt_eval_duration") val promptEvalDuration: Long? = null,
+    @SerialName("eval_count") val evalCount: Int? = null,
+    @SerialName("eval_duration") val evalDuration: Long? = null,
+)
+
+/**
+ * {
+ *   "model": "llama2",
+ *   "created_at": "2023-08-04T19:22:45.499127Z",
+ *   "done": true,
+ *   "total_duration": 8113331500,
+ *   "load_duration": 6396458,
+ *   "prompt_eval_count": 61,
+ *   "prompt_eval_duration": 398801000,
+ *   "eval_count": 468,
+ *   "eval_duration": 7701267000
+ * }
+ */
+@Serializable
+data class FinalOllamaChatResponse(
+    @SerialName("model") val model: String,
+    @SerialName("created_at") val createdAt: String,
+    @SerialName("done") val done: Boolean?,
+    @SerialName("total_duration") val totalDuration: Long?,
+    @SerialName("load_duration") val loadDuration: Long?,
+    @SerialName("prompt_eval_count") val promptEvalCount: Int?,
+    @SerialName("prompt_eval_duration") val promptEvalDuration: Long?,
+    @SerialName("eval_count") val evalCount: Int?,
+    @SerialName("eval_duration") val evalDuration: Long?,
+)
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt
new file mode 100644
index 0000000..13d6b8a
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/DefaultOllamaChatApi.kt
@@ -0,0 +1,45 @@
+package com.tddworks.ollama.api.chat.internal
+
+import com.tddworks.common.network.api.ktor.api.HttpRequester
+import com.tddworks.common.network.api.ktor.api.performRequest
+import com.tddworks.common.network.api.ktor.api.streamRequest
+import com.tddworks.ollama.api.chat.OllamaChatApi
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.OllamaChatResponse
+import io.ktor.client.request.*
+import io.ktor.http.*
+import kotlinx.coroutines.flow.Flow
+import kotlinx.serialization.json.Json
+
+class DefaultOllamaChatApi(
+    private val requester: HttpRequester,
+    private val jsonLenient: Json = JsonLenient,
+) : OllamaChatApi {
+    override suspend fun stream(request: OllamaChatRequest): Flow<OllamaChatResponse> {
+        return requester.streamRequest {
+            method = HttpMethod.Post
+            url(path = CHAT_API_PATH)
+            setBody(request.asStreamRequest(jsonLenient))
+            contentType(ContentType.Application.Json)
+            accept(ContentType.Text.EventStream)
+            headers {
+                append(HttpHeaders.CacheControl, "no-cache")
+                append(HttpHeaders.Connection, "keep-alive")
+            }
+        }
+    }
+
+    override suspend fun request(request: OllamaChatRequest): OllamaChatResponse {
+        return requester.performRequest {
+            method = HttpMethod.Post
+            url(path = CHAT_API_PATH)
+            setBody(request)
+            contentType(ContentType.Application.Json)
+        }
+    }
+
+    companion object {
+        const val CHAT_API_PATH = "/api/chat"
+    }
+}
+
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt
new file mode 100644
index 0000000..cef7574
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/JsonLenient.kt
@@ -0,0 +1,23 @@
+package com.tddworks.ollama.api.chat.internal
+
+import com.tddworks.ollama.api.chat.internal.json.ollamaModule
+import kotlinx.serialization.json.Json
+
+
+/**
+ * Represents a JSON object that allows for leniency and ignores unknown keys.
+ *
+ * @property isLenient Removes the JSON specification restriction (RFC-4627) and makes the parser more liberal toward malformed input.
+ *   In lenient mode, quoted boolean literals and unquoted string literals are allowed.
+ *   Its relaxations may be expanded in the future, making the lenient parser even more permissive to invalid input, replacing such values with defaults.
+ *   `false` by default.
+ * @property ignoreUnknownKeys Specifies whether encounters of unknown properties in the input JSON should be ignored instead of throwing a SerializationException. `false` by default.
+ */
+val JsonLenient = Json {
+    isLenient = true
+    ignoreUnknownKeys = true
+    // https://github.com/Kotlin/kotlinx.serialization/blob/master/docs/json.md#class-discriminator-for-polymorphism
+    classDiscriminator = "#class"
+    serializersModule = ollamaModule
+    encodeDefaults = true
+    explicitNulls = false
+}
diff --git a/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt
new file mode 100644
index 0000000..4b606a8
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/commonMain/kotlin/com/tddworks/ollama/api/chat/internal/json/OllamaModule.kt
@@ -0,0 +1,32 @@
+package com.tddworks.ollama.api.chat.internal.json
+
+import com.tddworks.common.network.api.StreamableRequest
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import kotlinx.serialization.modules.SerializersModule
+import kotlinx.serialization.modules.polymorphic
+
+/**
+ * The `SerializersModule` that defines the serialization and deserialization
+ * rules for the `StreamableRequest` class and its subclasses.
+ */
+val ollamaModule = SerializersModule {
+    polymorphic(StreamableRequest::class) {
+        // Serialize/deserialize OllamaChatRequest as a StreamableRequest subclass.
+        subclass(OllamaChatRequest::class, OllamaChatRequest.serializer())
+        // Fall back to the OllamaChatRequest serializer when the input carries
+        // no class discriminator.
+        defaultDeserializer { OllamaChatRequest.serializer() }
+    }
+}
+
diff --git a/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt
new file mode 100644
index 0000000..677a17d
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/MockHttpClient.kt
@@ -0,0 +1,43 @@
+package com.tddworks.ollama.api
+
+
+import com.tddworks.common.network.api.ktor.internal.JsonLenient
+import io.ktor.client.*
+import io.ktor.client.engine.mock.*
+import io.ktor.client.plugins.*
+import io.ktor.client.plugins.contentnegotiation.*
+import io.ktor.client.request.*
+import io.ktor.http.*
+import io.ktor.serialization.kotlinx.*
+
+/**
+ * See https://ktor.io/docs/http-client-testing.html#usage
+ */
+fun mockHttpClient(mockResponse: String) = HttpClient(MockEngine) {
+
+    val headers = headersOf("Content-Type" to listOf(ContentType.Application.Json.toString()))
+
+    install(ContentNegotiation) {
+        register(ContentType.Application.Json, KotlinxSerializationConverter(JsonLenient))
+    }
+
+    engine {
+        addHandler { request ->
+            if (request.url.encodedPath == "/api/chat") {
+                respond(mockResponse, HttpStatusCode.OK, headers)
+            } else {
+                error("Unhandled ${request.url.encodedPath}")
+            }
+        }
+    }
+
+    defaultRequest {
+        url {
+            protocol = URLProtocol.HTTPS
+            // Any host works here: MockEngine intercepts every request before it leaves the client.
+            host = "localhost"
+        }
+
+        header(HttpHeaders.ContentType, ContentType.Application.Json)
+        contentType(ContentType.Application.Json)
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt
new file mode 100644
index 0000000..f855832
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiITest.kt
@@ -0,0 +1,73 @@
+package com.tddworks.ollama.api.internal
+
+import com.tddworks.common.network.api.ktor.internal.DefaultHttpRequester
+import com.tddworks.common.network.api.ktor.internal.createHttpClient
+import com.tddworks.di.initKoin
+import com.tddworks.ollama.api.Ollama
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.internal.DefaultOllamaChatApi
+import com.tddworks.ollama.api.chat.internal.JsonLenient
+import kotlinx.coroutines.test.runTest
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.BeforeEach
+import org.junit.jupiter.api.Test
+import org.koin.test.junit5.AutoCloseKoinTest
+
+class DefaultOllamaChatApiITest : AutoCloseKoinTest() {
+
+    @BeforeEach
+    fun setUp() {
+        initKoin()
+    }
+
+    @Test
+    fun `should return correct base url`() {
+        assertEquals("https://ollama.com", Ollama.BASE_URL)
+    }
+
+    @Test
+    fun `should return stream response`() = runTest {
+        val ollamaChatApi = DefaultOllamaChatApi(
+            requester = DefaultHttpRequester(
+                createHttpClient(
+                    url = { "localhost" },
+                    json = JsonLenient,
+                )
+            )
+        )
+
+        ollamaChatApi.stream(
+            OllamaChatRequest(
+                model = "llama2",
+                messages = listOf(
+                    OllamaChatMessage(
+                        role = "user",
+                        content = "hello"
+                    )
+                )
+            )
+        ).collect {
+            println("stream response: $it")
+        }
+    }
+}
\ No newline at end of file
diff --git a/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt
new file mode 100644
index 0000000..e6b43e4
--- /dev/null
+++ b/ollama-client/ollama-client-core/src/jvmTest/kotlin/com/tddworks/ollama/api/internal/DefaultOllamaChatApiTest.kt
@@ -0,0 +1,152 @@
+import com.tddworks.common.network.api.ktor.internal.DefaultHttpRequester
+import com.tddworks.ollama.api.chat.OllamaChatMessage
+import com.tddworks.ollama.api.chat.OllamaChatRequest
+import com.tddworks.ollama.api.chat.OllamaChatResponse
+import com.tddworks.ollama.api.chat.internal.DefaultOllamaChatApi
+import com.tddworks.ollama.api.chat.internal.JsonLenient
+import com.tddworks.ollama.api.mockHttpClient
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.flow.toList
+import kotlinx.coroutines.test.*
+import kotlinx.serialization.json.Json
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.extension.AfterEachCallback
+import org.junit.jupiter.api.extension.BeforeEachCallback
+import org.junit.jupiter.api.extension.ExtensionContext
+import org.junit.jupiter.api.extension.RegisterExtension
+import org.koin.dsl.module
+import org.koin.test.KoinTest
+import org.koin.test.junit5.KoinTestExtension
+
+class TestKoinCoroutineExtension(private val testDispatcher: TestDispatcher = StandardTestDispatcher()) :
+    BeforeEachCallback, AfterEachCallback {
+    override fun beforeEach(context: ExtensionContext?) {
+        Dispatchers.setMain(testDispatcher)
+    }
+
+    override fun afterEach(context: ExtensionContext?) {
+        Dispatchers.resetMain()
+    }
+}
+
+class DefaultOllamaChatApiTest : KoinTest {
+    @JvmField
+    @RegisterExtension
+    // Sets the main dispatcher to a test dispatcher. UnconfinedTestDispatcher
+    // launches coroutines eagerly, giving the same scheduling behavior as in a
+    // real app/production.
+    val testKoinCoroutineExtension = TestKoinCoroutineExtension(UnconfinedTestDispatcher())
+
+    // Required by kotlin/com/tddworks/common/network/api/ktor/api/Stream.kt, which
+    // looks up Json via getInstance():
+    // fun json(): Json {
+    //     return getInstance()
+    // }
+    @JvmField
+    @RegisterExtension
+    val koinTestExtension = KoinTestExtension.create {
+        modules(
+            module {
+                single { JsonLenient }
+            })
+    }
+
+    @Test
+    fun `should return stream of JSON response`() = runTest {
+        // Given
+        val request = OllamaChatRequest(
+            model = "llama2",
+            messages = listOf(
+                OllamaChatMessage(
+                    role = "user",
+                    content = "why is the sky blue?"
+                )
+            )
+        )
+
+        val api = DefaultOllamaChatApi(
+            DefaultHttpRequester(
+                httpClient = mockHttpClient("data: { \"model\": \"llama2\", \"created_at\": \"2023-08-04T08:52:19.385406455-07:00\", \"message\": { \"role\": \"assistant\", \"content\": \"The\", \"images\": null }, \"done\": false }")
+            )
+        )
+
+        // When
+        val responses = api.stream(request).toList()
+
+        // Then
+        assertEquals(
+            listOf(
+                OllamaChatResponse(
+                    model = "llama2",
+                    createdAt = "2023-08-04T08:52:19.385406455-07:00",
+                    message = OllamaChatMessage(
+                        role = "assistant",
+                        content = "The"
+                    ),
+                    done = false
+                )
+            ), responses
+        )
+    }
+
+    @Test
+    fun `should return single JSON response`() = runTest {
+        // Given
+        val request = OllamaChatRequest(
+            model = "llama2",
+            messages = listOf(
+                OllamaChatMessage(
+                    role = "user",
+                    content = "why is the sky blue?"
+                )
+            )
+        )
+
+        val api = DefaultOllamaChatApi(
+            DefaultHttpRequester(
+                httpClient = mockHttpClient(
+                    """
+                    {
+                      "model": "llama2",
+                      "created_at": "2023-12-12T14:13:43.416799Z",
+                      "message": {
+                        "role": "assistant",
+                        "content": "Hello! How are you today?"
+                      },
+                      "done": true,
+                      "total_duration": 5191566416,
+                      "load_duration": 2154458,
+                      "prompt_eval_count": 26,
+                      "prompt_eval_duration": 383809000,
+                      "eval_count": 298,
+                      "eval_duration": 4799921000
+                    }
+                    """.trimIndent()
+                )
+            )
+        )
+
+        // When
+        val response = api.request(request)
+
+        // Then
+        assertEquals(
+            OllamaChatResponse(
+                model = "llama2",
+                createdAt = "2023-12-12T14:13:43.416799Z",
+                message = OllamaChatMessage(
+                    role = "assistant",
+                    content = "Hello! How are you today?"
+                ),
+                done = true,
+                totalDuration = 5191566416,
+                loadDuration = 2154458,
+                promptEvalCount = 26,
+                promptEvalDuration = 383809000,
+                evalCount = 298,
+                evalDuration = 4799921000
+            ), response
+        )
+    }
+}