Preload and unload models from memory

lebrunel · Jan 8, 2025 · 7e134a1 · 7e134a1
1 parent 8559d13
commit 7e134a1
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 1 deletion.
diff --git a/lib/ollama.ex b/lib/ollama.ex
@@ -626,6 +626,63 @@ defmodule Ollama do
   end
 
 
+  # Options validated by both `preload/2` and `unload/2` before calling the API.
+  schema :load_model, [
+    model: [type: :string, required: true, doc: "Name of the model to load."],
+    keep_alive: [type: {:or, [:integer, :string]}, doc: "How long to keep the model loaded."]
+  ]
+
+  @doc """
+  Loads a model into memory without generating a completion. Optionally specify
+  a keep alive value (defaults to 5 minutes, set `-1` to permanently keep alive).
+
+  Returns `{:ok, true}` when the model was loaded and `{:ok, false}` when the
+  model could not be found. If `params` fail validation, the `{:error, _}` tuple
+  from `NimbleOptions.validate/2` is returned and no request is made.
+
+  ## Options
+
+  #{doc(:load_model)}
+
+  ## Example
+
+      iex> Ollama.preload(client, model: "llama3.1", keep_alive: "1h")
+      {:ok, true}
+  """
+  @spec preload(client(), keyword()) :: response()
+  def preload(%__MODULE__{} = client, params) when is_list(params) do
+    with {:ok, params} <- NimbleOptions.validate(params, schema(:load_model)) do
+      client
+      |> req(:post, "/generate", json: Map.new(params))
+      |> res_bool()
+    end
+  end
+
+  @doc """
+  Stops a running model and unloads it from memory.
+
+  The request is always sent with `keep_alive: 0`, so any `:keep_alive` value
+  passed in `params` is overridden.
+
+  Returns `{:ok, true}` when the model was unloaded and `{:ok, false}` when the
+  model could not be found. If `params` fail validation, the `{:error, _}` tuple
+  from `NimbleOptions.validate/2` is returned and no request is made.
+
+  ## Options
+
+  - `:model` (`t:String.t/0`) - Required. Name of the model to unload.
+
+  ## Example
+
+      iex> Ollama.unload(client, model: "llama3.1")
+      {:ok, true}
+  """
+  @spec unload(client(), keyword()) :: response()
+  def unload(%__MODULE__{} = client, params) when is_list(params) do
+    with {:ok, params} <- NimbleOptions.validate(params, schema(:load_model)) do
+      # Force a zero keep-alive regardless of caller input; that is the unload request.
+      body = params |> Keyword.put(:keep_alive, 0) |> Map.new()
+
+      client
+      |> req(:post, "/generate", json: body)
+      |> res_bool()
+    end
+  end
+
+
   schema :show_model, [
     name: [
       type: :string,

diff --git a/test/ollama_test.exs b/test/ollama_test.exs
@@ -201,6 +201,26 @@ defmodule OllamaTest do
     end
   end
 
+  describe "preload/2" do
+    test "loads a model into memory", %{client: client} do
+      assert {:ok, true} = Ollama.preload(client, model: "llama3.1")
+    end
+
+    test "returns false when model not found", %{client: client} do
+      assert {:ok, false} = Ollama.preload(client, model: "not-found")
+    end
+  end
+
+  describe "unload/2" do
+    test "unloads a model from memory", %{client: client} do
+      assert {:ok, true} = Ollama.unload(client, model: "llama3.1")
+    end
+
+    # Exercises unload/2 itself (not preload/2) against an unknown model.
+    test "returns false when model not found", %{client: client} do
+      assert {:ok, false} = Ollama.unload(client, model: "not-found")
+    end
+  end
+
   describe "show_model/2" do
     test "shows information about a model", %{client: client} do
       assert {:ok, model} = Ollama.show_model(client, name: "llama2")

diff --git a/test/support/mock_server.ex b/test/support/mock_server.ex
@@ -391,7 +391,7 @@ defmodule Ollama.MockServer do
 
   post "/generate" do
     case conn.body_params do
-      %{"model" => "llama3.1", "format" => fmt} when is_map(fmt) ->
+      %{"format" => fmt} when is_map(fmt) ->
         respond(conn, :completion_structured)
       _ -> handle_request(conn, :completion)
     end