From 6c355c1e534f27a8a3b4d2930fe6e5a99c34c29a Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 12:17:16 -0800
Subject: [PATCH 01/14] WIP fragments: schema plus reading but not yet writing,
 refs #617

---
 llm/cli.py        | 39 +++++++++++++++++++
 llm/migrations.py | 39 +++++++++++++++++++
 llm/models.py     | 98 ++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 163 insertions(+), 13 deletions(-)

diff --git a/llm/cli.py b/llm/cli.py
index 5a9f20b4..d1261d2e 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -53,6 +53,23 @@
 DEFAULT_TEMPLATE = "prompt: "
 
 
+def resolve_fragments(fragments):
+    # These can be URLs or paths
+    resolved = []
+    for fragment in fragments:
+        if fragment.startswith("http://") or fragment.startswith("https://"):
+            response = httpx.get(fragment, follow_redirects=True)
+            response.raise_for_status()
+            resolved.append(response.text)
+        elif fragment == "-":
+            resolved.append(sys.stdin.read())
+        elif pathlib.Path(fragment).exists():
+            resolved.append(pathlib.Path(fragment).read_text())
+        else:
+            raise click.ClickException(f"Fragment {fragment} not found")
+    return resolved
+
+
 class AttachmentType(click.ParamType):
     name = "attachment"
 
@@ -174,6 +191,16 @@ def cli():
     multiple=True,
     help="key/value options for the model",
 )
+@click.option(
+    "fragments", "-f", "--fragment", multiple=True, help="Fragment to add to prompt"
+)
+@click.option(
+    "system_fragments",
+    "--sf",
+    "--system-fragment",
+    multiple=True,
+    help="Fragment to add to system prompt",
+)
 @click.option("-t", "--template", help="Template to use")
 @click.option(
     "-p",
@@ -209,6 +236,8 @@ def prompt(
     attachments,
     attachment_types,
     options,
+    fragments,
+    system_fragments,
     template,
     param,
     no_stream,
@@ -266,6 +295,7 @@ def read_prompt():
             and sys.stdin.isatty()
             and not attachments
             and not attachment_types
+            and not fragments
         ):
             # Hang waiting for input to stdin (unless --save)
             prompt = sys.stdin.read()
@@ -377,6 +407,9 @@ def read_prompt():
 
     prompt = read_prompt()
 
+    fragments = resolve_fragments(fragments)
+    system_fragments = resolve_fragments(system_fragments)
+
     prompt_method = model.prompt
     if conversation:
         prompt_method = conversation.prompt
@@ -388,8 +421,10 @@ async def inner():
                 if should_stream:
                     async for chunk in prompt_method(
                         prompt,
+                        fragments=fragments,
                         attachments=resolved_attachments,
                         system=system,
+                        system_fragments=system_fragments,
                         **validated_options,
                     ):
                         print(chunk, end="")
@@ -398,8 +433,10 @@ async def inner():
                 else:
                     response = prompt_method(
                         prompt,
+                        fragments=fragments,
                         attachments=resolved_attachments,
                         system=system,
+                        system_fragments=system_fragments,
                         **validated_options,
                     )
                     print(await response.text())
@@ -408,8 +445,10 @@ async def inner():
         else:
             response = prompt_method(
                 prompt,
+                fragments=fragments,
                 attachments=resolved_attachments,
                 system=system,
+                system_fragments=system_fragments,
                 **validated_options,
             )
             if should_stream:
diff --git a/llm/migrations.py b/llm/migrations.py
index 91da6429..9f0cf987 100644
--- a/llm/migrations.py
+++ b/llm/migrations.py
@@ -227,3 +227,42 @@ def m012_attachments_tables(db):
         ),
         pk=("response_id", "attachment_id"),
     )
+
+
+@migration
+def m013_fragments_tables(db):
+    db["fragments"].create(
+        {
+            "id": int,
+            "hash": str,
+            "content": str,
+            "alias": str,
+            "datetime_utc": str,
+            "source": str,
+        },
+        pk="id",
+    )
+    db["prompt_fragments"].create(
+        {
+            "response_id": str,
+            "fragment_id": str,
+            "order": int,
+        },
+        foreign_keys=(
+            ("response_id", "responses", "id"),
+            ("fragment_id", "fragments", "id"),
+        ),
+        pk=("response_id", "fragment_id", "order"),
+    )
+    db["system_fragments"].create(
+        {
+            "response_id": str,
+            "fragment_id": str,
+            "order": int,
+        },
+        foreign_keys=(
+            ("response_id", "responses", "id"),
+            ("fragment_id", "fragments", "id"),
+        ),
+        pk=("response_id", "fragment_id", "order"),
+    )
diff --git a/llm/models.py b/llm/models.py
index f5c8fd3b..36da2dae 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -89,10 +89,12 @@ def from_row(cls, row):
 
 @dataclass
 class Prompt:
-    prompt: str
+    _prompt: str
     model: "Model"
+    fragments: Optional[List[str]]
     attachments: Optional[List[Attachment]]
-    system: Optional[str]
+    _system: Optional[str]
+    system_fragments: Optional[List[str]]
     prompt_json: Optional[str]
     options: "Options"
 
@@ -101,18 +103,55 @@ def __init__(
         prompt,
         model,
         *,
+        fragments=None,
         attachments=None,
         system=None,
+        system_fragments=None,
         prompt_json=None,
         options=None,
     ):
-        self.prompt = prompt
+        self._prompt = prompt
         self.model = model
         self.attachments = list(attachments or [])
-        self.system = system
+        self.fragments = fragments or []
+        self._system = system
+        self.system_fragments = system_fragments or []
         self.prompt_json = prompt_json
         self.options = options or {}
 
+    @property
+    def prompt(self):
+        return "\n".join(self.fragments + [self._prompt])
+
+    @property
+    def system(self):
+        bits = [
+            bit.strip()
+            for bit in (self.system_fragments + [self._system or ""])
+            if bit.strip()
+        ]
+        return "\n\n".join(bits)
+
+    @classmethod
+    def from_row(cls, db, row, model):
+        all_fragments = list(db.query(FRAGMENT_SQL, {"response_id": row["id"]}))
+        fragments = [
+            row["content"] for row in all_fragments if row["fragment_type"] == "prompt"
+        ]
+        system_fragments = [
+            row["content"] for row in all_fragments if row["fragment_type"] == "system"
+        ]
+        breakpoint()
+        return cls(
+            prompt=row["prompt"],
+            model=model,
+            fragments=fragments,
+            attachments=[],
+            system=row["system"],
+            system_fragments=system_fragments,
+            options=model.Options(**json.loads(row["options_json"])),
+        )
+
 
 @dataclass
 class _BaseConversation:
@@ -138,8 +177,10 @@ def prompt(
         self,
         prompt: Optional[str],
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> "Response":
@@ -147,8 +188,10 @@ def prompt(
             Prompt(
                 prompt,
                 model=self.model,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 options=self.model.Options(**options),
             ),
             self.model,
@@ -163,8 +206,10 @@ def prompt(
         self,
         prompt: Optional[str],
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> "AsyncResponse":
@@ -172,8 +217,10 @@ def prompt(
             Prompt(
                 prompt,
                 model=self.model,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 options=self.model.Options(**options),
             ),
             self.model,
@@ -182,6 +229,26 @@ def prompt(
         )
 
 
+FRAGMENT_SQL = """
+select
+    'prompt' as fragment_type,
+    f.content,
+    pf."order" as ord
+from prompt_fragments pf
+join fragments f on pf.fragment_id = f.id
+where pf.response_id = :response_id
+union all
+select
+    'system' as fragment_type,
+    f.content,
+    sf."order" as ord
+from system_fragments sf
+join fragments f on sf.fragment_id = f.id
+where sf.response_id = :response_id
+order by fragment_type desc, ord asc;
+"""
+
+
 class _BaseResponse:
     """Base response class shared between sync and async responses"""
 
@@ -217,13 +284,7 @@ def from_row(cls, db, row):
 
         response = cls(
             model=model,
-            prompt=Prompt(
-                prompt=row["prompt"],
-                model=model,
-                attachments=[],
-                system=row["system"],
-                options=model.Options(**json.loads(row["options_json"])),
-            ),
+            prompt=Prompt.from_row(db, row, model),
             stream=False,
         )
         response.id = row["id"]
@@ -233,8 +294,8 @@ def from_row(cls, db, row):
         response._chunks = [row["response"]]
         # Attachments
         response.attachments = [
-            Attachment.from_row(arow)
-            for arow in db.query(
+            Attachment.from_row(attachment_row)
+            for attachment_row in db.query(
                 """
                 select attachments.* from attachments
                 join prompt_attachments on attachments.id = prompt_attachments.attachment_id
@@ -328,6 +389,9 @@ def datetime_utc(self) -> str:
         self._force()
         return self._start_utcnow.isoformat() if self._start_utcnow else ""
 
+    def text_or_raise(self) -> str:
+        return self.text()
+
     def __iter__(self) -> Iterator[str]:
         self._start = time.monotonic()
         self._start_utcnow = datetime.datetime.utcnow()
@@ -541,8 +605,10 @@ def prompt(
         self,
         prompt: str,
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> Response:
@@ -550,8 +616,10 @@ def prompt(
         return Response(
             Prompt(
                 prompt,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 model=self,
                 options=self.Options(**options),
             ),
@@ -578,8 +646,10 @@ def prompt(
         self,
         prompt: str,
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> AsyncResponse:
@@ -587,8 +657,10 @@ def prompt(
         return AsyncResponse(
             Prompt(
                 prompt,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 model=self,
                 options=self.Options(**options),
             ),

From 04f7f0b5d8e87b24fe00cc032539a69f4c3a46f8 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 12:18:43 -0800
Subject: [PATCH 02/14] Remove breakpoint()

---
 llm/models.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llm/models.py b/llm/models.py
index 36da2dae..f61d526c 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -141,7 +141,6 @@ def from_row(cls, db, row, model):
         system_fragments = [
             row["content"] for row in all_fragments if row["fragment_type"] == "system"
         ]
-        breakpoint()
         return cls(
             prompt=row["prompt"],
             model=model,

From c07ba0347dd1e925fbd5117385fb02e7ad43d317 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 20:04:17 -0800
Subject: [PATCH 03/14] Unique index on fragments.alias, refs #617

---
 llm/migrations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llm/migrations.py b/llm/migrations.py
index 9f0cf987..e0e7c36b 100644
--- a/llm/migrations.py
+++ b/llm/migrations.py
@@ -242,6 +242,7 @@ def m013_fragments_tables(db):
         },
         pk="id",
     )
+    db["fragments"].create_index(["alias"], unique=True)
     db["prompt_fragments"].create(
         {
             "response_id": str,

From 84f8363581c7672966d53fde8fc6eb606313c97f Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 21:18:04 -0800
Subject: [PATCH 04/14] Fragments are now persisted, added basic CLI commands

Refs #617
---
 llm/cli.py        | 107 +++++++++++++++++++++++++++++++++++++++++++---
 llm/migrations.py |  15 +++++--
 llm/models.py     |  33 +++++++++++---
 llm/utils.py      |  34 +++++++++++++++
 4 files changed, 172 insertions(+), 17 deletions(-)

diff --git a/llm/cli.py b/llm/cli.py
index d1261d2e..1e766c7d 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -32,7 +32,12 @@
 
 from .migrations import migrate
 from .plugins import pm, load_plugins
-from .utils import mimetype_from_path, mimetype_from_string
+from .utils import (
+    FragmentString,
+    ensure_fragment,
+    mimetype_from_path,
+    mimetype_from_string,
+)
 import base64
 import httpx
 import pathlib
@@ -44,7 +49,7 @@
 from sqlite_utils.utils import rows_from_file, Format
 import sys
 import textwrap
-from typing import cast, Optional, Iterable, Union, Tuple
+from typing import cast, Optional, Iterable, List, Union, Tuple
 import warnings
 import yaml
 
@@ -53,18 +58,22 @@
 DEFAULT_TEMPLATE = "prompt: "
 
 
-def resolve_fragments(fragments):
+def resolve_fragments(fragments: Iterable[str]) -> List[FragmentString]:
+    """
+    Resolve fragments into a list of FragmentString objects (content plus .source)
+    """
     # These can be URLs or paths
     resolved = []
     for fragment in fragments:
         if fragment.startswith("http://") or fragment.startswith("https://"):
             response = httpx.get(fragment, follow_redirects=True)
             response.raise_for_status()
-            resolved.append(response.text)
+            resolved.append(FragmentString(response.text, fragment))
         elif fragment == "-":
-            resolved.append(sys.stdin.read())
+            resolved.append(FragmentString(sys.stdin.read(), "-"))
         elif pathlib.Path(fragment).exists():
-            resolved.append(pathlib.Path(fragment).read_text())
+            path = pathlib.Path(fragment)
+            resolved.append(FragmentString(path.read_text(), str(path.resolve())))
         else:
             raise click.ClickException(f"Fragment {fragment} not found")
     return resolved
@@ -1226,6 +1235,92 @@ def aliases_path():
     click.echo(user_dir() / "aliases.json")
 
 
+@cli.group(
+    cls=DefaultGroup,
+    default="list",
+    default_if_no_args=True,
+)
+def fragments():
+    "Manage fragments"
+
+
+@fragments.command(name="list")
+@click.option("json_", "--json", is_flag=True, help="Output as JSON")
+def fragments_list(json_):
+    "List current fragments"
+    db = sqlite_utils.Database(logs_db_path())
+    migrate(db)
+    sql = """
+    select
+        fragments.id,
+        fragments.hash,
+        fragments.content,
+        fragments.datetime_utc,
+        fragments.source,
+        json_group_array(fragment_aliases.alias) filter (
+            where
+            fragment_aliases.alias is not null
+        ) as aliases
+    from
+        fragments
+    left join
+        fragment_aliases on fragment_aliases.fragment_id = fragments.id
+    group by
+        fragments.id, fragments.hash, fragments.content, fragments.datetime_utc, fragments.source;
+    """
+    results = list(db.query(sql))
+    for result in results:
+        result["aliases"] = json.loads(result["aliases"])
+    click.echo(json.dumps(results, indent=4))
+
+
+@fragments.command(name="set")
+@click.argument("alias")
+@click.argument("fragment")
+def fragments_set(alias, fragment):
+    """
+    Set an alias for a fragment
+
+    Accepts an alias and a file path, URL or '-' for stdin
+
+    Example usage:
+
+    \b
+        llm fragments set docs ./docs.md
+    """
+    resolved = resolve_fragments([fragment])[0]
+    db = sqlite_utils.Database(logs_db_path())
+    migrate(db)
+    alias_sql = """
+    insert into fragment_aliases (alias, fragment_id)
+    values (:alias, :fragment_id)
+    on conflict(alias) do update set
+        fragment_id = excluded.fragment_id;
+    """
+    with db.conn:
+        fragment_id = ensure_fragment(db, resolved)
+        db.conn.execute(alias_sql, {"alias": alias, "fragment_id": fragment_id})
+
+
+@fragments.command(name="remove")
+@click.argument("alias")
+def fragments_remove(alias):
+    """
+    Remove a fragment alias
+
+    Example usage:
+
+    \b
+        llm fragments remove docs
+    """
+    db = sqlite_utils.Database(logs_db_path())
+    migrate(db)
+    with db.conn:
+        db.conn.execute(
+            "delete from fragment_aliases where alias = :alias", {"alias": alias}
+        )
+
+
 @cli.command(name="plugins")
 @click.option("--all", help="Include built-in default plugins", is_flag=True)
 def plugins_list(all):
diff --git a/llm/migrations.py b/llm/migrations.py
index e0e7c36b..eb607422 100644
--- a/llm/migrations.py
+++ b/llm/migrations.py
@@ -236,17 +236,24 @@ def m013_fragments_tables(db):
             "id": int,
             "hash": str,
             "content": str,
-            "alias": str,
             "datetime_utc": str,
             "source": str,
         },
         pk="id",
     )
-    db["fragments"].create_index(["alias"], unique=True)
+    db["fragments"].create_index(["hash"], unique=True)
+    db["fragment_aliases"].create(
+        {
+            "alias": str,
+            "fragment_id": int,
+        },
+        foreign_keys=(("fragment_id", "fragments", "id"),),
+        pk="alias",
+    )
     db["prompt_fragments"].create(
         {
             "response_id": str,
-            "fragment_id": str,
+            "fragment_id": int,
             "order": int,
         },
         foreign_keys=(
@@ -258,7 +265,7 @@ def m013_fragments_tables(db):
     db["system_fragments"].create(
         {
             "response_id": str,
-            "fragment_id": str,
+            "fragment_id": int,
             "order": int,
         },
         foreign_keys=(
diff --git a/llm/models.py b/llm/models.py
index f61d526c..328de4d1 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -18,7 +18,7 @@
     Set,
     Union,
 )
-from .utils import mimetype_from_path, mimetype_from_string
+from .utils import ensure_fragment, mimetype_from_path, mimetype_from_string
 from abc import ABC, abstractmethod
 import json
 from pydantic import BaseModel
@@ -231,18 +231,18 @@ def prompt(
 FRAGMENT_SQL = """
 select
     'prompt' as fragment_type,
-    f.content,
+    fragments.content,
     pf."order" as ord
 from prompt_fragments pf
-join fragments f on pf.fragment_id = f.id
+join fragments on pf.fragment_id = fragments.id
 where pf.response_id = :response_id
 union all
 select
     'system' as fragment_type,
-    f.content,
+    fragments.content,
     sf."order" as ord
 from system_fragments sf
-join fragments f on sf.fragment_id = f.id
+join fragments on sf.fragment_id = fragments.id
 where sf.response_id = :response_id
 order by fragment_type desc, ord asc;
 """
@@ -324,8 +324,8 @@ def log_to_db(self, db):
         response = {
             "id": response_id,
             "model": self.model.model_id,
-            "prompt": self.prompt.prompt,
-            "system": self.prompt.system,
+            "prompt": self.prompt._prompt,
+            "system": self.prompt._system,
             "prompt_json": self._prompt_json,
             "options_json": {
                 key: value
@@ -339,6 +339,25 @@ def log_to_db(self, db):
             "datetime_utc": self.datetime_utc(),
         }
         db["responses"].insert(response)
+        # Persist any fragments
+        for i, fragment in enumerate(self.prompt.fragments):
+            fragment_id = ensure_fragment(db, fragment)
+            db["prompt_fragments"].insert(
+                {
+                    "response_id": response_id,
+                    "fragment_id": fragment_id,
+                    "order": i,
+                },
+            )
+        for i, fragment in enumerate(self.prompt.system_fragments):
+            fragment_id = ensure_fragment(db, fragment)
+            db["system_fragments"].insert(
+                {
+                    "response_id": response_id,
+                    "fragment_id": fragment_id,
+                    "order": i,
+                },
+            )
         # Persist any attachments - loop through with index
         for index, attachment in enumerate(self.prompt.attachments):
             attachment_id = attachment.id()
diff --git a/llm/utils.py b/llm/utils.py
index d2618dd4..94db09d7 100644
--- a/llm/utils.py
+++ b/llm/utils.py
@@ -1,4 +1,5 @@
 import click
+import hashlib
 import httpx
 import json
 import puremagic
@@ -10,6 +11,22 @@
 }
 
 
+class FragmentString(str):
+    def __new__(cls, content, source):
+        # We need to use __new__ since str is immutable
+        instance = super().__new__(cls, content)
+        return instance
+
+    def __init__(self, content, source):
+        self.source = source
+
+    def __str__(self):
+        return super().__str__()
+
+    def __repr__(self):
+        return super().__repr__()
+
+
 def mimetype_from_string(content) -> Optional[str]:
     try:
         type_ = puremagic.from_string(content, mime=True)
@@ -127,3 +144,20 @@ def logging_client() -> httpx.Client:
         transport=_LogTransport(httpx.HTTPTransport()),
         event_hooks={"request": [_no_accept_encoding], "response": [_log_response]},
     )
+
+
+def ensure_fragment(db, content):
+    sql = """
+    insert into fragments (hash, content, datetime_utc, source)
+    values (:hash, :content, datetime('now'), :source)
+    on conflict(hash) do nothing
+    """
+    hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
+    source = None
+    if isinstance(content, FragmentString):
+        source = content.source
+    with db.conn:
+        db.execute(sql, {"hash": hash, "content": content, "source": source})
+        return list(
+            db.query("select id from fragments where hash = :hash", {"hash": hash})
+        )[0]["id"]

From 07b73495967de586bdc9adfed4372232310b84df Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 12:20:20 -0800
Subject: [PATCH 05/14] text_or_raise() on sync Response too

Refs #632
---
 llm/models.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llm/models.py b/llm/models.py
index 328de4d1..6243ebe3 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -395,6 +395,9 @@ def text(self) -> str:
         self._force()
         return "".join(self._chunks)
 
+    def text_or_raise(self) -> str:
+        return self.text()
+
     def json(self) -> Optional[Dict[str, Any]]:
         self._force()
         return self.response_json

From fcdac08a07f6dcf257cb8758bfc12b12cdfffbb5 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 12:31:48 -0800
Subject: [PATCH 06/14] Release 0.18

Refs #507, #600, #603, #608, #611, #612, #614
---
 docs/changelog.md | 13 +++++++++++++
 setup.py          |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 4651d8b9..4440bd55 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,18 @@
 # Changelog
 
+(v0_18)=
+## 0.18 (2024-11-17)
+
+- Initial support for async models. Plugins can now provide an `AsyncModel` subclass that can be accessed in the Python API using the new `llm.get_async_model(model_id)` method. See {ref}`async models in the Python API docs<python-api-async>` and {ref}`implementing async models in plugins <advanced-model-plugins-async>`. [#507](https://github.com/simonw/llm/issues/507)
+- OpenAI models all now include async models, so function calls such as `llm.get_async_model("gpt-4o-mini")` will return an async model.
+- `gpt-4o-audio-preview` model can be used to send audio attachments to the GPT-4o audio model. [#608](https://github.com/simonw/llm/issues/608)
+- Attachments can now be sent without requiring a prompt. [#611](https://github.com/simonw/llm/issues/611)
+- `llm models --options` now includes information on whether a model supports attachments. [#612](https://github.com/simonw/llm/issues/612)
+- `llm models --async` shows available async models.
+- Custom OpenAI-compatible models can now be marked as `can_stream: false` in the YAML if they do not support streaming. Thanks, [Chris Mungall](https://github.com/cmungall). [#600](https://github.com/simonw/llm/pull/600)
+- Fixed bug where OpenAI usage data was incorrectly serialized to JSON. [#614](https://github.com/simonw/llm/issues/614)
+- Standardized on `audio/wav` MIME type for audio attachments rather than `audio/wave`. [#603](https://github.com/simonw/llm/issues/603)
+
 (v0_18a1)=
 ## 0.18a1 (2024-11-14)
 
diff --git a/setup.py b/setup.py
index 15617e74..63bfc1e1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 import os
 
-VERSION = "0.18a1"
+VERSION = "0.18"
 
 
 def get_long_description():

From 5a51fe05231de33c749b2f7c251e6c983f3bd7ac Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Mon, 18 Nov 2024 05:21:08 +0000
Subject: [PATCH 07/14] Ran cog

---
 docs/help.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/docs/help.md b/docs/help.md
index 9db540a3..157897de 100644
--- a/docs/help.md
+++ b/docs/help.md
@@ -71,6 +71,7 @@ Commands:
   embed         Embed text and store or return the result
   embed-models  Manage available embedding models
   embed-multi   Store embeddings for multiple strings at once
+  fragments     Manage fragments
   install       Install packages from PyPI into the same environment as LLM
   keys          Manage stored API keys for different models
   logs          Tools for exploring logged prompts and responses
@@ -112,6 +113,8 @@ Options:
   --at, --attachment-type <TEXT TEXT>...
                                   Attachment with explicit mimetype
   -o, --option <TEXT TEXT>...     key/value options for the model
+  -f, --fragment TEXT             Fragment to add to prompt
+  --sf, --system-fragment TEXT    Fragment to add to system prompt
   -t, --template TEXT             Template to use
   -p, --param <TEXT TEXT>...      Parameters for template
   --no-stream                     Do not stream output
@@ -469,6 +472,66 @@ Options:
   --help  Show this message and exit.
 ```
 
+(help-fragments)=
+### llm fragments --help
+```
+Usage: llm fragments [OPTIONS] COMMAND [ARGS]...
+
+  Manage fragments
+
+Options:
+  --help  Show this message and exit.
+
+Commands:
+  list*   List current fragments
+  remove  Remove a fragment alias
+  set     Set an alias for a fragment
+```
+
+(help-fragments-list)=
+#### llm fragments list --help
+```
+Usage: llm fragments list [OPTIONS]
+
+  List current fragments
+
+Options:
+  --json  Output as JSON
+  --help  Show this message and exit.
+```
+
+(help-fragments-set)=
+#### llm fragments set --help
+```
+Usage: llm fragments set [OPTIONS] ALIAS FRAGMENT
+
+  Set an alias for a fragment
+
+  Accepts an alias and a file path, URL or '-' for stdin
+
+  Example usage:
+
+      llm fragments set docs ./docs.md
+
+Options:
+  --help  Show this message and exit.
+```
+
+(help-fragments-remove)=
+#### llm fragments remove --help
+```
+Usage: llm fragments remove [OPTIONS] ALIAS
+
+  Remove a fragment alias
+
+  Example usage:
+
+      llm fragments remove docs
+
+Options:
+  --help  Show this message and exit.
+```
+
 (help-plugins)=
 ### llm plugins --help
 ```

From dbc1df1c5b6aaecc10366ae935a0524a34bb4054 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 21:28:06 -0800
Subject: [PATCH 08/14] Don't hide exceptions during test runs

---
 llm/cli.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llm/cli.py b/llm/cli.py
index 1e766c7d..392ab411 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -468,7 +468,10 @@ async def inner():
             else:
                 print(response.text())
     except Exception as ex:
-        raise click.ClickException(str(ex))
+        # sys._called_from_test is not a standard attribute; default to False
+        if getattr(sys, "_called_from_test", False):
+            raise
+        raise click.ClickException(str(ex))
 
     # Log to the database
     if (logs_on() or log) and not no_log and not async_:

From 4cd811d787679b35cb0f2340db1a9045c00a1904 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 21:28:29 -0800
Subject: [PATCH 09/14] Fix for error on self._prompt is None

---
 llm/models.py                   |  2 +-
 tests/test_cli_openai_models.py |  4 ++--
 tests/test_llm.py               | 37 ++++++++++++++++-----------------
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/llm/models.py b/llm/models.py
index 6243ebe3..5ca3874c 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -121,7 +121,7 @@ def __init__(
 
     @property
     def prompt(self):
-        return "\n".join(self.fragments + [self._prompt])
+        return "\n".join(self.fragments + ([self._prompt] if self._prompt else []))
 
     @property
     def system(self):
diff --git a/tests/test_cli_openai_models.py b/tests/test_cli_openai_models.py
index 7cbab726..0cff22a9 100644
--- a/tests/test_cli_openai_models.py
+++ b/tests/test_cli_openai_models.py
@@ -140,6 +140,6 @@ def test_only_gpt4_audio_preview_allows_mp3_or_wav(httpx_mock, model, filetype):
     else:
         assert result.exit_code == 1
         long = "audio/mpeg" if filetype == "mp3" else "audio/wav"
-        assert (
-            f"This model does not support attachments of type '{long}'" in result.output
+        assert f"This model does not support attachments of type '{long}'" in str(
+            result
         )
diff --git a/tests/test_llm.py b/tests/test_llm.py
index 0e54cc91..e8fc7e5e 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -363,25 +363,24 @@ def test_openai_completion(mocked_openai_completion, user_path):
 
 def test_openai_completion_system_prompt_error():
     runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        [
-            "-m",
-            "gpt-3.5-turbo-instruct",
-            "Say this is a test",
-            "--no-stream",
-            "--key",
-            "x",
-            "--system",
-            "system prompts not allowed",
-        ],
-        catch_exceptions=False,
-    )
-    assert result.exit_code == 1
-    assert (
-        result.output
-        == "Error: System prompts are not supported for OpenAI completion models\n"
-    )
+    with pytest.raises(NotImplementedError) as ex:
+        runner.invoke(
+            cli,
+            [
+                "-m",
+                "gpt-3.5-turbo-instruct",
+                "Say this is a test",
+                "--no-stream",
+                "--key",
+                "x",
+                "--system",
+                "system prompts not allowed",
+            ],
+            catch_exceptions=False,
+        )
+    assert "System prompts are not supported for OpenAI completion models" in str(
+        ex.value  # the raised exception itself, not the ExceptionInfo wrapper
+    )
 
 
 def test_openai_completion_logprobs_stream(

From 05b86e578551b8211987f4a45e223091e199ebc4 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 21:36:06 -0800
Subject: [PATCH 10/14] Removed duplicate text_or_raise method

---
 llm/models.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llm/models.py b/llm/models.py
index 5ca3874c..e3cbf7bd 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -410,9 +410,6 @@ def datetime_utc(self) -> str:
         self._force()
         return self._start_utcnow.isoformat() if self._start_utcnow else ""
 
-    def text_or_raise(self) -> str:
-        return self.text()
-
     def __iter__(self) -> Iterator[str]:
         self._start = time.monotonic()
         self._start_utcnow = datetime.datetime.utcnow()

From 9b2d4d6256eb82a6a3f67d0e6300f8557778a4c9 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 21:36:49 -0800
Subject: [PATCH 11/14] mypy fix

---
 llm/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm/cli.py b/llm/cli.py
index 392ab411..934dd353 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -58,7 +58,7 @@
 DEFAULT_TEMPLATE = "prompt: "
 
 
-def resolve_fragments(fragments: Iterable[str]) -> List[Tuple[str, str]]:
+def resolve_fragments(fragments: Iterable[str]) -> List[FragmentString]:
     """
     Resolve fragments into a list of (content, source) tuples
     """

From 93e8f44feef30ba7fa1b9be8c5cbb4aa9005ede0 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 21:59:23 -0800
Subject: [PATCH 12/14] Fragment aliases work now, refs #617

---
 llm/cli.py | 65 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 13 deletions(-)

diff --git a/llm/cli.py b/llm/cli.py
index 934dd353..23b6fac3 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -58,10 +58,33 @@
 DEFAULT_TEMPLATE = "prompt: "
 
 
-def resolve_fragments(fragments: Iterable[str]) -> List[FragmentString]:
+class FragmentNotFound(Exception):
+    pass
+
+
+def resolve_fragments(
+    db: sqlite_utils.Database, fragments: Iterable[str]
+) -> List[FragmentString]:
     """
     Resolve fragments into a list of (content, source) tuples
     """
+
+    def _load_by_alias(fragment):
+        rows = list(
+            db.query(
+                """
+                select content, source from fragments
+                join fragment_aliases on fragments.id = fragment_aliases.fragment_id
+                where alias = :alias
+                """,
+                {"alias": fragment},
+            )
+        )
+        if rows:
+            row = rows[0]
+            return row["content"], row["source"]
+        return None, None
+
     # These can be URLs or paths
     resolved = []
     for fragment in fragments:
@@ -71,11 +94,20 @@ def resolve_fragments(fragments: Iterable[str]) -> List[FragmentString]:
             resolved.append(FragmentString(response.text, fragment))
         elif fragment == "-":
             resolved.append(FragmentString(sys.stdin.read(), "-"))
-        elif pathlib.Path(fragment).exists():
-            path = pathlib.Path(fragment)
-            resolved.append(FragmentString(path.read_text(), str(path.resolve())))
         else:
-            raise click.ClickException(f"Fragment {fragment} not found")
+            # Try from the DB
+            content, source = _load_by_alias(fragment)
+            if content is not None:
+                resolved.append(FragmentString(content, source))
+            else:
+                # Now try path
+                path = pathlib.Path(fragment)
+                if path.exists():
+                    resolved.append(
+                        FragmentString(path.read_text(), str(path.resolve()))
+                    )
+                else:
+                    raise FragmentNotFound(f"Fragment '{fragment}' not found")
     return resolved
 
 
@@ -284,6 +316,11 @@ def prompt(
 
     model_aliases = get_model_aliases()
 
+    log_path = logs_db_path()
+    (log_path.parent).mkdir(parents=True, exist_ok=True)
+    db = sqlite_utils.Database(log_path)
+    migrate(db)
+
     def read_prompt():
         nonlocal prompt
 
@@ -416,8 +453,11 @@ def read_prompt():
 
     prompt = read_prompt()
 
-    fragments = resolve_fragments(fragments)
-    system_fragments = resolve_fragments(system_fragments)
+    try:
+        fragments = resolve_fragments(db, fragments)
+        system_fragments = resolve_fragments(db, system_fragments)
+    except FragmentNotFound as ex:
+        raise click.ClickException(str(ex))
 
     prompt_method = model.prompt
     if conversation:
@@ -474,11 +514,7 @@ async def inner():
             raise click.ClickException(str(ex))
 
     # Log to the database
-    if (logs_on() or log) and not no_log and not async_:
-        log_path = logs_db_path()
-        (log_path.parent).mkdir(parents=True, exist_ok=True)
-        db = sqlite_utils.Database(log_path)
-        migrate(db)
+    if (logs_on() or log) and not no_log:
         response.log_to_db(db)
 
 
@@ -1291,8 +1327,11 @@ def fragments_set(alias, fragment):
     \b
         llm fragments set docs ./docs.md
     """
-    resolved = resolve_fragments([fragment])[0]
     db = sqlite_utils.Database(logs_db_path())
     migrate(db)
+    try:
+        resolved = resolve_fragments(db, [fragment])[0]
+    except FragmentNotFound as ex:
+        raise click.ClickException(str(ex))
     alias_sql = """
     insert into fragment_aliases (alias, fragment_id)

From deec4f757e78eb67c912589f467f0edb8c171145 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Sun, 17 Nov 2024 22:47:00 -0800
Subject: [PATCH 13/14] Fix for sys._called_from_test attribute error

---
 llm/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llm/cli.py b/llm/cli.py
index 23b6fac3..84ff88a7 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -508,7 +508,7 @@ async def inner():
             else:
                 print(response.text())
     except Exception as ex:
-        if sys._called_from_test:
+        if getattr(sys, "_called_from_test", False):
             raise
         else:
             raise click.ClickException(str(ex))

From bd8596ffd6e07611b47ba659b08b53bbe01c5c05 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Tue, 19 Nov 2024 12:16:33 -0800
Subject: [PATCH 14/14] WIP docs

---
 docs/usage.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/usage.md b/docs/usage.md
index dd44ff10..03cba95c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -102,6 +102,11 @@ cat llm/utils.py | llm -t pytest
 ```
 See {ref}`prompt templates <prompt-templates>` for more.
 
+(fragments)=
+### Fragments
+
+You can use the `-f/--fragment` option to reference fragments of context that you would like to load into your prompt. Fragments can be specified as URLs, file paths or as aliases to previously saved fragments.
+
 (conversation)=
 ### Continuing a conversation