diff --git a/docs/changelog.md b/docs/changelog.md
index 4651d8b9..4440bd55 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,18 @@
 # Changelog
 
+(v0_18)=
+## 0.18 (2024-11-17)
+
+- Initial support for async models. Plugins can now provide an `AsyncModel` subclass that can be accessed in the Python API using the new `llm.get_async_model(model_id)` method. See {ref}`async models in the Python API docs<python-api-async>` and {ref}`implementing async models in plugins <advanced-model-plugins-async>`. [#507](https://github.com/simonw/llm/issues/507)
+- OpenAI models all now include async models, so function calls such as `llm.get_async_model("gpt-4o-mini")` will return an async model.
+- `gpt-4o-audio-preview` model can be used to send audio attachments to the GPT-4o audio model. [#608](https://github.com/simonw/llm/issues/608)
+- Attachments can now be sent without requiring a prompt. [#611](https://github.com/simonw/llm/issues/611)
+- `llm models --options` now includes information on whether a model supports attachments. [#612](https://github.com/simonw/llm/issues/612)
+- `llm models --async` shows available async models.
+- Custom OpenAI-compatible models can now be marked as `can_stream: false` in the YAML if they do not support streaming. Thanks, [Chris Mungall](https://github.com/cmungall). [#600](https://github.com/simonw/llm/pull/600)
+- Fixed bug where OpenAI usage data was incorrectly serialized to JSON. [#614](https://github.com/simonw/llm/issues/614)
+- Standardized on `audio/wav` MIME type for audio attachments rather than `audio/wave`. [#603](https://github.com/simonw/llm/issues/603)
+
 (v0_18a1)=
 ## 0.18a1 (2024-11-14)
 
diff --git a/docs/help.md b/docs/help.md
index 9db540a3..157897de 100644
--- a/docs/help.md
+++ b/docs/help.md
@@ -71,6 +71,7 @@ Commands:
   embed         Embed text and store or return the result
   embed-models  Manage available embedding models
   embed-multi   Store embeddings for multiple strings at once
+  fragments     Manage fragments
   install       Install packages from PyPI into the same environment as LLM
   keys          Manage stored API keys for different models
   logs          Tools for exploring logged prompts and responses
@@ -112,6 +113,8 @@ Options:
   --at, --attachment-type <TEXT TEXT>...
                                   Attachment with explicit mimetype
   -o, --option <TEXT TEXT>...     key/value options for the model
+  -f, --fragment TEXT             Fragment to add to prompt
+  --sf, --system-fragment TEXT    Fragment to add to system prompt
   -t, --template TEXT             Template to use
   -p, --param <TEXT TEXT>...      Parameters for template
   --no-stream                     Do not stream output
@@ -469,6 +472,66 @@ Options:
   --help  Show this message and exit.
 ```
 
+(help-fragments)=
+### llm fragments --help
+```
+Usage: llm fragments [OPTIONS] COMMAND [ARGS]...
+
+  Manage fragments
+
+Options:
+  --help  Show this message and exit.
+
+Commands:
+  list*   List current fragments
+  remove  Remove a fragment alias
+  set     Set an alias for a fragment
+```
+
+(help-fragments-list)=
+#### llm fragments list --help
+```
+Usage: llm fragments list [OPTIONS]
+
+  List current fragments
+
+Options:
+  --json  Output as JSON
+  --help  Show this message and exit.
+```
+
+(help-fragments-set)=
+#### llm fragments set --help
+```
+Usage: llm fragments set [OPTIONS] ALIAS FRAGMENT
+
+  Set an alias for a fragment
+
+  Accepts an alias and a file path, URL or '-' for stdin
+
+  Example usage:
+
+      llm fragments set docs ./docs.md
+
+Options:
+  --help  Show this message and exit.
+```
+
+(help-fragments-remove)=
+#### llm fragments remove --help
+```
+Usage: llm fragments remove [OPTIONS] ALIAS
+
+  Remove a fragment alias
+
+  Example usage:
+
+      llm fragments remove docs
+
+Options:
+  --help  Show this message and exit.
+```
+
 (help-plugins)=
 ### llm plugins --help
 ```
diff --git a/docs/usage.md b/docs/usage.md
index dd44ff10..03cba95c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -102,6 +102,11 @@ cat llm/utils.py | llm -t pytest
 ```
 See {ref}`prompt templates <prompt-templates>` for more.
 
+(fragments)=
+### Fragments
+
+You can use the `-f/--fragment` option to reference fragments of context that you would like to load into your prompt. Fragments can be specified as URLs, file paths or as aliases to previously saved fragments.
+
 (conversation)=
 ### Continuing a conversation
 
diff --git a/llm/cli.py b/llm/cli.py
index 5a9f20b4..84ff88a7 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -32,7 +32,12 @@
 
 from .migrations import migrate
 from .plugins import pm, load_plugins
-from .utils import mimetype_from_path, mimetype_from_string
+from .utils import (
+    FragmentString,
+    ensure_fragment,
+    mimetype_from_path,
+    mimetype_from_string,
+)
 import base64
 import httpx
 import pathlib
@@ -44,7 +49,7 @@
 from sqlite_utils.utils import rows_from_file, Format
 import sys
 import textwrap
-from typing import cast, Optional, Iterable, Union, Tuple
+from typing import cast, Optional, Iterable, List, Union, Tuple
 import warnings
 import yaml
 
@@ -53,6 +58,59 @@
 DEFAULT_TEMPLATE = "prompt: "
 
 
+class FragmentNotFound(Exception):
+    "Raised when a fragment is not a URL, '-', a saved alias or an existing path"
+
+
+def resolve_fragments(
+    db: sqlite_utils.Database, fragments: Iterable[str]
+) -> List[FragmentString]:
+    """
+    Resolve fragment references into a list of FragmentString objects.
+
+    Each reference is tried as an http(s) URL, then "-" for stdin, then a
+    saved alias in db, then a file path. Raises FragmentNotFound otherwise.
+    """
+
+    def _load_by_alias(alias):
+        rows = list(
+            db.query(
+                """
+                select content, source from fragments
+                join fragment_aliases on fragments.id = fragment_aliases.fragment_id
+                where alias = :alias
+                """,
+                {"alias": alias},
+            )
+        )
+        if rows:
+            return rows[0]["content"], rows[0]["source"]
+        return None, None
+
+    resolved = []
+    for fragment in fragments:
+        if fragment.startswith(("http://", "https://")):
+            response = httpx.get(fragment, follow_redirects=True)
+            response.raise_for_status()
+            resolved.append(FragmentString(response.text, fragment))
+            continue
+        if fragment == "-":
+            resolved.append(FragmentString(sys.stdin.read(), "-"))
+            continue
+        # Saved aliases take precedence over files of the same name
+        content, source = _load_by_alias(fragment)
+        if content is not None:
+            resolved.append(FragmentString(content, source))
+            continue
+        path = pathlib.Path(fragment)
+        # is_file() rather than exists(): a directory cannot be read as text,
+        # so report it as not found instead of failing inside read_text()
+        if not path.is_file():
+            raise FragmentNotFound(f"Fragment '{fragment}' not found")
+        resolved.append(FragmentString(path.read_text(), str(path.resolve())))
+    return resolved
+
+
 class AttachmentType(click.ParamType):
     name = "attachment"
 
@@ -174,6 +232,16 @@ def cli():
     multiple=True,
     help="key/value options for the model",
 )
+@click.option(
+    "fragments", "-f", "--fragment", multiple=True, help="Fragment to add to prompt"
+)
+@click.option(
+    "system_fragments",
+    "--sf",
+    "--system-fragment",
+    multiple=True,
+    help="Fragment to add to system prompt",
+)
 @click.option("-t", "--template", help="Template to use")
 @click.option(
     "-p",
@@ -209,6 +277,8 @@ def prompt(
     attachments,
     attachment_types,
     options,
+    fragments,
+    system_fragments,
     template,
     param,
     no_stream,
@@ -246,6 +316,11 @@ def prompt(
 
     model_aliases = get_model_aliases()
 
+    log_path = logs_db_path()
+    (log_path.parent).mkdir(parents=True, exist_ok=True)
+    db = sqlite_utils.Database(log_path)
+    migrate(db)
+
     def read_prompt():
         nonlocal prompt
 
@@ -266,6 +341,7 @@ def read_prompt():
             and sys.stdin.isatty()
             and not attachments
             and not attachment_types
+            and not fragments
         ):
             # Hang waiting for input to stdin (unless --save)
             prompt = sys.stdin.read()
@@ -377,6 +453,12 @@ def read_prompt():
 
     prompt = read_prompt()
 
+    try:
+        fragments = resolve_fragments(db, fragments)
+        system_fragments = resolve_fragments(db, system_fragments)
+    except FragmentNotFound as ex:
+        raise click.ClickException(str(ex))
+
     prompt_method = model.prompt
     if conversation:
         prompt_method = conversation.prompt
@@ -388,8 +470,10 @@ async def inner():
                 if should_stream:
                     async for chunk in prompt_method(
                         prompt,
+                        fragments=fragments,
                         attachments=resolved_attachments,
                         system=system,
+                        system_fragments=system_fragments,
                         **validated_options,
                     ):
                         print(chunk, end="")
@@ -398,8 +482,10 @@ async def inner():
                 else:
                     response = prompt_method(
                         prompt,
+                        fragments=fragments,
                         attachments=resolved_attachments,
                         system=system,
+                        system_fragments=system_fragments,
                         **validated_options,
                     )
                     print(await response.text())
@@ -408,8 +494,10 @@ async def inner():
         else:
             response = prompt_method(
                 prompt,
+                fragments=fragments,
                 attachments=resolved_attachments,
                 system=system,
+                system_fragments=system_fragments,
                 **validated_options,
             )
             if should_stream:
@@ -420,14 +508,13 @@ async def inner():
             else:
                 print(response.text())
     except Exception as ex:
-        raise click.ClickException(str(ex))
+        if getattr(sys, "_called_from_test", False):
+            raise
+        else:
+            raise click.ClickException(str(ex))
 
     # Log to the database
-    if (logs_on() or log) and not no_log and not async_:
-        log_path = logs_db_path()
-        (log_path.parent).mkdir(parents=True, exist_ok=True)
-        db = sqlite_utils.Database(log_path)
-        migrate(db)
+    if (logs_on() or log) and not no_log:
         response.log_to_db(db)
 
 
@@ -1187,6 +1274,95 @@ def aliases_path():
     click.echo(user_dir() / "aliases.json")
 
 
+@cli.group(
+    cls=DefaultGroup,
+    default="list",
+    default_if_no_args=True,  # bare "llm fragments" falls through to the "list" subcommand
+)
+def fragments():
+    "Manage fragments"
+
+
+@fragments.command(name="list")
+@click.option("json_", "--json", is_flag=True, help="Output as JSON")
+def fragments_list(json_):  # NOTE(review): json_ is never read below; output is always JSON
+    "List current fragments"
+    db = sqlite_utils.Database(logs_db_path())
+    migrate(db)  # make sure the fragments tables exist before querying them
+    sql = """
+    select
+        fragments.id,
+        fragments.hash,
+        fragments.content,
+        fragments.datetime_utc,
+        fragments.source,
+        json_group_array(fragment_aliases.alias) filter (
+            where
+            fragment_aliases.alias is not null
+        ) as aliases
+    from
+        fragments
+    left join
+        fragment_aliases on fragment_aliases.fragment_id = fragments.id
+    group by
+        fragments.id, fragments.hash, fragments.content, fragments.datetime_utc, fragments.source;
+    """
+    results = list(db.query(sql))
+    for result in results:
+        result["aliases"] = json.loads(result["aliases"])  # JSON array text -> Python list
+    click.echo(json.dumps(results, indent=4))
+
+
+@fragments.command(name="set")
+@click.argument("alias")
+@click.argument("fragment")
+def fragments_set(alias, fragment):
+    """
+    Set an alias for a fragment
+
+    Accepts an alias and a file path, URL or '-' for stdin
+
+    Example usage:
+
+    \b
+        llm fragments set docs ./docs.md
+    """
+    db = sqlite_utils.Database(logs_db_path())
+    migrate(db)  # must run before resolve_fragments(), which queries the fragments tables
+    try:
+        resolved = resolve_fragments(db, [fragment])[0]
+    except FragmentNotFound as ex:
+        raise click.ClickException(str(ex))
+    alias_sql = """
+    insert into fragment_aliases (alias, fragment_id)
+    values (:alias, :fragment_id)
+    on conflict(alias) do update set
+        fragment_id = excluded.fragment_id;
+    """
+    with db.conn:  # store the fragment and (re)point the alias in one transaction
+        fragment_id = ensure_fragment(db, resolved)
+        db.conn.execute(alias_sql, {"alias": alias, "fragment_id": fragment_id})
+
+
+@fragments.command(name="remove")
+@click.argument("alias")
+def fragments_remove(alias):
+    """
+    Remove a fragment alias
+
+    Example usage:
+
+    \b
+        llm fragments remove docs
+    """
+    db = sqlite_utils.Database(logs_db_path())
+    migrate(db)  # make sure fragment_aliases exists before deleting from it
+    with db.conn:  # only the alias row is deleted; the fragments table is not touched here
+        db.conn.execute(
+            "delete from fragment_aliases where alias = :alias", {"alias": alias}
+        )
+
+
 @cli.command(name="plugins")
 @click.option("--all", help="Include built-in default plugins", is_flag=True)
 def plugins_list(all):
diff --git a/llm/migrations.py b/llm/migrations.py
index 91da6429..eb607422 100644
--- a/llm/migrations.py
+++ b/llm/migrations.py
@@ -227,3 +227,50 @@ def m012_attachments_tables(db):
         ),
         pk=("response_id", "attachment_id"),
     )
+
+
+@migration
+def m013_fragments_tables(db):
+    """
+    Create the tables used to store fragments and link them to responses.
+
+    - fragments: one row per distinct piece of content (unique on hash)
+    - fragment_aliases: user-chosen alias pointing at a fragment
+    - prompt_fragments / system_fragments: ordered fragments per response
+    """
+    db["fragments"].create(
+        {
+            "id": int,
+            "hash": str,
+            "content": str,
+            "datetime_utc": str,
+            "source": str,
+        },
+        pk="id",
+    )
+    # Content is de-duplicated by hash; an "on conflict(hash)" insert needs
+    # this unique index to exist.
+    db["fragments"].create_index(["hash"], unique=True)
+    db["fragment_aliases"].create(
+        {
+            "alias": str,
+            "fragment_id": int,
+        },
+        foreign_keys=(("fragment_id", "fragments", "id"),),
+        pk="alias",
+    )
+    # Both link tables share one shape. "order" is part of the primary key so
+    # that the same fragment may appear more than once for a single response.
+    for link_table in ("prompt_fragments", "system_fragments"):
+        db[link_table].create(
+            {
+                "response_id": str,
+                "fragment_id": int,
+                "order": int,
+            },
+            foreign_keys=(
+                ("response_id", "responses", "id"),
+                ("fragment_id", "fragments", "id"),
+            ),
+            pk=("response_id", "fragment_id", "order"),
+        )
diff --git a/llm/models.py b/llm/models.py
index f5c8fd3b..e3cbf7bd 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -18,7 +18,7 @@
     Set,
     Union,
 )
-from .utils import mimetype_from_path, mimetype_from_string
+from .utils import ensure_fragment, mimetype_from_path, mimetype_from_string
 from abc import ABC, abstractmethod
 import json
 from pydantic import BaseModel
@@ -89,10 +89,12 @@ def from_row(cls, row):
 
 @dataclass
 class Prompt:
-    prompt: str
+    _prompt: str
     model: "Model"
+    fragments: Optional[List[str]]
     attachments: Optional[List[Attachment]]
-    system: Optional[str]
+    _system: Optional[str]
+    system_fragments: Optional[List[str]]
     prompt_json: Optional[str]
     options: "Options"
 
@@ -101,18 +103,54 @@ def __init__(
         prompt,
         model,
         *,
+        fragments=None,
         attachments=None,
         system=None,
+        system_fragments=None,
         prompt_json=None,
         options=None,
     ):
-        self.prompt = prompt
+        self._prompt = prompt
         self.model = model
         self.attachments = list(attachments or [])
-        self.system = system
+        self.fragments = fragments or []
+        self._system = system
+        self.system_fragments = system_fragments or []
         self.prompt_json = prompt_json
         self.options = options or {}
 
+    @property
+    def prompt(self):  # fragments first, then the prompt text if non-empty, newline-joined
+        return "\n".join(self.fragments + ([self._prompt] if self._prompt else []))
+
+    @property
+    def system(self):
+        # System fragments come first, then the system prompt itself; every
+        # piece is stripped and blank ones are dropped before joining.
+        candidates = self.system_fragments + [self._system or ""]
+        stripped = (piece.strip() for piece in candidates)
+        kept = [piece for piece in stripped if piece]
+        return "\n\n".join(kept)
+
+    @classmethod
+    def from_row(cls, db, row, model):
+        "Rebuild a Prompt from a responses row plus its logged fragments"
+        by_type = {"prompt": [], "system": []}
+        for frag in db.query(FRAGMENT_SQL, {"response_id": row["id"]}):
+            # FRAGMENT_SQL labels each row 'prompt' or 'system'; rows arrive
+            # already sorted by "order" within each label
+            by_type[frag["fragment_type"]].append(frag["content"])
+        options = model.Options(**json.loads(row["options_json"]))
+        return cls(
+            prompt=row["prompt"],
+            model=model,
+            fragments=by_type["prompt"],
+            attachments=[],
+            system=row["system"],
+            system_fragments=by_type["system"],
+            options=options,
+        )
+
 
 @dataclass
 class _BaseConversation:
@@ -138,8 +176,10 @@ def prompt(
         self,
         prompt: Optional[str],
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> "Response":
@@ -147,8 +187,10 @@ def prompt(
             Prompt(
                 prompt,
                 model=self.model,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 options=self.model.Options(**options),
             ),
             self.model,
@@ -163,8 +205,10 @@ def prompt(
         self,
         prompt: Optional[str],
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> "AsyncResponse":
@@ -172,8 +216,10 @@ def prompt(
             Prompt(
                 prompt,
                 model=self.model,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 options=self.model.Options(**options),
             ),
             self.model,
@@ -182,6 +228,26 @@ def prompt(
         )
 
 
+FRAGMENT_SQL = """
+select
+    'prompt' as fragment_type,
+    fragments.content,
+    pf."order" as ord
+from prompt_fragments pf
+join fragments on pf.fragment_id = fragments.id
+where pf.response_id = :response_id
+union all
+select
+    'system' as fragment_type,
+    fragments.content,
+    sf."order" as ord
+from system_fragments sf
+join fragments on sf.fragment_id = fragments.id
+where sf.response_id = :response_id
+order by fragment_type desc, ord asc;
+"""  # 'system' rows sort before 'prompt' rows (desc); within a type rows follow "order"
+
+
 class _BaseResponse:
     """Base response class shared between sync and async responses"""
 
@@ -217,13 +283,7 @@ def from_row(cls, db, row):
 
         response = cls(
             model=model,
-            prompt=Prompt(
-                prompt=row["prompt"],
-                model=model,
-                attachments=[],
-                system=row["system"],
-                options=model.Options(**json.loads(row["options_json"])),
-            ),
+            prompt=Prompt.from_row(db, row, model),
             stream=False,
         )
         response.id = row["id"]
@@ -233,8 +293,8 @@ def from_row(cls, db, row):
         response._chunks = [row["response"]]
         # Attachments
         response.attachments = [
-            Attachment.from_row(arow)
-            for arow in db.query(
+            Attachment.from_row(attachment_row)
+            for attachment_row in db.query(
                 """
                 select attachments.* from attachments
                 join prompt_attachments on attachments.id = prompt_attachments.attachment_id
@@ -264,8 +324,8 @@ def log_to_db(self, db):
         response = {
             "id": response_id,
             "model": self.model.model_id,
-            "prompt": self.prompt.prompt,
-            "system": self.prompt.system,
+            "prompt": self.prompt._prompt,
+            "system": self.prompt._system,
             "prompt_json": self._prompt_json,
             "options_json": {
                 key: value
@@ -279,6 +339,25 @@ def log_to_db(self, db):
             "datetime_utc": self.datetime_utc(),
         }
         db["responses"].insert(response)
+        # Persist any fragments
+        for i, fragment in enumerate(self.prompt.fragments):
+            fragment_id = ensure_fragment(db, fragment)
+            db["prompt_fragments"].insert(
+                {
+                    "response_id": response_id,
+                    "fragment_id": fragment_id,
+                    "order": i,
+                },
+            )
+        for i, fragment in enumerate(self.prompt.system_fragments):
+            fragment_id = ensure_fragment(db, fragment)
+            db["system_fragments"].insert(
+                {
+                    "response_id": response_id,
+                    "fragment_id": fragment_id,
+                    "order": i,
+                },
+            )
         # Persist any attachments - loop through with index
         for index, attachment in enumerate(self.prompt.attachments):
             attachment_id = attachment.id()
@@ -316,6 +395,9 @@ def text(self) -> str:
         self._force()
         return "".join(self._chunks)
 
+    def text_or_raise(self) -> str:  # currently a plain alias for text()
+        return self.text()
+
     def json(self) -> Optional[Dict[str, Any]]:
         self._force()
         return self.response_json
@@ -541,8 +623,10 @@ def prompt(
         self,
         prompt: str,
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> Response:
@@ -550,8 +634,10 @@ def prompt(
         return Response(
             Prompt(
                 prompt,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 model=self,
                 options=self.Options(**options),
             ),
@@ -578,8 +664,10 @@ def prompt(
         self,
         prompt: str,
         *,
+        fragments: Optional[List[str]] = None,
         attachments: Optional[List[Attachment]] = None,
         system: Optional[str] = None,
+        system_fragments: Optional[List[str]] = None,
         stream: bool = True,
         **options,
     ) -> AsyncResponse:
@@ -587,8 +675,10 @@ def prompt(
         return AsyncResponse(
             Prompt(
                 prompt,
+                fragments=fragments,
                 attachments=attachments,
                 system=system,
+                system_fragments=system_fragments,
                 model=self,
                 options=self.Options(**options),
             ),
diff --git a/llm/utils.py b/llm/utils.py
index d2618dd4..94db09d7 100644
--- a/llm/utils.py
+++ b/llm/utils.py
@@ -1,4 +1,5 @@
 import click
+import hashlib
 import httpx
 import json
 import puremagic
@@ -10,6 +11,22 @@
 }
 
 
+class FragmentString(str):
+    """A str that also records where its content came from, in ``.source``."""
+
+    def __new__(cls, content, source):
+        # str is immutable, so the text itself has to be supplied via __new__
+        return super().__new__(cls, content)
+
+    def __init__(self, content, source):
+        self.source = source
+
+    def __getnewargs__(self):
+        # str.__getnewargs__ yields only the text; include source so that
+        # copy.copy() and pickle can call __new__(cls, content, source)
+        return (str(self), self.source)
+
+
 def mimetype_from_string(content) -> Optional[str]:
     try:
         type_ = puremagic.from_string(content, mime=True)
@@ -127,3 +144,20 @@ def logging_client() -> httpx.Client:
         transport=_LogTransport(httpx.HTTPTransport()),
         event_hooks={"request": [_no_accept_encoding], "response": [_log_response]},
     )
+
+
+def ensure_fragment(db, content):
+    "Insert content into fragments unless its hash is already stored; return the row id"
+    insert_sql = """
+    insert into fragments (hash, content, datetime_utc, source)
+    values (:hash, :content, datetime('now'), :source)
+    on conflict(hash) do nothing
+    """
+    digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
+    source = content.source if isinstance(content, FragmentString) else None
+    with db.conn:
+        db.execute(insert_sql, {"hash": digest, "content": content, "source": source})
+        id_rows = list(
+            db.query("select id from fragments where hash = :hash", {"hash": digest})
+        )
+        return id_rows[0]["id"]
diff --git a/setup.py b/setup.py
index 15617e74..63bfc1e1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 import os
 
-VERSION = "0.18a1"
+VERSION = "0.18"
 
 
 def get_long_description():
diff --git a/tests/test_cli_openai_models.py b/tests/test_cli_openai_models.py
index 7cbab726..0cff22a9 100644
--- a/tests/test_cli_openai_models.py
+++ b/tests/test_cli_openai_models.py
@@ -140,6 +140,6 @@ def test_only_gpt4_audio_preview_allows_mp3_or_wav(httpx_mock, model, filetype):
     else:
         assert result.exit_code == 1
         long = "audio/mpeg" if filetype == "mp3" else "audio/wav"
-        assert (
-            f"This model does not support attachments of type '{long}'" in result.output
+        assert f"This model does not support attachments of type '{long}'" in str(
+            result
         )
diff --git a/tests/test_llm.py b/tests/test_llm.py
index 0e54cc91..e8fc7e5e 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -363,25 +363,24 @@ def test_openai_completion(mocked_openai_completion, user_path):
 
 def test_openai_completion_system_prompt_error():
     runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        [
-            "-m",
-            "gpt-3.5-turbo-instruct",
-            "Say this is a test",
-            "--no-stream",
-            "--key",
-            "x",
-            "--system",
-            "system prompts not allowed",
-        ],
-        catch_exceptions=False,
-    )
-    assert result.exit_code == 1
-    assert (
-        result.output
-        == "Error: System prompts are not supported for OpenAI completion models\n"
-    )
+    with pytest.raises(NotImplementedError) as ex:
+        runner.invoke(
+            cli,
+            [
+                "-m",
+                "gpt-3.5-turbo-instruct",
+                "Say this is a test",
+                "--no-stream",
+                "--key",
+                "x",
+                "--system",
+                "system prompts not allowed",
+            ],
+            catch_exceptions=False,
+        )
+    # Must sit outside the with block: statements after the raising call never run
+    message = str(ex.value)
+    assert "System prompts are not supported for OpenAI completion models" in message
 
 
 def test_openai_completion_logprobs_stream(