diff --git a/docs/changelog.md b/docs/changelog.md index 4651d8b9..4440bd55 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,18 @@ # Changelog +(v0_18)= +## 0.18 (2024-11-17) + +- Initial support for async models. Plugins can now provide an `AsyncModel` subclass that can be accessed in the Python API using the new `llm.get_async_model(model_id)` method. See {ref}`async models in the Python API docs <python-api-async>` and {ref}`implementing async models in plugins <advanced-model-plugins-async>`. [#507](https://github.com/simonw/llm/issues/507) +- OpenAI models all now include async models, so function calls such as `llm.get_async_model("gpt-4o-mini")` will return an async model. +- `gpt-4o-audio-preview` model can be used to send audio attachments to the GPT-4o audio model. [#608](https://github.com/simonw/llm/issues/608) +- Attachments can now be sent without requiring a prompt. [#611](https://github.com/simonw/llm/issues/611) +- `llm models --options` now includes information on whether a model supports attachments. [#612](https://github.com/simonw/llm/issues/612) +- `llm models --async` shows available async models. +- Custom OpenAI-compatible models can now be marked as `can_stream: false` in the YAML if they do not support streaming. Thanks, [Chris Mungall](https://github.com/cmungall). [#600](https://github.com/simonw/llm/pull/600) +- Fixed bug where OpenAI usage data was incorrectly serialized to JSON. [#614](https://github.com/simonw/llm/issues/614) +- Standardized on `audio/wav` MIME type for audio attachments rather than `audio/wave`. 
[#603](https://github.com/simonw/llm/issues/603) + (v0_18a1)= ## 0.18a1 (2024-11-14) diff --git a/docs/help.md b/docs/help.md index 9db540a3..157897de 100644 --- a/docs/help.md +++ b/docs/help.md @@ -71,6 +71,7 @@ Commands: embed Embed text and store or return the result embed-models Manage available embedding models embed-multi Store embeddings for multiple strings at once + fragments Manage fragments install Install packages from PyPI into the same environment as LLM keys Manage stored API keys for different models logs Tools for exploring logged prompts and responses @@ -112,6 +113,8 @@ Options: --at, --attachment-type ... Attachment with explicit mimetype -o, --option ... key/value options for the model + -f, --fragment TEXT Fragment to add to prompt + --sf, --system-fragment TEXT Fragment to add to system prompt -t, --template TEXT Template to use -p, --param ... Parameters for template --no-stream Do not stream output @@ -469,6 +472,66 @@ Options: --help Show this message and exit. ``` +(help-fragments)= +### llm fragments --help +``` +Usage: llm fragments [OPTIONS] COMMAND [ARGS]... + + Manage fragments + +Options: + --help Show this message and exit. + +Commands: + list* List current fragments + remove Remove a fragment alias + set Set an alias for a fragment +``` + +(help-fragments-list)= +#### llm fragments list --help +``` +Usage: llm fragments list [OPTIONS] + + List current fragments + +Options: + --json Output as JSON + --help Show this message and exit. +``` + +(help-fragments-set)= +#### llm fragments set --help +``` +Usage: llm fragments set [OPTIONS] ALIAS FRAGMENT + + Set an alias for a fragment + + Accepts an alias and a file path, URL or '-' for stdin + + Example usage: + + llm fragments set docs ./docs.md + +Options: + --help Show this message and exit. 
class FragmentNotFound(Exception):
    """Raised when a fragment reference cannot be resolved to any content."""


def resolve_fragments(
    db: sqlite_utils.Database, fragments: Iterable[str]
) -> List[FragmentString]:
    """
    Resolve fragment references into a list of FragmentString objects.

    Each reference is tried, in this order, as:

    1. An ``http://`` or ``https://`` URL, fetched with httpx (redirects followed)
    2. ``-``, meaning "read the content from standard input"
    3. An alias stored in the ``fragment_aliases`` table of ``db``
    4. A path to a file on disk

    Every returned FragmentString carries the content as its string value
    and the place it was loaded from in its ``source`` attribute.

    Raises FragmentNotFound if a reference matches none of the above, or if
    a URL could not be fetched.
    """

    def _load_by_alias(alias):
        # Returns (content, source) for a stored alias, or (None, None)
        rows = list(
            db.query(
                """
                select content, source from fragments
                join fragment_aliases on fragments.id = fragment_aliases.fragment_id
                where alias = :alias
                """,
                {"alias": alias},
            )
        )
        if rows:
            row = rows[0]
            return row["content"], row["source"]
        return None, None

    resolved = []
    for fragment in fragments:
        if fragment.startswith(("http://", "https://")):
            try:
                response = httpx.get(fragment, follow_redirects=True)
                response.raise_for_status()
            except httpx.HTTPError as ex:
                # Callers only handle FragmentNotFound, so surface network and
                # HTTP status failures through it instead of a raw traceback
                raise FragmentNotFound(
                    f"Could not load fragment {fragment}: {ex}"
                ) from ex
            resolved.append(FragmentString(response.text, fragment))
            continue

        if fragment == "-":
            resolved.append(FragmentString(sys.stdin.read(), "-"))
            continue

        # Aliases stored in the database take priority over file paths
        content, source = _load_by_alias(fragment)
        if content is not None:
            resolved.append(FragmentString(content, source))
            continue

        path = pathlib.Path(fragment)
        # is_file() rather than exists(): a directory exists but cannot be
        # read as text, and should be reported as a missing fragment
        if not path.is_file():
            raise FragmentNotFound(f"Fragment '{fragment}' not found")
        resolved.append(FragmentString(path.read_text(), str(path.resolve())))
    return resolved
system_fragments = resolve_fragments(db, system_fragments) + except FragmentNotFound as ex: + raise click.ClickException(str(ex)) + prompt_method = model.prompt if conversation: prompt_method = conversation.prompt @@ -388,8 +470,10 @@ async def inner(): if should_stream: async for chunk in prompt_method( prompt, + fragments=fragments, attachments=resolved_attachments, system=system, + system_fragments=system_fragments, **validated_options, ): print(chunk, end="") @@ -398,8 +482,10 @@ async def inner(): else: response = prompt_method( prompt, + fragments=fragments, attachments=resolved_attachments, system=system, + system_fragments=system_fragments, **validated_options, ) print(await response.text()) @@ -408,8 +494,10 @@ async def inner(): else: response = prompt_method( prompt, + fragments=fragments, attachments=resolved_attachments, system=system, + system_fragments=system_fragments, **validated_options, ) if should_stream: @@ -420,14 +508,13 @@ async def inner(): else: print(response.text()) except Exception as ex: - raise click.ClickException(str(ex)) + if getattr(sys, "_called_from_test", False): + raise + else: + raise click.ClickException(str(ex)) # Log to the database - if (logs_on() or log) and not no_log and not async_: - log_path = logs_db_path() - (log_path.parent).mkdir(parents=True, exist_ok=True) - db = sqlite_utils.Database(log_path) - migrate(db) + if (logs_on() or log) and not no_log: response.log_to_db(db) @@ -1187,6 +1274,95 @@ def aliases_path(): click.echo(user_dir() / "aliases.json") +@cli.group( + cls=DefaultGroup, + default="list", + default_if_no_args=True, +) +def fragments(): + "Manage fragments" + + +@fragments.command(name="list") +@click.option("json_", "--json", is_flag=True, help="Output as JSON") +def fragments_list(json_): + "List current fragments" + db = sqlite_utils.Database(logs_db_path()) + migrate(db) + sql = """ + select + fragments.id, + fragments.hash, + fragments.content, + fragments.datetime_utc, + fragments.source, + 
@fragments.command(name="set")
@click.argument("alias")
@click.argument("fragment")
def fragments_set(alias, fragment):
    """
    Set an alias for a fragment

    Accepts an alias and a file path, URL or '-' for stdin

    Example usage:

    \b
        llm fragments set docs ./docs.md
    """
    db = sqlite_utils.Database(logs_db_path())
    # Migrate BEFORE resolving: resolve_fragments() looks the reference up in
    # the fragments / fragment_aliases tables, which do not exist yet on a
    # database that has never been migrated.
    migrate(db)
    try:
        resolved = resolve_fragments(db, [fragment])[0]
    except FragmentNotFound as ex:
        raise click.ClickException(str(ex))
    # Upsert so that re-using an alias re-points it at the new fragment
    alias_sql = """
    insert into fragment_aliases (alias, fragment_id)
    values (:alias, :fragment_id)
    on conflict(alias) do update set
        fragment_id = excluded.fragment_id;
    """
    with db.conn:
        fragment_id = ensure_fragment(db, resolved)
        db.conn.execute(alias_sql, {"alias": alias, "fragment_id": fragment_id})
@dataclass
class Prompt:
    """
    A prompt to be sent to a model.

    The text passed as ``prompt`` and ``system`` is kept in ``_prompt`` and
    ``_system``. The ``prompt`` and ``system`` properties return that text
    combined with any ``fragments`` / ``system_fragments``.
    """

    _prompt: str
    model: "Model"
    fragments: Optional[List[str]]
    attachments: Optional[List[Attachment]]
    _system: Optional[str]
    system_fragments: Optional[List[str]]
    prompt_json: Optional[str]
    options: "Options"

    def __init__(
        self,
        prompt,
        model,
        *,
        fragments=None,
        attachments=None,
        system=None,
        system_fragments=None,
        prompt_json=None,
        options=None,
    ):
        self._prompt = prompt
        self.model = model
        self.attachments = list(attachments or [])
        # Normalise to real lists: the properties below concatenate these
        # with other lists, which fails for tuples and other iterables
        self.fragments = self._as_list(fragments)
        self._system = system
        self.system_fragments = self._as_list(system_fragments)
        self.prompt_json = prompt_json
        self.options = options or {}

    @staticmethod
    def _as_list(value):
        "Return value as a new list; None becomes [] and a lone string one item"
        if value is None:
            return []
        if isinstance(value, str):
            # list("abc") would silently split the text into characters
            return [value]
        return list(value)

    @property
    def prompt(self):
        "Fragments followed by the prompt text (if any), joined by newlines"
        return "\n".join(self.fragments + ([self._prompt] if self._prompt else []))

    @property
    def system(self):
        "Non-blank system fragments and system text, stripped, blank-line separated"
        bits = [
            bit.strip()
            for bit in (self.system_fragments + [self._system or ""])
            if bit.strip()
        ]
        return "\n\n".join(bits)

    @classmethod
    def from_row(cls, db, row, model):
        """
        Rebuild a Prompt from a logged ``responses`` row.

        Fragment content is loaded from ``db`` using FRAGMENT_SQL, keyed on
        ``row["id"]``. Attachments are not loaded here; the returned Prompt
        has an empty attachments list.
        """
        all_fragments = list(db.query(FRAGMENT_SQL, {"response_id": row["id"]}))
        # Distinct loop variable so the ``row`` argument is not shadowed
        fragments = [
            fragment_row["content"]
            for fragment_row in all_fragments
            if fragment_row["fragment_type"] == "prompt"
        ]
        system_fragments = [
            fragment_row["content"]
            for fragment_row in all_fragments
            if fragment_row["fragment_type"] == "system"
        ]
        return cls(
            prompt=row["prompt"],
            model=model,
            fragments=fragments,
            attachments=[],
            system=row["system"],
            system_fragments=system_fragments,
            options=model.Options(**json.loads(row["options_json"])),
        )
pf."order" as ord +from prompt_fragments pf +join fragments on pf.fragment_id = fragments.id +where pf.response_id = :response_id +union all +select + 'system' as fragment_type, + fragments.content, + sf."order" as ord +from system_fragments sf +join fragments on sf.fragment_id = fragments.id +where sf.response_id = :response_id +order by fragment_type desc, ord asc; +""" + + class _BaseResponse: """Base response class shared between sync and async responses""" @@ -217,13 +283,7 @@ def from_row(cls, db, row): response = cls( model=model, - prompt=Prompt( - prompt=row["prompt"], - model=model, - attachments=[], - system=row["system"], - options=model.Options(**json.loads(row["options_json"])), - ), + prompt=Prompt.from_row(db, row, model), stream=False, ) response.id = row["id"] @@ -233,8 +293,8 @@ def from_row(cls, db, row): response._chunks = [row["response"]] # Attachments response.attachments = [ - Attachment.from_row(arow) - for arow in db.query( + Attachment.from_row(attachment_row) + for attachment_row in db.query( """ select attachments.* from attachments join prompt_attachments on attachments.id = prompt_attachments.attachment_id @@ -264,8 +324,8 @@ def log_to_db(self, db): response = { "id": response_id, "model": self.model.model_id, - "prompt": self.prompt.prompt, - "system": self.prompt.system, + "prompt": self.prompt._prompt, + "system": self.prompt._system, "prompt_json": self._prompt_json, "options_json": { key: value @@ -279,6 +339,25 @@ def log_to_db(self, db): "datetime_utc": self.datetime_utc(), } db["responses"].insert(response) + # Persist any fragments + for i, fragment in enumerate(self.prompt.fragments): + fragment_id = ensure_fragment(db, fragment) + db["prompt_fragments"].insert( + { + "response_id": response_id, + "fragment_id": fragment_id, + "order": i, + }, + ) + for i, fragment in enumerate(self.prompt.system_fragments): + fragment_id = ensure_fragment(db, fragment) + db["system_fragments"].insert( + { + "response_id": response_id, + 
class FragmentString(str):
    """
    A string that also records where its content was loaded from.

    Behaves exactly like the ``content`` string it was created with, with an
    extra ``source`` attribute holding the value passed as ``source``.
    """

    def __new__(cls, content, source):
        # str is immutable, so the text must be supplied in __new__
        instance = super().__new__(cls, content)
        instance.source = source
        return instance

    def __getnewargs__(self):
        # copy and pickle rebuild the object by calling __new__ with these
        # arguments. The version inherited from str supplies only the text,
        # which fails because __new__ here also requires source.
        return (str(self), self.source)
def ensure_fragment(db, content):
    """
    Make sure ``content`` is stored in the ``fragments`` table; return its id.

    Fragments are keyed on the SHA-256 hash of their UTF-8 encoded content.
    If a row with that hash already exists it is left untouched (including
    its ``source``) and its id is returned.

    ``content`` is a string. If it has a ``source`` attribute, as
    FragmentString does, that value is stored in the ``source`` column of a
    newly inserted row; otherwise ``source`` is stored as null.
    """
    sql = """
    insert into fragments (hash, content, datetime_utc, source)
    values (:hash, :content, datetime('now'), :source)
    on conflict(hash) do nothing
    """
    content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
    source = getattr(content, "source", None)
    with db.conn:
        db.execute(
            sql,
            {
                "hash": content_hash,
                # Bind a plain str rather than a str subclass
                "content": str(content),
                "source": source,
            },
        )
    # Look the id up separately: the insert is a no-op when the hash exists
    rows = list(
        db.query("select id from fragments where hash = :hash", {"hash": content_hash})
    )
    return rows[0]["id"]
"--key", - "x", - "--system", - "system prompts not allowed", - ], - catch_exceptions=False, - ) - assert result.exit_code == 1 - assert ( - result.output - == "Error: System prompts are not supported for OpenAI completion models\n" - ) + with pytest.raises(NotImplementedError) as ex: + runner.invoke( + cli, + [ + "-m", + "gpt-3.5-turbo-instruct", + "Say this is a test", + "--no-stream", + "--key", + "x", + "--system", + "system prompts not allowed", + ], + catch_exceptions=False, + ) + assert "System prompts are not supported for OpenAI completion models" in str( + ex + ) def test_openai_completion_logprobs_stream(