Updating OpenAI APIs, Support for GPT-4V(ision) #55

Merged · 1 commit · Nov 11, 2023
37 changes: 19 additions & 18 deletions README.md
@@ -15,35 +15,36 @@ Alfred aims to reduce annotation cost and time by making efficient use of LLMs,
![alt text](assets/poster.png)

# News Update

- **[[FlexGen](https://github.com/FMInference/FlexGen) Support]**
Alfred now supports FlexGen, a high-throughput inference pipeline with a single GPU. Example usage:
- **[[GPT-4V(ision)](https://openai.com/research/gpt-4v-system-card) Support]**
Alfred now supports GPT-4V(ision). Use it to streamline your image annotation tasks! For example:
```python
from alfred import Client
flexgen = Client(model_type="flexgen", model="facebook/opt-30b", local_path='<model_path>', offload_dir="./flexgen-offload-cache")
openai = Client(model_type="openai", model="gpt-4-vision-preview")
image = ... # load your image
openai((image, f"What type is this document? Please choose from {label_space}"))
```
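A slightly fuller sketch of the same call, for reference; the label set and image path below are placeholders, not part of this PR:
```python
from PIL import Image
from alfred import Client

label_space = ["invoice", "receipt", "letter", "scientific paper"]  # placeholder label set
openai = Client(model_type="openai", model="gpt-4-vision-preview")
image = Image.open("sample_document.jpg")  # any PIL image should work here
response = openai((image, f"What type is this document? Please choose from {label_space}"))
print(response)
```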

- **[[vLLM](https://github.com/vllm-project/vllm) Support]**
Alfred now supports vLLM-accelerated models! To use:
```python
vLLMClient = Client(model_type="vllm", model=<your_favourite_model>)
```
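As an illustrative (not prescribed) choice of model, and assuming the client is callable on a plain string prompt as in the examples above:
```python
from alfred import Client

# "facebook/opt-6.7b" is only an example checkpoint; any model vLLM supports should work
vllm_client = Client(model_type="vllm", model="facebook/opt-6.7b")
print(vllm_client("Summarize in one sentence: Alfred reduces annotation cost with LLMs."))
```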
- **[[Claude](https://console.anthropic.com/claude) Support]**
Alfred now supports Claude models through the Anthropic API. Similarly, you can start a chat session with `Client.chat()`!
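For example (mirroring the chat snippet further below):
```python
from alfred import Client

claude = Client(model_type="anthropic", model="claude-2")
claude.chat()  # starts an interactive chat session
```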

- **[[Llama](https://arxiv.org/pdf/2302.13971.pdf) Support]**
Alfred now supports locally hosted Llama through transformers.
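A minimal sketch of what this might look like; the `huggingface` model_type string, the checkpoint name, and the local path are illustrative assumptions, not taken from this PR:
```python
from alfred import Client

# model_type, checkpoint name, and local_path are assumptions for illustration
llama = Client(model_type="huggingface", model="huggyllama/llama-7b", local_path="/path/to/llama-weights")
print(llama("Label this review as positive or negative: 'Great battery life.'"))
```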

- **[Embedding with Alfred]**
Get a vector representation for any input string! Alfred now supports embeddings from locally hosted Hugging Face models or API-based calls to Cohere and OpenAI. To use:
```python
Client.encode(Union[str, List[str]]) -> Union[torch.tensor, List[torch.tensor]]
```
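For instance, a small sketch using the OpenAI embedding backend (the model name matches the embedding model list added in this PR):
```python
from alfred import Client

embedder = Client(model_type="openai", model="text-embedding-ada-002")
vectors = embedder.encode(["weak supervision", "prompted labeling functions"])
# per the signature above, this returns a torch tensor or a list of torch tensors
```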

- **[Server-side Caching for CLIP]**
Alfred now incorporates an automatic server-side caching mechanism to avoid redundant encoding of text or image prompts, further reducing latency!

- **[Chat with GPTs on Alfred]**

- **[Chat with GPTs or Claude on Alfred]**
Alfred now supports chat with OpenAI API-based models. To use, simply type:
```python
from alfred import Client

openai = Client(model_type="openai", model="gpt-3.5-turbo")
openai.chat()
gpt = Client(model_type="openai", model="gpt-3.5-turbo")
gpt.chat()
# or chat with claude from Anthropic!
claude = Client(model_type="anthropic", model="claude-2")
claude.chat()
```

# Citation
@@ -54,7 +55,7 @@ If you find Alfred useful, please cite the following work. Thank you!
@inproceedings{yu2023alfred,
title = {Alfred: A System for Prompted Weak Supervision},
author = {Yu, Peilin and Bach, Stephen H.},
booktitle = {ACL Demo},
booktitle = {ACL System Demonstration},
year = 2023,
}
```
12 changes: 10 additions & 2 deletions alfred/fm/model.py
@@ -157,9 +157,17 @@ def forward(
raise ValueError(f"batch_policy {batch_policy} not supported")
else:
batch_policy = "static"
batched_queries = np.array_split(queries, len(queries))
pretokenized = False

if isinstance(queries[0], Tuple):
if isinstance(queries[0][0], Image.Image):
mode = "generate"
batched_queries = batch_multimodal(
queries, mode=self.multimodal_mode, batch_size=batch_size
)
else:
batched_queries = np.array_split(queries, len(queries))
else:
batched_queries = np.array_split(queries, len(queries))
if mode == "generate":
inferece_fn = self._generate_batch
elif mode == "score":
137 changes: 88 additions & 49 deletions alfred/fm/openai.py
@@ -1,14 +1,15 @@
import json
import logging
import os
from typing import Optional, List, Any, Union
from typing import Optional, List, Any, Union, Tuple

import PIL.Image
import torch
import readline

from .model import APIAccessFoundationModel
from .response import CompletionResponse
from .utils import colorize_str, retry
from .utils import colorize_str, retry, encode_image

logger = logging.getLogger(__name__)

@@ -22,21 +23,22 @@
"OpenAI module not found. Please install it to use the OpenAI model."
)

from openai.error import (
from openai._exceptions import (
AuthenticationError,
APIError,
Timeout,
APITimeoutError,
RateLimitError,
InvalidRequestError,
BadRequestError,
APIConnectionError,
ServiceUnavailableError,
APIStatusError,
)

OPENAI_MODELS = (
"gpt-4",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0613",
"gpt-4-1106-preview",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0613",
@@ -47,10 +49,19 @@
"text-curie-001",
"text-babbage-001",
"text-ada-001",
"text-embedding-ada-002",
"code-davinci-002",
)

OPENAI_EMBEDDING_MODELS = (
"text-davinci-001",
"text-curie-001",
"text-babbage-001",
"text-ada-001",
"text-embedding-ada-002",
)

OPENAI_VISION_MODELS = ("gpt-4-vision-preview",)


class OpenAIModel(APIAccessFoundationModel):
"""
@@ -59,25 +70,24 @@ class OpenAIModel(APIAccessFoundationModel):
This class provides a wrapper for the OpenAI API for generating completions.
"""

@staticmethod
@retry(
num_retries=3,
wait_time=0.1,
exceptions=(
AuthenticationError,
APIError,
Timeout,
RateLimitError,
InvalidRequestError,
APIConnectionError,
ServiceUnavailableError,
APITimeoutError,
RateLimitError,
APIError,
BadRequestError,
APIStatusError,
),
)
def _openai_query(
query: Union[str, List],
self,
query: Union[str, List, Tuple],
temperature: float = 0.0,
max_tokens: int = 3,
model: str = "text-davinci-002",
max_tokens: int = 64,
**kwargs: Any,
) -> str:
"""
@@ -89,8 +99,6 @@ def _openai_query(
:type temperature: float
:param max_tokens: The maximum number of tokens to be returned
:type max_tokens: int
:param model: The model to be used (choose from https://beta.openai.com/docs/api-reference/completions/create)
:type model: str
:param kwargs: Additional keyword arguments
:type kwargs: Any
:return: The generated completion
@@ -102,58 +110,79 @@
openai.api_key = openai_api_key

if chat:
return openai.ChatCompletion.create(
model=model,
return self.openai_client.chat.completions.create(
model=self.model_string,
messages=query,
max_tokens=max_tokens,
stop=None,
temperature=temperature,
stream=True,
)
else:
response = openai.Completion.create(
model=model,
prompt=query,
if self.model_string in OPENAI_VISION_MODELS:
img, prompt = query[0], query[1]
if isinstance(img, PIL.Image.Image):
img = encode_image(img, type="image")
elif isinstance(img, str):
img = img
query = [
{
"role": "user",
"content": [
{"type": "text", "text": f"{prompt}"},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{img}"},
},
],
}
]
else:
query = [{"role": "user", "content": query}]
response = self.openai_client.chat.completions.create(
messages=query,
model=self.model_string,
temperature=temperature,
max_tokens=max_tokens,
)
return response["choices"][0]["text"]
return response.choices[0].message.content

@staticmethod
@retry(
num_retries=3,
wait_time=0.1,
exceptions=(
APIError,
Timeout,
RateLimitError,
InvalidRequestError,
AuthenticationError,
APIConnectionError,
ServiceUnavailableError,
APITimeoutError,
RateLimitError,
APIError,
BadRequestError,
APIStatusError,
),
)
def _openai_embedding_query(
self,
query_string: str,
model: str = "text-davinci-002",
**kwargs: Any,
) -> torch.Tensor:
"""
Run a single query to get the embedding through the foundation model

:param query_string: The prompt to be used for the query
:type query_string: str
:param model: The model to be used (choose from https://beta.openai.com/docs/api-reference/completions/create)
:type model: str
:return: The embeddings
:rtype: str
"""
openai_api_key = kwargs.get("openai_api_key", None)
if openai_api_key is not None:
openai.api_key = openai_api_key

return torch.tensor(
openai.Embedding.create(
input=[query_string.replace("\n", " ")], model=model
)["data"][0]["embedding"]
self.openai_client.embeddings.create(
input=[query_string.replace("\n", " ")], model=self.model_string
)
.data[0]
.embedding
)

def __init__(
Expand All @@ -172,8 +201,12 @@ def __init__(
:type api_key: Optional[str]
"""
assert (
model_string in OPENAI_MODELS
), f"Model {model_string} not found. Please choose from {OPENAI_MODELS}"
model_string
in OPENAI_MODELS + OPENAI_VISION_MODELS + OPENAI_EMBEDDING_MODELS
), (
f"Model {model_string} not found. "
f"Please choose from {OPENAI_MODELS} or {OPENAI_VISION_MODELS} or {OPENAI_EMBEDDING_MODELS}"
)

if "OPENAI_API_KEY" in os.environ:
openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -189,11 +222,15 @@ def __init__(
)
openai.api_key = input("Please enter your OpenAI API key: ")
logger.log(logging.INFO, f"OpenAI model api key stored")

self.openai_client = openai.OpenAI(api_key=api_key)
if model_string in OPENAI_VISION_MODELS:
self.multimodal_mode = "autoregressive"
super().__init__(model_string, {"api_key": openai.api_key})

def _generate_batch(
self,
batch_instance: List[str],
batch_instance: Union[List[str], Tuple],
**kwargs,
) -> List[CompletionResponse]:
"""
@@ -203,7 +240,7 @@ def _generate_batch(
The generated completions are returned in a list of `CompletionResponse` objects.

:param batch_instance: A list of prompts for which to generate completions.
:type batch_instance: List[str]
:type batch_instance: List[str] or List[Tuple]
:param kwargs: Additional keyword arguments to pass to the OpenAI API.
:type kwargs: Any
:return: A list of `CompletionResponse` objects containing the generated completions.
@@ -212,11 +249,7 @@
output = []
for query in batch_instance:
output.append(
CompletionResponse(
prediction=self._openai_query(
query, model=self.model_string, **kwargs
)
)
CompletionResponse(prediction=self._openai_query(query, **kwargs))
)
return output

@@ -238,11 +271,18 @@ def _encode_batch(
:return: A list of `torch.Tensor` objects containing the generated embeddings.
:rtype: List[torch.Tensor]
"""
if self.model_string not in OPENAI_EMBEDDING_MODELS:
logger.error(
f"Model {self.model_string} does not support embedding."
f"Please choose from {OPENAI_EMBEDDING_MODELS}"
)
raise ValueError(
f"Model {self.model_string} does not support embedding."
f"Please choose from {OPENAI_EMBEDDING_MODELS}"
)
output = []
for query in batch_instance:
output.append(
self._openai_embedding_query(query, model=self.model_string, **kwargs)
)
output.append(self._openai_embedding_query(query, **kwargs))
return output

def chat(self, **kwargs: Any):
@@ -300,7 +340,6 @@ def _feedback(feedback: str, no_newline=False):
for resp in self._openai_query(
message_log,
chat=True,
model=model,
temperature=temperature,
max_tokens=max_tokens,
):