Updating OpenAI APIs, Support for GPT-4V(ision) #55

Merged · 1 commit · Nov 11, 2023
37 changes: 19 additions & 18 deletions README.md
@@ -15,35 +15,36 @@ Alfred aims to reduce annotation cost and time by making efficient use of LLMs,
![alt text](assets/poster.png)

# News Update

- **[[FlexGen](https://github.com/FMInference/FlexGen) Support]**
Alfred now supports FlexGen, a high-throughput inference pipeline with a single GPU. Example usage:
- **[[GPT-4V(ision)](https://openai.com/research/gpt-4v-system-card) Support]**
Alfred now supports GPT-4V(ision). Use it to streamline your image annotation tasks! For example:
```python
from alfred import Client
flexgen = Client(model_type="flexgen", model="facebook/opt-30b", local_path='<model_path>', offload_dir="./flexgen-offload-cache")
openai = Client(model_type="openai", model="gpt-4-vision-preview")
image = ... # load your image
openai((image, f"What type is this document? Please choose from {label_space}"))
```
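A slightly fuller sketch of the same call, for reference; the label set and image path below are placeholders, not part of this PR:
```python
from PIL import Image
from alfred import Client

label_space = ["invoice", "receipt", "letter", "scientific paper"]  # placeholder label set
openai = Client(model_type="openai", model="gpt-4-vision-preview")
image = Image.open("sample_document.jpg")  # any PIL image should work here
response = openai((image, f"What type is this document? Please choose from {label_space}"))
print(response)
```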

- **[[vLLM](https://github.com/vllm-project/vllm) Support]**
Alfred now supports vLLM-accelerated models! To use:
```python
vLLMClient = Client(model_type="vllm", model=<your_favourite_model>)
```
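As an illustrative (not prescribed) choice of model, and assuming the client is callable on a plain string prompt as in the examples above:
```python
from alfred import Client

# "facebook/opt-6.7b" is only an example checkpoint; any model vLLM supports should work
vllm_client = Client(model_type="vllm", model="facebook/opt-6.7b")
print(vllm_client("Summarize in one sentence: Alfred reduces annotation cost with LLMs."))
```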
- **[[Claude](https://console.anthropic.com/claude) Support]**
Alfred now supports Claude models through the Anthropic API. Similarly, you can start a chat session with `Client.chat()`!
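For example (mirroring the chat snippet further below):
```python
from alfred import Client

claude = Client(model_type="anthropic", model="claude-2")
claude.chat()  # starts an interactive chat session
```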

- **[[Llama](https://arxiv.org/pdf/2302.13971.pdf) Support]**
Alfred now supports locally hosted Llama through transformers.
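A minimal sketch of what this might look like; the `huggingface` model_type string, the checkpoint name, and the local path are illustrative assumptions, not taken from this PR:
```python
from alfred import Client

# model_type, checkpoint name, and local_path are assumptions for illustration
llama = Client(model_type="huggingface", model="huggyllama/llama-7b", local_path="/path/to/llama-weights")
print(llama("Label this review as positive or negative: 'Great battery life.'"))
```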

- **[Embedding with Alfred]**
Get a vector representation for any input string! Alfred now supports embeddings from locally hosted Hugging Face models or API-based calls to Cohere and OpenAI. To use:
```python
Client.encode(Union[str, List[str]]) -> Union[torch.tensor, List[torch.tensor]]
```
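For instance, a small sketch using the OpenAI embedding backend (the model name matches the embedding model list added in this PR):
```python
from alfred import Client

embedder = Client(model_type="openai", model="text-embedding-ada-002")
vectors = embedder.encode(["weak supervision", "prompted labeling functions"])
# per the signature above, this returns a torch tensor or a list of torch tensors
```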

- **[Server-side Caching for CLIP]**
Alfred now incorporates an automatic server-side caching mechanism to avoid redundant encoding of text or image prompts, further reducing latency!

- **[Chat with GPTs on Alfred]**

- **[Chat with GPTs or Claude on Alfred]**
Alfred now supports chat with OpenAI API-based models. To use, simply type:
```python
from alfred import Client

openai = Client(model_type="openai", model="gpt-3.5-turbo")
openai.chat()
gpt = Client(model_type="openai", model="gpt-3.5-turbo")
gpt.chat()
# or chat with claude from Anthropic!
claude = Client(model_type="anthropic", model="claude-2")
claude.chat()
```

# Citation
@@ -54,7 +55,7 @@ If you find Alfred useful, please cite the following work. Thank you!
@inproceedings{yu2023alfred,
title = {Alfred: A System for Prompted Weak Supervision},
author = {Yu, Peilin and Bach, Stephen H.},
booktitle = {ACL Demo},
booktitle = {ACL System Demonstration},
year = 2023,
}
```
12 changes: 10 additions & 2 deletions alfred/fm/model.py
@@ -157,9 +157,17 @@ def forward(
raise ValueError(f"batch_policy {batch_policy} not supported")
else:
batch_policy = "static"
batched_queries = np.array_split(queries, len(queries))
pretokenized = False

if isinstance(queries[0], Tuple):
if isinstance(queries[0][0], Image.Image):
mode = "generate"
batched_queries = batch_multimodal(
queries, mode=self.multimodal_mode, batch_size=batch_size
)
else:
batched_queries = np.array_split(queries, len(queries))
else:
batched_queries = np.array_split(queries, len(queries))
if mode == "generate":
inferece_fn = self._generate_batch
elif mode == "score":
137 changes: 88 additions & 49 deletions alfred/fm/openai.py
@@ -1,14 +1,15 @@
import json
import logging
import os
from typing import Optional, List, Any, Union
from typing import Optional, List, Any, Union, Tuple

import PIL.Image
import torch
import readline

from .model import APIAccessFoundationModel
from .response import CompletionResponse
from .utils import colorize_str, retry
from .utils import colorize_str, retry, encode_image

logger = logging.getLogger(__name__)

@@ -22,21 +23,22 @@
"OpenAI module not found. Please install it to use the OpenAI model."
)

from openai.error import (
from openai._exceptions import (
AuthenticationError,
APIError,
Timeout,
APITimeoutError,
RateLimitError,
InvalidRequestError,
BadRequestError,
APIConnectionError,
ServiceUnavailableError,
APIStatusError,
)

OPENAI_MODELS = (
"gpt-4",
"gpt-4-0613",
"gpt-4-32k",
"gpt-4-32k-0613",
"gpt-4-1106-preview",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0613",
@@ -47,10 +49,19 @@
"text-curie-001",
"text-babbage-001",
"text-ada-001",
"text-embedding-ada-002",
"code-davinci-002",
)

OPENAI_EMBEDDING_MODELS = (
"text-davinci-001",
"text-curie-001",
"text-babbage-001",
"text-ada-001",
"text-embedding-ada-002",
)

OPENAI_VISION_MODELS = ("gpt-4-vision-preview",)


class OpenAIModel(APIAccessFoundationModel):
"""
@@ -59,25 +70,24 @@ class OpenAIModel(APIAccessFoundationModel):
This class provides a wrapper for the OpenAI API for generating completions.
"""

@staticmethod
@retry(
num_retries=3,
wait_time=0.1,
exceptions=(
AuthenticationError,
APIError,
Timeout,
RateLimitError,
InvalidRequestError,
APIConnectionError,
ServiceUnavailableError,
APITimeoutError,
RateLimitError,
APIError,
BadRequestError,
APIStatusError,
),
)
def _openai_query(
query: Union[str, List],
self,
query: Union[str, List, Tuple],
temperature: float = 0.0,
max_tokens: int = 3,
model: str = "text-davinci-002",
max_tokens: int = 64,
**kwargs: Any,
) -> str:
"""
@@ -89,8 +99,6 @@ def _openai_query(
:type temperature: float
:param max_tokens: The maximum number of tokens to be returned
:type max_tokens: int
:param model: The model to be used (choose from https://beta.openai.com/docs/api-reference/completions/create)
:type model: str
:param kwargs: Additional keyword arguments
:type kwargs: Any
:return: The generated completion
@@ -102,58 +110,79 @@
openai.api_key = openai_api_key

if chat:
return openai.ChatCompletion.create(
model=model,
return self.openai_client.chat.completions.create(
model=self.model_string,
messages=query,
max_tokens=max_tokens,
stop=None,
temperature=temperature,
stream=True,
)
else:
response = openai.Completion.create(
model=model,
prompt=query,
if self.model_string in OPENAI_VISION_MODELS:
img, prompt = query[0], query[1]
if isinstance(img, PIL.Image.Image):
img = encode_image(img, type="image")
elif isinstance(img, str):
img = img
query = [
{
"role": "user",
"content": [
{"type": "text", "text": f"{prompt}"},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{img}"},
},
],
}
]
else:
query = [{"role": "user", "content": query}]
response = self.openai_client.chat.completions.create(
messages=query,
model=self.model_string,
temperature=temperature,
max_tokens=max_tokens,
)
return response["choices"][0]["text"]
return response.choices[0].message.content

@staticmethod
@retry(
num_retries=3,
wait_time=0.1,
exceptions=(
APIError,
Timeout,
RateLimitError,
InvalidRequestError,
AuthenticationError,
APIConnectionError,
ServiceUnavailableError,
APITimeoutError,
RateLimitError,
APIError,
BadRequestError,
APIStatusError,
),
)
def _openai_embedding_query(
self,
query_string: str,
model: str = "text-davinci-002",
**kwargs: Any,
) -> torch.Tensor:
"""
Run a single query to get the embedding through the foundation model

:param query_string: The prompt to be used for the query
:type query_string: str
:param model: The model to be used (choose from https://beta.openai.com/docs/api-reference/completions/create)
:type model: str
:return: The embeddings
:rtype: str
"""
openai_api_key = kwargs.get("openai_api_key", None)
if openai_api_key is not None:
openai.api_key = openai_api_key

return torch.tensor(
openai.Embedding.create(
input=[query_string.replace("\n", " ")], model=model
)["data"][0]["embedding"]
self.openai_client.embeddings.create(
input=[query_string.replace("\n", " ")], model=self.model_string
)
.data[0]
.embedding
)

def __init__(
Expand All @@ -172,8 +201,12 @@ def __init__(
:type api_key: Optional[str]
"""
assert (
model_string in OPENAI_MODELS
), f"Model {model_string} not found. Please choose from {OPENAI_MODELS}"
model_string
in OPENAI_MODELS + OPENAI_VISION_MODELS + OPENAI_EMBEDDING_MODELS
), (
f"Model {model_string} not found. "
f"Please choose from {OPENAI_MODELS} or {OPENAI_VISION_MODELS} or {OPENAI_EMBEDDING_MODELS}"
)

if "OPENAI_API_KEY" in os.environ:
openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -189,11 +222,15 @@ def __init__(
)
openai.api_key = input("Please enter your OpenAI API key: ")
logger.log(logging.INFO, f"OpenAI model api key stored")

self.openai_client = openai.OpenAI(api_key=api_key)
if model_string in OPENAI_VISION_MODELS:
self.multimodal_mode = "autoregressive"
super().__init__(model_string, {"api_key": openai.api_key})

def _generate_batch(
self,
batch_instance: List[str],
batch_instance: Union[List[str], Tuple],
**kwargs,
) -> List[CompletionResponse]:
"""
@@ -203,7 +240,7 @@ def _generate_batch(
The generated completions are returned in a list of `CompletionResponse` objects.

:param batch_instance: A list of prompts for which to generate completions.
:type batch_instance: List[str]
:type batch_instance: List[str] or List[Tuple]
:param kwargs: Additional keyword arguments to pass to the OpenAI API.
:type kwargs: Any
:return: A list of `CompletionResponse` objects containing the generated completions.
@@ -212,11 +249,7 @@
output = []
for query in batch_instance:
output.append(
CompletionResponse(
prediction=self._openai_query(
query, model=self.model_string, **kwargs
)
)
CompletionResponse(prediction=self._openai_query(query, **kwargs))
)
return output

@@ -238,11 +271,18 @@ def _encode_batch(
:return: A list of `torch.Tensor` objects containing the generated embeddings.
:rtype: List[torch.Tensor]
"""
if self.model_string not in OPENAI_EMBEDDING_MODELS:
logger.error(
f"Model {self.model_string} does not support embedding."
f"Please choose from {OPENAI_EMBEDDING_MODELS}"
)
raise ValueError(
f"Model {self.model_string} does not support embedding."
f"Please choose from {OPENAI_EMBEDDING_MODELS}"
)
output = []
for query in batch_instance:
output.append(
self._openai_embedding_query(query, model=self.model_string, **kwargs)
)
output.append(self._openai_embedding_query(query, **kwargs))
return output

def chat(self, **kwargs: Any):
@@ -300,7 +340,6 @@ def _feedback(feedback: str, no_newline=False):
for resp in self._openai_query(
message_log,
chat=True,
model=model,
temperature=temperature,
max_tokens=max_tokens,
):