diff --git a/alfred/client/client.py b/alfred/client/client.py
index c258527..ecc7efc 100644
--- a/alfred/client/client.py
+++ b/alfred/client/client.py
@@ -77,6 +77,7 @@ def __init__(
             "anthropic",
             "cohere",
             "ai21",
+            "google",
             "torch",
             "dummy",
         ], f"Invalid model type: {self.model_type}"
@@ -186,6 +187,10 @@ def __init__(
             from ..fm.ai21 import AI21Model
 
             self.model = AI21Model(self.model, **kwargs)
+        elif self.model_type == "google":
+            from ..fm.google import GoogleModel
+
+            self.model = GoogleModel(self.model, **kwargs)
         elif self.model_type == "dummy":
             from ..fm.dummy import DummyModel
@@ -422,12 +427,12 @@ def chat(self, log_save_path: Optional[str] = None, **kwargs: Any):
         :param log_save_path: The file to save the chat logs.
         :type log_save_path: Optional[str]
         """
-        if self.model_type in ["openai", "anthropic"]:
+        if self.model_type in ["openai", "anthropic", "google"]:
             self.model.chat(log_save_path=log_save_path, **kwargs)
         else:
             logger.error(
-                "Chat APIs are only supported for Anthropic and OpenAI models."
+                "Chat APIs are only supported for Anthropic, Google Gemini, and OpenAI models."
             )
             raise NotImplementedError(
-                "Currently Chat are only supported for Anthropic and OpenAI models."
+                "Currently, chat is only supported for Anthropic, Google Gemini, and OpenAI models."
             )
diff --git a/alfred/fm/anthropic.py b/alfred/fm/anthropic.py
index 493dbe4..1f03f7e 100644
--- a/alfred/fm/anthropic.py
+++ b/alfred/fm/anthropic.py
@@ -8,7 +8,7 @@
 
 from .model import APIAccessFoundationModel
 from .response import CompletionResponse
-from .utils import colorize_str
+from .utils import colorize_str, type_print
 
 logger = logging.getLogger(__name__)
 
@@ -38,12 +38,12 @@ class AnthropicModel(APIAccessFoundationModel):
     """
 
     def _anthropic_query(
-            self,
-            query: Union[str, List],
-            temperature: float = 0.0,
-            max_tokens: int = 3,
-            model: str = "claude-instant-1",
-            **kwargs: Any,
+        self,
+        query: Union[str, List],
+        temperature: float = 0.0,
+        max_tokens: int = 3,
+        model: str = "claude-instant-1",
+        **kwargs: Any,
     ) -> str:
         """
         Run a single query through the foundation model
@@ -85,7 +85,7 @@ def _anthropic_query(
         return response["completion"]
 
     def __init__(
-            self, model_string: str = "claude-instant-1", api_key: Optional[str] = None
+        self, model_string: str = "claude-instant-1", api_key: Optional[str] = None
     ):
         """
         Initialize the Anthropic API wrapper.
@@ -100,7 +100,7 @@ def __init__(
         :type api_key: Optional[str]
         """
         assert (
-                model_string in ANTHROPIC_MODELS
+            model_string in ANTHROPIC_MODELS
         ), f"Model {model_string} not found. Please choose from {ANTHROPIC_MODELS}"
 
         if "ANTHROPIC_API_KEY" in os.environ:
@@ -124,9 +124,9 @@ def __init__(
         super().__init__(model_string, {"api_key": api_key})
 
     def _generate_batch(
-            self,
-            batch_instance: List[str],
-            **kwargs,
+        self,
+        batch_instance: List[str],
+        **kwargs,
     ) -> List[CompletionResponse]:
         """
         Generate completions for a batch of prompts using the anthropic API.
@@ -161,9 +161,13 @@ def _feedback(feedback: str, no_newline=False, override=False):
         if override:
             print("\r", end="")
         print(
-            colorize_str("Chat AI: ", "GREEN") + feedback,
-            end="\n" if not no_newline else "",
+            colorize_str("Chat AI: ", "GREEN"),
+            end="",
         )
+        type_print(feedback)
+        print("",
+            end="\n" if not no_newline else "",
+        )
 
     model = kwargs.get("model", self.model_string)
     c_title = colorize_str("Alfred's Anthropic Chat", "BLUE")
@@ -201,11 +205,11 @@ def _feedback(feedback: str, no_newline=False, override=False):
             message_log.append({"role": "user", "content": query})
             response = []
             for resp in self._anthropic_query(
-                    query,
-                    chat=True,
-                    model=model,
-                    temperature=temperature,
-                    max_tokens=max_tokens,
+                query,
+                chat=True,
+                model=model,
+                temperature=temperature,
+                max_tokens=max_tokens,
             ):
                 if resp["stop_reason"] in ["stop", "stop_sequence"]:
                     break
diff --git a/alfred/fm/google.py b/alfred/fm/google.py
new file mode 100644
index 0000000..0180003
--- /dev/null
+++ b/alfred/fm/google.py
@@ -0,0 +1,293 @@
+import json
+import logging
+import os
+import sys
+from typing import Optional, List, Any, Union, Tuple
+
+import PIL.Image
+import torch
+
+from .model import APIAccessFoundationModel
+from .response import CompletionResponse
+from .utils import colorize_str, retry, type_print
+
+logger = logging.getLogger(__name__)
+
+try:
+    import google.generativeai as genai
+except ModuleNotFoundError:
+    logger.warning(
+        "Google GenAI module not found. Please install google-generativeai to use the Google model."
+    )
+    raise ModuleNotFoundError(
+        "Google GenAI module not found. Please install google-generativeai to use the Google model."
+    )
+
+GOOGLE_GENAI_MODELS = (
+    "gemini-pro",
+)
+
+GOOGLE_GENAI_VISION_MODELS = (
+    "gemini-pro-vision",
+)
+
+GOOGLE_GENAI_EMBEDDING_MODELS = (
+    "embedding-001",
+)
+
+
+class GoogleModel(APIAccessFoundationModel):
+    """
+    A wrapper for the Google API.
+
+    This class provides a wrapper for the Google API for generating completions.
+    """
+
+    @retry(
+        num_retries=3,
+        wait_time=0.1,
+        exceptions=(Exception,),
+    )
+    def _google_genai_query(
+        self,
+        query: Union[str, List, Tuple],
+        temperature: float = 0.0,
+        max_tokens: int = 64,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Run a single query through the foundation model
+
+        :param query: The prompt to be used for the query
+        :type query: Union[str, List, Tuple]
+        :param temperature: The temperature of the model
+        :type temperature: float
+        :param max_tokens: The maximum number of tokens to be returned
+        :type max_tokens: int
+        :param kwargs: Additional keyword arguments
+        :type kwargs: Any
+        :return: The generated completion
+        :rtype: str
+        """
+        if self.model_string in GOOGLE_GENAI_VISION_MODELS:
+            img, prompt = query[0], query[1]
+            if not isinstance(img, PIL.Image.Image):
+                raise ValueError(
+                    f"Image type {type(img)} not supported. Please use PIL.Image!"
+                )
+            query = [prompt, img] if len(prompt) > 0 else [img]
+        response = self.model.generate_content(
+            query,
+            generation_config=genai.types.GenerationConfig(
+                candidate_count=1,
+                # avoid hard-coding a stop sequence; the example value ['x']
+                # would truncate completions at the first "x"
+                stop_sequences=kwargs.get("stop_sequences", None),
+                max_output_tokens=max_tokens,
+                temperature=temperature,
+            ),
+        )
+        return response.text
+
+    @retry(
+        num_retries=3,
+        wait_time=0.1,
+        exceptions=(Exception,),
+    )
+    def _google_genai_embedding_query(
+        self,
+        query_string: str,
+        **kwargs: Any,
+    ) -> torch.Tensor:
+        """
+        Run a single query to get the embedding through the foundation model
+
+        :param query_string: The prompt to be used for the query
+        :type query_string: str
+        :return: The embeddings
+        :rtype: torch.Tensor
+        """
+        # embed_content returns a dict; the vector lives under "embedding"
+        return torch.tensor(
+            genai.embed_content(
+                model=f"models/{self.model_string}",
+                content=query_string,
+                task_type="retrieval_document",
+                title="Embedding of single string",
+            )["embedding"]
+        )
+
+    def __init__(
+        self, model_string: str = "gemini-pro", api_key: Optional[str] = None
+    ):
+        """
+        Initialize the Google API wrapper.
+
+        This function loads the API key for the Google API from an environment variable or a configuration file.
+        If neither is found, the user is prompted to enter the API key.
+        Note: google.generativeai currently requires Python 3.9+
+
+        :param model_string: The model to be used for generating completions.
+        :type model_string: str
+        :param api_key: The API key to be used for the Google API.
+        :type api_key: Optional[str]
+        """
+
+        # check that the Python version is 3.9+
+        if sys.version_info < (3, 9):
+            raise RuntimeError("Google GenAI requires Python 3.9+")
+        assert (
+            model_string
+            in GOOGLE_GENAI_MODELS + GOOGLE_GENAI_EMBEDDING_MODELS + GOOGLE_GENAI_VISION_MODELS
+        ), (
+            f"Model {model_string} not found. "
+            f"Please choose from {GOOGLE_GENAI_MODELS + GOOGLE_GENAI_EMBEDDING_MODELS + GOOGLE_GENAI_VISION_MODELS}"
+        )
+        if api_key is None:
+            if "GOOGLE_API_KEY" in os.environ:
+                api_key = os.getenv("GOOGLE_API_KEY")
+                logger.log(logging.INFO, "Google API key found")
+            else:
+                logger.log(
+                    logging.INFO,
+                    "Google API key not found in config, requesting user input",
+                )
+                api_key = input("Please enter your Google API key: ")
+                logger.log(logging.INFO, "Google API key stored")
+
+        genai.configure(api_key=api_key)
+
+        if model_string in GOOGLE_GENAI_VISION_MODELS:
+            self.multimodal_mode = "autoregressive"
+
+        self.model = genai.GenerativeModel(model_string)
+
+        super().__init__(model_string, {"api_key": api_key})
+
+    def _generate_batch(
+        self,
+        batch_instance: Union[List[str], Tuple],
+        **kwargs,
+    ) -> List[CompletionResponse]:
+        """
+        Generate completions for a batch of prompts using the Google API.
+
+        This function generates completions for a batch of prompts using the Google API.
+        The generated completions are returned in a list of `CompletionResponse` objects.
+
+        :param batch_instance: A list of prompts for which to generate completions.
+        :type batch_instance: List[str] or List[Tuple]
+        :param kwargs: Additional keyword arguments to pass to the Google API.
+        :type kwargs: Any
+        :return: A list of `CompletionResponse` objects containing the generated completions.
+        :rtype: List[CompletionResponse]
+        """
+        output = []
+        for query in batch_instance:
+            output.append(
+                CompletionResponse(prediction=self._google_genai_query(query, **kwargs))
+            )
+        return output
+
+    def _encode_batch(
+        self,
+        batch_instance: List[str],
+        **kwargs,
+    ) -> List[torch.Tensor]:
+        """
+        Generate embeddings for a batch of prompts using the Google API.
+
+        This function generates embeddings for a batch of prompts using the Google API.
+        The generated embeddings are returned in a list of `torch.Tensor` objects.
+
+        :param batch_instance: A list of prompts
+        :type batch_instance: List[str]
+        :param kwargs: Additional keyword arguments to pass to the Google API.
+        :type kwargs: Any
+        :return: A list of `torch.Tensor` objects containing the generated embeddings.
+        :rtype: List[torch.Tensor]
+        """
+        if self.model_string not in GOOGLE_GENAI_EMBEDDING_MODELS:
+            logger.error(
+                f"Model {self.model_string} does not support embedding. "
+                f"Please choose from {GOOGLE_GENAI_EMBEDDING_MODELS}"
+            )
+            raise ValueError(
+                f"Model {self.model_string} does not support embedding. "
+                f"Please choose from {GOOGLE_GENAI_EMBEDDING_MODELS}"
+            )
+        output = []
+        for query in batch_instance:
+            output.append(self._google_genai_embedding_query(query, **kwargs))
+        return output
+
+    def chat(self, **kwargs: Any):
+        """
+        Launch an interactive chat session with the Google API.
+        """
+
+        def _feedback(feedback: str, no_newline=False):
+            print(
+                colorize_str("Chat AI: ", "GREEN") + feedback,
+                end="\n" if not no_newline else "",
+            )
+
+        model = kwargs.get("model", self.model_string)
+        c_title = colorize_str("Alfred's Google Gemini Chat", "BLUE")
+        c_model = colorize_str(model, "WARNING")
+        c_exit = colorize_str("exit", "FAIL")
+        c_ctrlc = colorize_str("Ctrl+C", "FAIL")
+
+        temperature = kwargs.get("temperature", 0.7)
+        max_tokens = kwargs.get("max_tokens", 1024)
+        log_save_path = kwargs.get("log_save_path", None)
+        manual_chat_sequence = kwargs.get("manual_chat_sequence", None)
+        save_as_markdown = kwargs.get("save_as_markdown", False)
+
+        print(f"Welcome to the {c_title} session!\nYou are using the {c_model} model.")
+        print(f"Type '{c_exit}' or hit {c_ctrlc} to exit the chat session.")
+
+        message_log = []
+        chat_history = []
+
+        self.chat_client = self.model.start_chat(history=chat_history)
+
+        print()
+        print("======== Chat Begin ========")
+        print()
+
+        try:
+            while True:
+                if manual_chat_sequence is not None:
+                    query = manual_chat_sequence.pop(0)
+                    _feedback(query, no_newline=True)
+                    print()
+                    if len(manual_chat_sequence) == 0:
+                        break
+                else:
+                    query = input(colorize_str("You: "))
+                    if query == "exit":
+                        _feedback("Goodbye!")
+                        break
+                message_log.append({"role": "user", "content": query})
+                _feedback("", no_newline=True)
+                response = []
+                for resp in self.chat_client.send_message(query, stream=True):
+                    response.append(resp.text)
+                    type_print(resp.text)
+                print()
+                response = "".join(response).strip()
+                response = response.replace("\n", "")
+                message_log.append({"role": "assistant", "content": response})
+        except KeyboardInterrupt:
+            _feedback("Goodbye!")
+
+        print()
+        print("======== Chat End ========")
+        print()
+        print(colorize_str("Thank you for using Alfred!"))
+
+        if log_save_path:
+            with open(log_save_path, "w") as f:
+                json.dump(message_log, f)
+            print(f"Your chat log is saved to {log_save_path}")
diff --git a/alfred/fm/openai.py b/alfred/fm/openai.py
index db4f82d..fbc1dab 100644
--- a/alfred/fm/openai.py
+++ b/alfred/fm/openai.py
@@ -9,7 +9,7 @@
 
 from .model import APIAccessFoundationModel
 from .response import CompletionResponse
-from .utils import colorize_str, retry, encode_image
+from .utils import colorize_str, retry, encode_image, type_print
 
 logger = logging.getLogger(__name__)
 
@@ -347,7 +347,7 @@ def _feedback(feedback: str, no_newline=False):
                     break
                 try:
                     txt = resp.choices[0].delta.content
-                    print(txt, end="")
+                    type_print(txt)
                 except AttributeError:
                     txt = ""
                 response.append(txt)
diff --git a/alfred/fm/utils.py b/alfred/fm/utils.py
index 85cb8fb..9ae75c1 100644
--- a/alfred/fm/utils.py
+++ b/alfred/fm/utils.py
@@ -150,6 +150,33 @@ def batch_multimodal(queries: List[Query], mode: str, batch_size=64):
     return batches
 
 
+def check_pkg_available(pkg_name: str) -> bool:
+    """
+    Check if a package is available
+
+    :param pkg_name: The name of the package
+    :type pkg_name: str
+    :return: True if the package is available; raises ImportError otherwise
+    :rtype: bool
+    """
+    try:
+        __import__(pkg_name)
+        return True
+    except ImportError:
+        raise ImportError(f"Please install {pkg_name} to use this feature")
+
+
+def type_print(string, interval=0.07, newline=False):
+    """
+    Print a string word by word to simulate typing
+    """
+    for word in string.split(" "):
+        print(word, end=" ", flush=True)
+        time.sleep(interval)
+    print("\b", end="", flush=True)
+    if newline:
+        print("")
+
+
 def retry(num_retries=3, wait_time=0.1, exceptions=(Exception,)):
     """
     A decorator to retry a function call if it raises an exception.
diff --git a/alfred/run_server.py b/alfred/run_server.py
index a0e992e..6dd536a 100644
--- a/alfred/run_server.py
+++ b/alfred/run_server.py
@@ -2,7 +2,7 @@
 import logging
 from typing import Any
 
-import alfred.fm.remote.grpc as grpc_utils
+from .fm.remote import grpc as grpc_utils
 
 logging.basicConfig(
     format="ALFRED %(levelname)s: %(asctime)-5s %(message)s",
@@ -56,51 +56,56 @@ def __init__(
             "vllm",
             "cohere",
             "ai21",
+            "google",
             "torch",
             "dummy",
         ], f"Invalid model type: {self.model_type}"
 
         if self.model_type == "huggingface":
-            from alfred.fm.huggingface import HuggingFaceModel
+            from .fm.huggingface import HuggingFaceModel
 
             self.model = HuggingFaceModel(self.model, **kwargs)
         elif self.model_type == "huggingfacevlm":
-            from alfred.fm.huggingfacevlm import HuggingFaceCLIPModel
+            from .fm.huggingfacevlm import HuggingFaceCLIPModel
 
             self.model = HuggingFaceCLIPModel(self.model, **kwargs)
         elif self.model_type == "huggingfacedocument":
-            from alfred.fm.huggingfacedocument import HuggingFaceDocumentModel
+            from .fm.huggingfacedocument import HuggingFaceDocumentModel
 
             self.model = HuggingFaceDocumentModel(self.model, **kwargs)
         elif self.model_type == "anthropic":
-            from alfred.fm.anthropic import AnthropicModel
+            from .fm.anthropic import AnthropicModel
 
             self.model = AnthropicModel(self.model, **kwargs)
         elif self.model_type == "openai":
-            from alfred.fm.openai import OpenAIModel
+            from .fm.openai import OpenAIModel
 
             self.model = OpenAIModel(self.model, **kwargs)
         elif self.model_type == "cohere":
-            from alfred.fm.cohere import CohereModel
+            from .fm.cohere import CohereModel
 
             self.model = CohereModel(self.model, **kwargs)
         elif self.model_type == "ai21":
-            from alfred.fm.ai21 import AI21Model
+            from .fm.ai21 import AI21Model
 
             self.model = AI21Model(self.model, **kwargs)
+        elif self.model_type == "google":
+            from .fm.google import GoogleModel
+
+            self.model = GoogleModel(self.model, **kwargs)
         elif self.model_type == "dummy":
-            from alfred.fm.dummy import DummyModel
+            from .fm.dummy import DummyModel
 
             self.model = DummyModel(self.model)
         elif self.model_type == "onnx":
-            from alfred.fm.onnx import ONNXModel
+            from .fm.onnx import ONNXModel
 
             self.model = ONNXModel(self.model, **kwargs)
         elif self.model_type == "flexgen":
-            from alfred.fm.flexgen import FlexGenModel
+            from .fm.flexgen import FlexGenModel
 
             self.model = FlexGenModel(self.model, **kwargs)
         elif self.model_type == "vllm":
-            from alfred.fm.vllm import vLLMModel
+            from .fm.vllm import vLLMModel
 
             self.model = vLLMModel(self.model, **kwargs)
         elif self.model_type == "tensorrt":
diff --git a/docs/README.md b/docs/README.md
index c9b1fff..1d7cdee 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -24,6 +24,7 @@ A full list of `Alfred` project modules.
   - [Cohere](alfred/fm/cohere.md#cohere)
   - [Dummy](alfred/fm/dummy.md#dummy)
   - [Flexgen](alfred/fm/flexgen.md#flexgen)
+  - [Google](alfred/fm/google.md#google)
   - [Huggingface](alfred/fm/huggingface.md#huggingface)
   - [Huggingfacedocument](alfred/fm/huggingfacedocument.md#huggingfacedocument)
   - [Huggingfacevlm](alfred/fm/huggingfacevlm.md#huggingfacevlm)
diff --git a/docs/alfred/client/cache/cache.md b/docs/alfred/client/cache/cache.md
index 762ad1c..573f809 100644
--- a/docs/alfred/client/cache/cache.md
+++ b/docs/alfred/client/cache/cache.md
@@ -372,4 +372,6 @@ Type: *str*
 ```python
 def to_metadata_string(**kwargs: Any) -> str: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/cache/dummy.md b/docs/alfred/client/cache/dummy.md
index 70d1d36..6c42100 100644
--- a/docs/alfred/client/cache/dummy.md
+++ b/docs/alfred/client/cache/dummy.md
@@ -195,4 +195,6 @@ Write a prompt-response pair to the cache
 ```python
 def write(self, prompt: str, response: str, metadata: Optional[str] = None): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/cache/sqlite.md b/docs/alfred/client/cache/sqlite.md
index c85b425..3d7ddfc 100644
--- a/docs/alfred/client/cache/sqlite.md
+++ b/docs/alfred/client/cache/sqlite.md
@@ -328,4 +328,6 @@ def write_batch(
     self, prompts: List[str], responses: List[str], metadata: Optional[str] = None
 ): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/client.md b/docs/alfred/client/client.md
index 56f3344..8088094 100644
--- a/docs/alfred/client/client.md
+++ b/docs/alfred/client/client.md
@@ -48,7 +48,7 @@ class Client:
 
 ### Client().__call__
 
-[Show source in client.py:303](../../../alfred/client/client.py#L303)
+[Show source in client.py:308](../../../alfred/client/client.py#L308)
 
 __call__() function to run the model on the queries.
 Equivalent to run() function.
@@ -76,7 +76,7 @@ def __call__(
 
 ### Client().calibrate
 
-[Show source in client.py:319](../../../alfred/client/client.py#L319)
+[Show source in client.py:324](../../../alfred/client/client.py#L324)
 
 calibrate are used to calibrate foundation models contextually given the template.
 A voter class may be passed to calibrate the model with a specific voter.
@@ -121,7 +121,7 @@ def calibrate(
 
 ### Client().chat
 
-[Show source in client.py:417](../../../alfred/client/client.py#L417)
+[Show source in client.py:422](../../../alfred/client/client.py#L422)
 
 Chat with the model APIs.
 Currently, Alfred supports Chat APIs from Anthropic and OpenAI
@@ -140,7 +140,7 @@ def chat(self, log_save_path: Optional[str] = None, **kwargs: Any):
 
 ### Client().encode
 
-[Show source in client.py:391](../../../alfred/client/client.py#L391)
+[Show source in client.py:396](../../../alfred/client/client.py#L396)
 
 embed() function to embed the queries.
 
@@ -163,7 +163,7 @@ def encode(
 
 ### Client().generate
 
-[Show source in client.py:262](../../../alfred/client/client.py#L262)
+[Show source in client.py:267](../../../alfred/client/client.py#L267)
 
 Wrapper function to generate the response(s) from the model. (For completion)
 
@@ -192,7 +192,7 @@ def generate(
 
 ### Client().remote_run
 
-[Show source in client.py:236](../../../alfred/client/client.py#L236)
+[Show source in client.py:241](../../../alfred/client/client.py#L241)
 
 Wrapper function for running the model on the queries thru a gRPC Server.
 
@@ -219,7 +219,7 @@ def remote_run(
 
 ### Client().run
 
-[Show source in client.py:216](../../../alfred/client/client.py#L216)
+[Show source in client.py:221](../../../alfred/client/client.py#L221)
 
 Run the model on the queries.
 
@@ -246,7 +246,7 @@ def run(
 
 ### Client().score
 
-[Show source in client.py:279](../../../alfred/client/client.py#L279)
+[Show source in client.py:284](../../../alfred/client/client.py#L284)
 
 Wrapper function to score the response(s) from the model. (For ranking)
 
@@ -276,4 +276,6 @@ def score(
     self, query: Union[RankedQuery, Dict, List[RankedQuery], List[str]], **kwargs: Any
 ) -> Union[Response, List[Response]]: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/ssh/sshtunnel.md b/docs/alfred/client/ssh/sshtunnel.md
index e387db8..57564f2 100644
--- a/docs/alfred/client/ssh/sshtunnel.md
+++ b/docs/alfred/client/ssh/sshtunnel.md
@@ -80,4 +80,6 @@ Stop the tunnel
 ```python
 def stop(self): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/ssh/utils.md b/docs/alfred/client/ssh/utils.md
index 35129c5..365dd93 100644
--- a/docs/alfred/client/ssh/utils.md
+++ b/docs/alfred/client/ssh/utils.md
@@ -94,4 +94,6 @@ Finds the next available port if given port is not available
 ```python
 def port_finder(port: Union[str, int], host: str = "") -> int: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/data/arrow.md b/docs/alfred/data/arrow.md
index 12952bd..b20f266 100644
--- a/docs/alfred/data/arrow.md
+++ b/docs/alfred/data/arrow.md
@@ -614,4 +614,6 @@ returns the version of the dataset
 ```python
 def version(self) -> str: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/data/dataset.md b/docs/alfred/data/dataset.md
index 5cbc453..ed90f1d 100644
--- a/docs/alfred/data/dataset.md
+++ b/docs/alfred/data/dataset.md
@@ -194,4 +194,6 @@ returns the version of the dataset
 @property
 def version(self) -> str: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/data/wrench.md b/docs/alfred/data/wrench.md
index 4c49d5b..4f2e817 100644
--- a/docs/alfred/data/wrench.md
+++ b/docs/alfred/data/wrench.md
@@ -64,4 +64,6 @@ returns the string representation of the dataset
 ```python
 def __repr__(self): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/ai21.md b/docs/alfred/fm/ai21.md
index da81656..847c7fa 100644
--- a/docs/alfred/fm/ai21.md
+++ b/docs/alfred/fm/ai21.md
@@ -24,4 +24,6 @@ This class provides a wrapper for the OpenAI API for generating completions.
 class AI21Model(APIAccessFoundationModel):
     def __init__(self, model_string: str = "j1-mid", api_key: Optional[str] = None): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/anthropic.md b/docs/alfred/fm/anthropic.md
index b2c4382..1160250 100644
--- a/docs/alfred/fm/anthropic.md
+++ b/docs/alfred/fm/anthropic.md
@@ -40,4 +40,6 @@ Launch an interactive chat session with the Anthropic API.
 ```python
 def chat(self, **kwargs: Any): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/cohere.md b/docs/alfred/fm/cohere.md
index 03b6b87..8e73833 100644
--- a/docs/alfred/fm/cohere.md
+++ b/docs/alfred/fm/cohere.md
@@ -24,4 +24,6 @@ This class provides a wrapper for the OpenAI API for generating completions.
 class CohereModel(APIAccessFoundationModel):
     def __init__(self, model_string: str = "xlarge", api_key: Optional[str] = None): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/dummy.md b/docs/alfred/fm/dummy.md
index cfe6ff7..20b2092 100644
--- a/docs/alfred/fm/dummy.md
+++ b/docs/alfred/fm/dummy.md
@@ -25,4 +25,6 @@ input as the output for both completion and outputs a raw logit of -1 for scorin
 class DummyModel(LocalAccessFoundationModel):
     def __init__(self, model: Optional[str] = None): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/flexgen.md b/docs/alfred/fm/flexgen.md
index 2f18ca4..b0be305 100644
--- a/docs/alfred/fm/flexgen.md
+++ b/docs/alfred/fm/flexgen.md
@@ -35,4 +35,6 @@ class FlexGenModel(LocalAccessFoundationModel):
         **kwargs: Any
     ): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/huggingface.md b/docs/alfred/fm/huggingface.md
index 41af3ff..d77d031 100644
--- a/docs/alfred/fm/huggingface.md
+++ b/docs/alfred/fm/huggingface.md
@@ -42,4 +42,6 @@ class HuggingFaceModel(LocalAccessFoundationModel):
         tokenizer: Optional[PreTrainedTokenizer] = None,
     ): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/huggingfacedocument.md b/docs/alfred/fm/huggingfacedocument.md
index 872a29d..2ad82cf 100644
--- a/docs/alfred/fm/huggingfacedocument.md
+++ b/docs/alfred/fm/huggingfacedocument.md
@@ -31,4 +31,6 @@ class HuggingFaceDocumentModel(LocalAccessFoundationModel):
     def __init__(
         self, model_string: str, local_path: Optional[str] = None, **kwargs: Any
     ): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/huggingfacevlm.md b/docs/alfred/fm/huggingfacevlm.md
index b74c761..5061a37 100644
--- a/docs/alfred/fm/huggingfacevlm.md
+++ b/docs/alfred/fm/huggingfacevlm.md
@@ -29,4 +29,6 @@ class HuggingFaceCLIPModel(LocalAccessFoundationModel):
         text_cache_limit: int = 64,
     ): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/index.md b/docs/alfred/fm/index.md
index c60646b..150f8a9 100644
--- a/docs/alfred/fm/index.md
+++ b/docs/alfred/fm/index.md
@@ -16,6 +16,7 @@ Fm
 - [Cohere](./cohere.md)
 - [Dummy](./dummy.md)
 - [Flexgen](./flexgen.md)
+- [Google](./google.md)
 - [Huggingface](./huggingface.md)
 - [Huggingfacedocument](./huggingfacedocument.md)
 - [Huggingfacevlm](./huggingfacevlm.md)
diff --git a/docs/alfred/fm/model.md b/docs/alfred/fm/model.md
index 5ecdfe2..cc5468b 100644
--- a/docs/alfred/fm/model.md
+++ b/docs/alfred/fm/model.md
@@ -290,4 +290,6 @@ class LocalAccessFoundationModel(FoundationModel):
 
 #### See also
 
-- [FoundationModel](#foundationmodel)
\ No newline at end of file
+- [FoundationModel](#foundationmodel)
+
+
diff --git a/docs/alfred/fm/onnx.md b/docs/alfred/fm/onnx.md
index 60ed2a0..81b3368 100644
--- a/docs/alfred/fm/onnx.md
+++ b/docs/alfred/fm/onnx.md
@@ -26,4 +26,6 @@ class ONNXModel(LocalAccessFoundationModel):
     def __init__(
         self, model_string: Optional[str] = None, local_path: Optional[str] = None
     ): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/openai.md b/docs/alfred/fm/openai.md
index 25e213c..622e9c2 100644
--- a/docs/alfred/fm/openai.md
+++ b/docs/alfred/fm/openai.md
@@ -40,4 +40,6 @@ Launch an interactive chat session with the OpenAI API.
 ```python
 def chat(self, **kwargs: Any): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/query/completion_query.md b/docs/alfred/fm/query/completion_query.md
index 35964aa..3a529df 100644
--- a/docs/alfred/fm/query/completion_query.md
+++ b/docs/alfred/fm/query/completion_query.md
@@ -145,4 +145,6 @@ returns the raw prompt content
 @property
 def prompt(self): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/query/query.md b/docs/alfred/fm/query/query.md
index 3ff4a2c..deedccd 100644
--- a/docs/alfred/fm/query/query.md
+++ b/docs/alfred/fm/query/query.md
@@ -82,4 +82,6 @@ Type: *str*
 ```python
 def serialize(self) -> str: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/query/ranked_query.md b/docs/alfred/fm/query/ranked_query.md
index ef79e4c..660ff01 100644
--- a/docs/alfred/fm/query/ranked_query.md
+++ b/docs/alfred/fm/query/ranked_query.md
@@ -195,4 +195,6 @@ returns the raw prompt content
 @property
 def prompt(self): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/remote/grpc.md b/docs/alfred/fm/remote/grpc.md
index 77928db..eb2862f 100644
--- a/docs/alfred/fm/remote/grpc.md
+++ b/docs/alfred/fm/remote/grpc.md
@@ -134,4 +134,6 @@ def restart(self):
 ```python
 def serve(self, credentials: Optional[grpc.ServerCredentials] = None): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/remote/protos/query_pb2.md b/docs/alfred/fm/remote/protos/query_pb2.md
index 014445b..841a283 100644
--- a/docs/alfred/fm/remote/protos/query_pb2.md
+++ b/docs/alfred/fm/remote/protos/query_pb2.md
@@ -8,4 +8,5 @@ Query Pb2
 
 > Auto-generated documentation for [alfred.fm.remote.protos.query_pb2](../../../../../alfred/fm/remote/protos/query_pb2.py) module.
 
+
 - [Query Pb2](#query-pb2)
diff --git a/docs/alfred/fm/remote/protos/query_pb2_grpc.md b/docs/alfred/fm/remote/protos/query_pb2_grpc.md
index 36c6783..ae81705 100644
--- a/docs/alfred/fm/remote/protos/query_pb2_grpc.md
+++ b/docs/alfred/fm/remote/protos/query_pb2_grpc.md
@@ -146,4 +146,6 @@ class QueryServiceStub(object):
 ```python
 def add_QueryServiceServicer_to_server(servicer, server): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/remote/utils.md b/docs/alfred/fm/remote/utils.md
index ae2855b..6b1bd10 100644
--- a/docs/alfred/fm/remote/utils.md
+++ b/docs/alfred/fm/remote/utils.md
@@ -76,4 +76,6 @@ def port_finder(port: int) -> int:
 ```python
 def tensor_to_bytes(tensor): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/response/completion_response.md b/docs/alfred/fm/response/completion_response.md
index 376988c..209a7bc 100644
--- a/docs/alfred/fm/response/completion_response.md
+++ b/docs/alfred/fm/response/completion_response.md
@@ -120,4 +120,6 @@ Type: *float*
 @property
 def score(self) -> Dict: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/response/ranked_response.md b/docs/alfred/fm/response/ranked_response.md
index d8ccc54..8cbf9b5 100644
--- a/docs/alfred/fm/response/ranked_response.md
+++ b/docs/alfred/fm/response/ranked_response.md
@@ -137,4 +137,6 @@ Type: *dict*
 @property
 def scores(self) -> Dict: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/response/response.md b/docs/alfred/fm/response/response.md
index 5ddfef1..47b647b 100644
--- a/docs/alfred/fm/response/response.md
+++ b/docs/alfred/fm/response/response.md
@@ -100,4 +100,6 @@ Type: *str*
 ```python
 def serialize(self) -> str: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/utils.md b/docs/alfred/fm/utils.md
index 1101458..7422ac5 100644
--- a/docs/alfred/fm/utils.md
+++ b/docs/alfred/fm/utils.md
@@ -17,6 +17,7 @@ Utils
     - [TokenizedBatch](#tokenizedbatch)
     - [bcolors](#bcolors)
     - [batch_multimodal](#batch_multimodal)
+    - [check_pkg_available](#check_pkg_available)
     - [clear_cuda_cache](#clear_cuda_cache)
     - [colorize_str](#colorize_str)
     - [encode_image](#encode_image)
@@ -24,10 +25,11 @@ Utils
     - [reorder_array](#reorder_array)
     - [retry](#retry)
     - [tokenize](#tokenize)
+    - [type_print](#type_print)
 
 ## DynamicBatcher
 
-[Show source in utils.py:291](../../../alfred/fm/utils.py#L291)
+[Show source in utils.py:318](../../../alfred/fm/utils.py#L318)
 
 Dynamic Batching Utility
 Maximize GPU Utilization by batching queries of similar sizes
@@ -48,7 +50,7 @@ class DynamicBatcher:
 
 ### DynamicBatcher().batch
 
-[Show source in utils.py:420](../../../alfred/fm/utils.py#L420)
+[Show source in utils.py:447](../../../alfred/fm/utils.py#L447)
 
 Batch a list of instances into a list of batches.
 If the instances are of different sizes, they will be sorted by size
@@ -68,7 +70,7 @@ def batch(self) -> List:
 
 ### DynamicBatcher().merge_rank_response
 
-[Show source in utils.py:336](../../../alfred/fm/utils.py#L336)
+[Show source in utils.py:363](../../../alfred/fm/utils.py#L363)
 
 Merge a list of responses with raw logit into a single RankedResponse
 Assumption: Candidate Order is the same across all ranked queries
@@ -96,7 +98,7 @@ def merge_rank_response(
 
 ### DynamicBatcher().reorder
 
-[Show source in utils.py:379](../../../alfred/fm/utils.py#L379)
+[Show source in utils.py:406](../../../alfred/fm/utils.py#L406)
 
 Reordering the responses according to the original order of the queries
 
@@ -123,7 +125,7 @@ def reorder(self, inst: List, offset: Optional[int] = None) -> List:
 
 ## EmbeddingCache
 
-[Show source in utils.py:214](../../../alfred/fm/utils.py#L214)
+[Show source in utils.py:241](../../../alfred/fm/utils.py#L241)
 
 A simple embedding cache for VLM models
 
@@ -137,7 +139,7 @@ class EmbeddingCache:
 
 ### EmbeddingCache().get
 
-[Show source in utils.py:240](../../../alfred/fm/utils.py#L240)
+[Show source in utils.py:267](../../../alfred/fm/utils.py#L267)
 
 Process the inputs and retrieve from the cache/embed the inputs
 
@@ -166,7 +168,7 @@ def get(
 
 ## TokenizedBatch
 
-[Show source in utils.py:280](../../../alfred/fm/utils.py#L280)
+[Show source in utils.py:307](../../../alfred/fm/utils.py#L307)
 
 #### Signature
 
@@ -180,7 +182,7 @@ class TokenizedBatch:
 
 ## bcolors
 
-[Show source in utils.py:188](../../../alfred/fm/utils.py#L188)
+[Show source in utils.py:215](../../../alfred/fm/utils.py#L215)
 
 #### Signature
 
@@ -220,6 +222,31 @@ def batch_multimodal(queries: List[Query], mode: str, batch_size=64):
 
 
 
+## check_pkg_available
+
+[Show source in utils.py:153](../../../alfred/fm/utils.py#L153)
+
+Check if a package is available
+
+#### Arguments
+
+- `pkg_name` - The name of the package
+:type pkg_name: str
+
+#### Returns
+
+Whether the package is available
+Type: *bool*
+
+#### Signature
+
+```python
+def check_pkg_available(pkg_name: str) -> bool:
+    ...
+```
+
+
+
 ## clear_cuda_cache
 
 [Show source in utils.py:24](../../../alfred/fm/utils.py#L24)
@@ -237,7 +264,7 @@ def clear_cuda_cache():
 
 ## colorize_str
 
-[Show source in utils.py:200](../../../alfred/fm/utils.py#L200)
+[Show source in utils.py:227](../../../alfred/fm/utils.py#L227)
 
 #### Signature
 
@@ -332,7 +359,7 @@ def reorder_array(
 
 ## retry
 
-[Show source in utils.py:153](../../../alfred/fm/utils.py#L153)
+[Show source in utils.py:180](../../../alfred/fm/utils.py#L180)
 
 A decorator to retry a function call if it raises an exception.
 
@@ -386,4 +413,21 @@ Type: *List[int]*
 ```python
 def tokenize(inst, tokenizer, max_length=512): ...
-```
\ No newline at end of file
+```
+
+
+
+## type_print
+
+[Show source in utils.py:169](../../../alfred/fm/utils.py#L169)
+
+Print a string word by word to simulate typing
+
+#### Signature
+
+```python
+def type_print(string, interval=0.07, newline=False):
+    ...
+```
+
+
diff --git a/docs/alfred/fm/vllm.md b/docs/alfred/fm/vllm.md
index 55c9429..19ef4d6 100644
--- a/docs/alfred/fm/vllm.md
+++ b/docs/alfred/fm/vllm.md
@@ -24,4 +24,6 @@ source: https://github.com/vllm-project/vllm
 class vLLMModel(LocalAccessFoundationModel):
     def __init__(self, model: str, local_dir: str = None, **kwargs: Any): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/flyingsquid.md b/docs/alfred/labeling/flyingsquid.md
index 4291520..b7cfeed 100644
--- a/docs/alfred/labeling/flyingsquid.md
+++ b/docs/alfred/labeling/flyingsquid.md
@@ -34,4 +34,6 @@ class FlyingSquid(LabelModel):
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/labelmodel.md b/docs/alfred/labeling/labelmodel.md
index e6b4f85..4cc5514 100644
--- a/docs/alfred/labeling/labelmodel.md
+++ b/docs/alfred/labeling/labelmodel.md
@@ -49,4 +49,6 @@ def __call__(self, votes):
 @abc.abstractmethod
 def label(self, votes): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/majority_vote.md b/docs/alfred/labeling/majority_vote.md
index c371a54..57fe484 100644
--- a/docs/alfred/labeling/majority_vote.md
+++ b/docs/alfred/labeling/majority_vote.md
@@ -36,4 +36,6 @@ returns the majority vote for each response row
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/naive_bayes.md b/docs/alfred/labeling/naive_bayes.md
index 733c404..98ad442 100644
--- a/docs/alfred/labeling/naive_bayes.md
+++ b/docs/alfred/labeling/naive_bayes.md
@@ -47,4 +47,6 @@ Type: *np.ndarray*
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/nplm.md b/docs/alfred/labeling/nplm.md
index 4a2edc5..f5009f1 100644
--- a/docs/alfred/labeling/nplm.md
+++ b/docs/alfred/labeling/nplm.md
@@ -52,4 +52,6 @@ Type: *np.ndarray*
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray: ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/run_client_api.md b/docs/alfred/run_client_api.md
index 4d791bd..11d3afc 100644
--- a/docs/alfred/run_client_api.md
+++ b/docs/alfred/run_client_api.md
@@ -366,4 +366,6 @@ async def set_alfred_server_webhook_port(request: Request):
 @alfred_app.get("/status")
 async def status(): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/run_server.md b/docs/alfred/run_server.md
index f389c7c..425d628 100644
--- a/docs/alfred/run_server.md
+++ b/docs/alfred/run_server.md
@@ -29,7 +29,7 @@ class ModelServer:
 
 ## start_server
 
-[Show source in run_server.py:122](../../alfred/run_server.py#L122)
+[Show source in run_server.py:127](../../alfred/run_server.py#L127)
 
 Wrapper function to start gRPC Server.
 
@@ -43,4 +43,6 @@ Wrapper function to start gRPC Server.
 ```python
 def start_server(args: argparse.Namespace): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/template/image_template.md b/docs/alfred/template/image_template.md
index 20d384d..d3aee78 100644
--- a/docs/alfred/template/image_template.md
+++ b/docs/alfred/template/image_template.md
@@ -293,4 +293,6 @@ returns the template type
 @property
 def type(self): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/template/string_template.md b/docs/alfred/template/string_template.md
index 026db05..8cb3fb5 100644
--- a/docs/alfred/template/string_template.md
+++ b/docs/alfred/template/string_template.md
@@ -334,4 +334,6 @@ returns the template type
 @property
 def type(self): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/template/template.md b/docs/alfred/template/template.md
index 6dc779c..a39504a 100644
--- a/docs/alfred/template/template.md
+++ b/docs/alfred/template/template.md
@@ -219,4 +219,6 @@ returns the type of the template
 @abc.abstractmethod
 def type(self): ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/voter/voter.md b/docs/alfred/voter/voter.md
index bd3be19..34872f2 100644
--- a/docs/alfred/voter/voter.md
+++ b/docs/alfred/voter/voter.md
@@ -114,4 +114,6 @@ def vote(
 
 #### See also
 
-- [Response](../fm/response/response.md#response)
\ No newline at end of file
+- [Response](../fm/response/response.md#response)
+
+
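For reviewers, a minimal end-to-end sketch of the new `google` model type. This assumes `google-generativeai` is installed on Python 3.9+ and `GOOGLE_API_KEY` is set; the `Client(model_type=..., model=...)` call follows the pattern of the existing model types, so exact argument names may differ in your checkout:

```python
from alfred.client import Client

# "google" now routes to alfred.fm.google.GoogleModel (see client.py above)
client = Client(model_type="google", model="gemini-pro")

# Completion path: _generate_batch wraps each prompt in a CompletionResponse
responses = client.run(["Who is Batman?"])

# Chat path: streams replies word by word through the new type_print utility
client.chat(log_save_path="gemini_chat_log.json")  # hypothetical log file name
```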
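Similarly, a sketch of the vision and embedding paths as implemented in `alfred/fm/google.py`: vision queries are `(image, prompt)` pairs whose first element must be a `PIL.Image`, and `embedding-001` yields one `torch.Tensor` per string. Calling the underscore-prefixed batch methods directly is for illustration only, and `batsignal.png` is a hypothetical file:

```python
import PIL.Image
from alfred.fm.google import GoogleModel

# Vision model: query[0] must be a PIL image, query[1] the text prompt
vlm = GoogleModel("gemini-pro-vision")
img = PIL.Image.open("batsignal.png")  # hypothetical local image
caption = vlm._generate_batch([(img, "Describe this image.")])[0].prediction

# Embedding model: _encode_batch rejects non-embedding model strings
embedder = GoogleModel("embedding-001")
vector = embedder._encode_batch(["Gotham City"])[0]  # one torch.Tensor per prompt
```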
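On the server side, `run_server.py` gains the same `google` branch, so a remote deployment should only need the new model type. A sketch under the assumption that `ModelServer` keeps its existing constructor shape (model string, model type, and a port that is auto-incremented if taken):

```python
from alfred.run_server import ModelServer

# Hypothetical invocation; argument names mirror the fields used in __init__
# (self.model, self.model_type) and the port-finding behavior described in
# its docstring, but are not confirmed by this diff.
server = ModelServer(model="gemini-pro", model_type="google", port=10719)
```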