diff --git a/alfred/fm/utils.py b/alfred/fm/utils.py
index 536f134..0e9a5d4 100644
--- a/alfred/fm/utils.py
+++ b/alfred/fm/utils.py
@@ -277,6 +277,8 @@ def __init__(
         self.max_batch_size = max_batch_size
         self.ranked = False
         self.tokenizer = tokenizer
+        if self.tokenizer:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
         self.max_token_length = max_token_length
 
         if isinstance(self.queries[0], RankedQuery):
diff --git a/docs/alfred/client/cache/cache.md b/docs/alfred/client/cache/cache.md
index 932dfe2..b3d0296 100644
--- a/docs/alfred/client/cache/cache.md
+++ b/docs/alfred/client/cache/cache.md
@@ -372,4 +372,6 @@ Type: *str*
 ```python
 def to_metadata_string(**kwargs: Any) -> str:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/cache/dummy.md b/docs/alfred/client/cache/dummy.md
index ecdca0f..19892bf 100644
--- a/docs/alfred/client/cache/dummy.md
+++ b/docs/alfred/client/cache/dummy.md
@@ -195,4 +195,6 @@ Write a prompt-response pair to the cache
 ```python
 def write(self, prompt: str, response: str, metadata: Optional[str] = None):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/cache/sqlite.md b/docs/alfred/client/cache/sqlite.md
index bb5a834..a8422ae 100644
--- a/docs/alfred/client/cache/sqlite.md
+++ b/docs/alfred/client/cache/sqlite.md
@@ -328,4 +328,6 @@ def write_batch(
     self, prompts: List[str], responses: List[str], metadata: Optional[str] = None
 ):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/client.md b/docs/alfred/client/client.md
index c628df6..aa8f394 100644
--- a/docs/alfred/client/client.md
+++ b/docs/alfred/client/client.md
@@ -275,4 +275,6 @@ def score(
     self, query: Union[RankedQuery, Dict, List[RankedQuery], List[str]], **kwargs: Any
 ) -> Union[Response, List[Response]]:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/ssh/sshtunnel.md b/docs/alfred/client/ssh/sshtunnel.md
index ca7dec9..36c88ab 100644
--- a/docs/alfred/client/ssh/sshtunnel.md
+++ b/docs/alfred/client/ssh/sshtunnel.md
@@ -79,4 +79,6 @@ Stop the tunnel
 ```python
 def stop(self):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/client/ssh/utils.md b/docs/alfred/client/ssh/utils.md
index f17444f..c918781 100644
--- a/docs/alfred/client/ssh/utils.md
+++ b/docs/alfred/client/ssh/utils.md
@@ -94,4 +94,6 @@ Finds the next available port if given port is not available
 ```python
 def port_finder(port: Union[str, int], host: str = "") -> int:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/data/arrow.md b/docs/alfred/data/arrow.md
index 0dec50f..af302df 100644
--- a/docs/alfred/data/arrow.md
+++ b/docs/alfred/data/arrow.md
@@ -614,4 +614,6 @@ returns the version of the dataset
 ```python
 def version(self) -> str:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/data/dataset.md b/docs/alfred/data/dataset.md
index 8fc9cca..3ddc8d5 100644
--- a/docs/alfred/data/dataset.md
+++ b/docs/alfred/data/dataset.md
@@ -194,4 +194,6 @@ returns the version of the dataset
 @property
 def version(self) -> str:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/data/wrench.md b/docs/alfred/data/wrench.md
index 5600925..08e3213 100644
--- a/docs/alfred/data/wrench.md
+++ b/docs/alfred/data/wrench.md
@@ -64,4 +64,6 @@ returns the string representation of the dataset
 ```python
 def __repr__(self):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/ai21.md b/docs/alfred/fm/ai21.md
index fa32f4e..0d0920a 100644
--- a/docs/alfred/fm/ai21.md
+++ b/docs/alfred/fm/ai21.md
@@ -24,4 +24,6 @@ This class provides a wrapper for the OpenAI API for generating completions.
 class AI21Model(APIAccessFoundationModel):
     def __init__(self, model_string: str = "j1-large", api_key: Optional[str] = None):
         ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/anthropic.md b/docs/alfred/fm/anthropic.md
index 6d047ab..b64b19f 100644
--- a/docs/alfred/fm/anthropic.md
+++ b/docs/alfred/fm/anthropic.md
@@ -38,4 +38,6 @@ Launch an interactive chat session with the Anthropic API.
 ```python
 def chat(self, **kwargs: Any):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/cohere.md b/docs/alfred/fm/cohere.md
index 03b6b87..8e73833 100644
--- a/docs/alfred/fm/cohere.md
+++ b/docs/alfred/fm/cohere.md
@@ -24,4 +24,6 @@ This class provides a wrapper for the OpenAI API for generating completions.
 class CohereModel(APIAccessFoundationModel):
     def __init__(self, model_string: str = "xlarge", api_key: Optional[str] = None):
         ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/dummy.md b/docs/alfred/fm/dummy.md
index b40d198..454f0ba 100644
--- a/docs/alfred/fm/dummy.md
+++ b/docs/alfred/fm/dummy.md
@@ -29,4 +29,6 @@ class DummyModel(LocalAccessFoundationModel):
 
 #### See also
 
-- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
\ No newline at end of file
+- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
+
+
diff --git a/docs/alfred/fm/flexgen.md b/docs/alfred/fm/flexgen.md
new file mode 100644
index 0000000..d30481b
--- /dev/null
+++ b/docs/alfred/fm/flexgen.md
@@ -0,0 +1,44 @@
+# Flexgen
+
+[Alfred Index](../../README.md#alfred-index) /
+[Alfred](../index.md#alfred) /
+[Fm](./index.md#fm) /
+Flexgen
+
+> Auto-generated documentation for [alfred.fm.flexgen](../../../alfred/fm/flexgen.py) module.
+
+- [Flexgen](#flexgen)
+  - [FlexGenModel](#flexgenmodel)
+
+## FlexGenModel
+
+[Show source in flexgen.py:13](../../../alfred/fm/flexgen.py#L13)
+
+FlexGenModel wraps a FlexGen model. FlexGen is used for high-throughput generative inference with a single GPU.
+
+Currently, FlexGen supports OPT-style models.
+
+- source: https://github.com/FMInference/FlexGen
+- paper: https://arxiv.org/pdf/2303.06865.pdf
+
+#### Signature
+
+```python
+class FlexGenModel(LocalAccessFoundationModel):
+    def __init__(
+        self,
+        model: str,
+        local_dir: str,
+        model_string: str,
+        policy: Union[List, Policy] = (100, 0, 100, 0, 100, 0),
+        offload_dir: str = "./flexgen_offload_cache",
+        **kwargs: Any
+    ):
+        ...
+```
+
+#### See also
+
+- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
+
+
diff --git a/docs/alfred/fm/huggingface.md b/docs/alfred/fm/huggingface.md
index 104c723..b5d6d16 100644
--- a/docs/alfred/fm/huggingface.md
+++ b/docs/alfred/fm/huggingface.md
@@ -45,4 +45,6 @@ class HuggingFaceModel(LocalAccessFoundationModel):
 
 #### See also
 
-- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
\ No newline at end of file
+- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
+
+
diff --git a/docs/alfred/fm/huggingfacevlm.md b/docs/alfred/fm/huggingfacevlm.md
index 51473ce..77a1677 100644
--- a/docs/alfred/fm/huggingfacevlm.md
+++ b/docs/alfred/fm/huggingfacevlm.md
@@ -33,4 +33,6 @@ class HuggingFaceCLIPModel(LocalAccessFoundationModel):
 
 #### See also
 
-- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
\ No newline at end of file
+- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
+
+
diff --git a/docs/alfred/fm/model.md b/docs/alfred/fm/model.md
index 710a22c..1778933 100644
--- a/docs/alfred/fm/model.md
+++ b/docs/alfred/fm/model.md
@@ -286,4 +286,6 @@ class LocalAccessFoundationModel(FoundationModel):
 
 #### See also
 
-- [FoundationModel](#foundationmodel)
\ No newline at end of file
+- [FoundationModel](#foundationmodel)
+
+
diff --git a/docs/alfred/fm/onnx.md b/docs/alfred/fm/onnx.md
index 449923b..f21acab 100644
--- a/docs/alfred/fm/onnx.md
+++ b/docs/alfred/fm/onnx.md
@@ -30,4 +30,6 @@ class ONNXModel(LocalAccessFoundationModel):
 
 #### See also
 
-- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
\ No newline at end of file
+- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel)
+
+
diff --git a/docs/alfred/fm/openai.md b/docs/alfred/fm/openai.md
index a127a2b..fb23699 100644
--- a/docs/alfred/fm/openai.md
+++ b/docs/alfred/fm/openai.md
@@ -13,7 +13,7 @@ Openai
 
 ## OpenAIModel
 
-[Show source in openai.py:49](../../../alfred/fm/openai.py#L49)
+[Show source in openai.py:50](../../../alfred/fm/openai.py#L50)
 
 A wrapper for the OpenAI API.
 
@@ -31,7 +31,7 @@ class OpenAIModel(APIAccessFoundationModel):
 
 ### OpenAIModel().chat
 
-[Show source in openai.py:228](../../../alfred/fm/openai.py#L228)
+[Show source in openai.py:229](../../../alfred/fm/openai.py#L229)
 
 Launch an interactive chat session with the OpenAI API.
 
@@ -40,4 +40,6 @@ Launch an interactive chat session with the OpenAI API.
 ```python
 def chat(self, **kwargs: Any):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/query/completion_query.md b/docs/alfred/fm/query/completion_query.md
index b0e98c8..8339e34 100644
--- a/docs/alfred/fm/query/completion_query.md
+++ b/docs/alfred/fm/query/completion_query.md
@@ -143,4 +143,6 @@ returns the raw prompt content
 @property
 def prompt(self):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/query/query.md b/docs/alfred/fm/query/query.md
index acbdcc4..92c0da6 100644
--- a/docs/alfred/fm/query/query.md
+++ b/docs/alfred/fm/query/query.md
@@ -82,4 +82,6 @@ Type: *str*
 ```python
 def serialize(self) -> str:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/query/ranked_query.md b/docs/alfred/fm/query/ranked_query.md
index 57bd2ec..c1c68f1 100644
--- a/docs/alfred/fm/query/ranked_query.md
+++ b/docs/alfred/fm/query/ranked_query.md
@@ -195,4 +195,6 @@ returns the raw prompt content
 @property
 def prompt(self):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/remote/grpc.md b/docs/alfred/fm/remote/grpc.md
index dfa9880..b921839 100644
--- a/docs/alfred/fm/remote/grpc.md
+++ b/docs/alfred/fm/remote/grpc.md
@@ -132,4 +132,6 @@ def restart(self):
 ```python
 def serve(self, credentials: Optional[grpc.ServerCredentials] = None):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/remote/protos/query_pb2.md b/docs/alfred/fm/remote/protos/query_pb2.md
index 014445b..841a283 100644
--- a/docs/alfred/fm/remote/protos/query_pb2.md
+++ b/docs/alfred/fm/remote/protos/query_pb2.md
@@ -8,4 +8,5 @@
 Query Pb2
 
 > Auto-generated documentation for [alfred.fm.remote.protos.query_pb2](../../../../../alfred/fm/remote/protos/query_pb2.py) module.
+
 - [Query Pb2](#query-pb2)
diff --git a/docs/alfred/fm/remote/protos/query_pb2_grpc.md b/docs/alfred/fm/remote/protos/query_pb2_grpc.md
index 781e262..b1fb984 100644
--- a/docs/alfred/fm/remote/protos/query_pb2_grpc.md
+++ b/docs/alfred/fm/remote/protos/query_pb2_grpc.md
@@ -146,4 +146,6 @@ class QueryServiceStub(object):
 ```python
 def add_QueryServiceServicer_to_server(servicer, server):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/remote/utils.md b/docs/alfred/fm/remote/utils.md
index f97bf67..ed39c98 100644
--- a/docs/alfred/fm/remote/utils.md
+++ b/docs/alfred/fm/remote/utils.md
@@ -76,4 +76,6 @@ def port_finder(port: int) -> int:
 ```python
 def tensor_to_bytes(tensor):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/response/completion_response.md b/docs/alfred/fm/response/completion_response.md
index 3277ba5..cdfac42 100644
--- a/docs/alfred/fm/response/completion_response.md
+++ b/docs/alfred/fm/response/completion_response.md
@@ -120,4 +120,6 @@ Type: *float*
 @property
 def score(self) -> Dict:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/response/ranked_response.md b/docs/alfred/fm/response/ranked_response.md
index cf1e961..4c9e97f 100644
--- a/docs/alfred/fm/response/ranked_response.md
+++ b/docs/alfred/fm/response/ranked_response.md
@@ -137,4 +137,6 @@ Type: *dict*
 @property
 def scores(self) -> Dict:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/response/response.md b/docs/alfred/fm/response/response.md
index 389184c..55d556e 100644
--- a/docs/alfred/fm/response/response.md
+++ b/docs/alfred/fm/response/response.md
@@ -100,4 +100,6 @@ Type: *str*
 ```python
 def serialize(self) -> str:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/fm/utils.md b/docs/alfred/fm/utils.md
index 3825d7c..4c543ad 100644
--- a/docs/alfred/fm/utils.md
+++ b/docs/alfred/fm/utils.md
@@ -47,7 +47,7 @@ class DynamicBatcher:
 
 ### DynamicBatcher().batch
 
-[Show source in utils.py:375](../../../alfred/fm/utils.py#L375)
+[Show source in utils.py:377](../../../alfred/fm/utils.py#L377)
 
 Batch a list of instances into a list of batches.
 If the instances are of different sizes, they will be sorted by size
@@ -67,7 +67,7 @@ def batch(self) -> List:
 
 ### DynamicBatcher().merge_rank_response
 
-[Show source in utils.py:289](../../../alfred/fm/utils.py#L289)
+[Show source in utils.py:291](../../../alfred/fm/utils.py#L291)
 
 Merge a list of responses with raw logit into a single RankedResponse
 Assumption: Candidate Order is the same across all ranked queries
@@ -95,7 +95,7 @@ def merge_rank_response(
 
 ### DynamicBatcher().reorder
 
-[Show source in utils.py:334](../../../alfred/fm/utils.py#L334)
+[Show source in utils.py:336](../../../alfred/fm/utils.py#L336)
 
 Reordering the responses according to the original order of the queries
 
@@ -361,4 +361,6 @@ Type: *List[int]*
 ```python
 def tokenize(inst, tokenizer, max_length=512):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/flyingsquid.md b/docs/alfred/labeling/flyingsquid.md
index eec789f..73dae10 100644
--- a/docs/alfred/labeling/flyingsquid.md
+++ b/docs/alfred/labeling/flyingsquid.md
@@ -34,4 +34,6 @@ class FlyingSquid(LabelModel):
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/labelmodel.md b/docs/alfred/labeling/labelmodel.md
index bea20d8..291f46e 100644
--- a/docs/alfred/labeling/labelmodel.md
+++ b/docs/alfred/labeling/labelmodel.md
@@ -49,4 +49,6 @@ def __call__(self, votes):
 @abc.abstractmethod
 def label(self, votes):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/majority_vote.md b/docs/alfred/labeling/majority_vote.md
index 94294e2..56dbf7e 100644
--- a/docs/alfred/labeling/majority_vote.md
+++ b/docs/alfred/labeling/majority_vote.md
@@ -36,4 +36,6 @@ returns the majority vote for each response row
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/naive_bayes.md b/docs/alfred/labeling/naive_bayes.md
index 5b24189..9662eab 100644
--- a/docs/alfred/labeling/naive_bayes.md
+++ b/docs/alfred/labeling/naive_bayes.md
@@ -47,4 +47,6 @@ Type: *np.ndarray*
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/labeling/nplm.md b/docs/alfred/labeling/nplm.md
index 132833a..6d7be6e 100644
--- a/docs/alfred/labeling/nplm.md
+++ b/docs/alfred/labeling/nplm.md
@@ -52,4 +52,6 @@ Type: *np.ndarray*
 ```python
 def label(self, votes: np.ndarray) -> np.ndarray:
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/run_client_api.md b/docs/alfred/run_client_api.md
index d1ea282..82faf32 100644
--- a/docs/alfred/run_client_api.md
+++ b/docs/alfred/run_client_api.md
@@ -366,4 +366,6 @@ async def set_alfred_server_webhook_port(request: Request):
 @alfred_app.get("/status")
 async def status():
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/run_server.md b/docs/alfred/run_server.md
index 9b75516..42ea36d 100644
--- a/docs/alfred/run_server.md
+++ b/docs/alfred/run_server.md
@@ -43,4 +43,6 @@ Wrapper function to start gRPC Server.
 ```python
 def start_server(args: argparse.Namespace):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/template/image_template.md b/docs/alfred/template/image_template.md
index b3f7c99..7f57260 100644
--- a/docs/alfred/template/image_template.md
+++ b/docs/alfred/template/image_template.md
@@ -293,4 +293,6 @@ returns the template type
 @property
 def type(self):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/template/string_template.md b/docs/alfred/template/string_template.md
index cde3ca0..8790f42 100644
--- a/docs/alfred/template/string_template.md
+++ b/docs/alfred/template/string_template.md
@@ -334,4 +334,6 @@ returns the template type
 @property
 def type(self):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/template/template.md b/docs/alfred/template/template.md
index 1377eb9..9f17393 100644
--- a/docs/alfred/template/template.md
+++ b/docs/alfred/template/template.md
@@ -219,4 +219,6 @@ returns the type of the template
 @abc.abstractmethod
 def type(self):
     ...
-```
\ No newline at end of file
+```
+
+
diff --git a/docs/alfred/voter/voter.md b/docs/alfred/voter/voter.md
index 760c81e..b61a0c4 100644
--- a/docs/alfred/voter/voter.md
+++ b/docs/alfred/voter/voter.md
@@ -114,4 +114,6 @@ def vote(
 
 #### See also
 
-- [Response](../fm/response/response.md#response)
\ No newline at end of file
+- [Response](../fm/response/response.md#response)
+
+
diff --git a/requirements.txt b/requirements.txt
index 9c692d8..e1d6999 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,9 +2,9 @@ datasets~=2.4.0
 paramiko>=2.7.2
 pyarrow>=3.0.0
 torch>=1.8.0
-accelerate~=0.20.3
 
-transformers>=4.20.0
+transformers
+accelerate
 numpy>=1.21.0
 tqdm>=4.62.3
 pandas>=1.3.5