diff --git a/alfred/fm/utils.py b/alfred/fm/utils.py index 536f134..0e9a5d4 100644 --- a/alfred/fm/utils.py +++ b/alfred/fm/utils.py @@ -277,6 +277,8 @@ def __init__( self.max_batch_size = max_batch_size self.ranked = False self.tokenizer = tokenizer + if self.tokenizer: + self.tokenizer.pad_token = self.tokenizer.eos_token self.max_token_length = max_token_length if isinstance(self.queries[0], RankedQuery): diff --git a/docs/alfred/client/cache/cache.md b/docs/alfred/client/cache/cache.md index 932dfe2..b3d0296 100644 --- a/docs/alfred/client/cache/cache.md +++ b/docs/alfred/client/cache/cache.md @@ -372,4 +372,6 @@ Type: *str* ```python def to_metadata_string(**kwargs: Any) -> str: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/client/cache/dummy.md b/docs/alfred/client/cache/dummy.md index ecdca0f..19892bf 100644 --- a/docs/alfred/client/cache/dummy.md +++ b/docs/alfred/client/cache/dummy.md @@ -195,4 +195,6 @@ Write a prompt-response pair to the cache ```python def write(self, prompt: str, response: str, metadata: Optional[str] = None): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/client/cache/sqlite.md b/docs/alfred/client/cache/sqlite.md index bb5a834..a8422ae 100644 --- a/docs/alfred/client/cache/sqlite.md +++ b/docs/alfred/client/cache/sqlite.md @@ -328,4 +328,6 @@ def write_batch( self, prompts: List[str], responses: List[str], metadata: Optional[str] = None ): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/client/client.md b/docs/alfred/client/client.md index c628df6..aa8f394 100644 --- a/docs/alfred/client/client.md +++ b/docs/alfred/client/client.md @@ -275,4 +275,6 @@ def score( self, query: Union[RankedQuery, Dict, List[RankedQuery], List[str]], **kwargs: Any ) -> Union[Response, List[Response]]: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/client/ssh/sshtunnel.md b/docs/alfred/client/ssh/sshtunnel.md index ca7dec9..36c88ab 100644 --- a/docs/alfred/client/ssh/sshtunnel.md +++ b/docs/alfred/client/ssh/sshtunnel.md @@ -79,4 +79,6 @@ Stop the tunnel ```python def stop(self): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/client/ssh/utils.md b/docs/alfred/client/ssh/utils.md index f17444f..c918781 100644 --- a/docs/alfred/client/ssh/utils.md +++ b/docs/alfred/client/ssh/utils.md @@ -94,4 +94,6 @@ Finds the next available port if given port is not available ```python def port_finder(port: Union[str, int], host: str = "") -> int: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/data/arrow.md b/docs/alfred/data/arrow.md index 0dec50f..af302df 100644 --- a/docs/alfred/data/arrow.md +++ b/docs/alfred/data/arrow.md @@ -614,4 +614,6 @@ returns the version of the dataset ```python def version(self) -> str: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/data/dataset.md b/docs/alfred/data/dataset.md index 8fc9cca..3ddc8d5 100644 --- a/docs/alfred/data/dataset.md +++ b/docs/alfred/data/dataset.md @@ -194,4 +194,6 @@ returns the version of the dataset @property def version(self) -> str: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/data/wrench.md b/docs/alfred/data/wrench.md index 5600925..08e3213 100644 --- a/docs/alfred/data/wrench.md +++ b/docs/alfred/data/wrench.md @@ -64,4 +64,6 @@ returns the string representation of the dataset ```python def __repr__(self): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/ai21.md b/docs/alfred/fm/ai21.md index fa32f4e..0d0920a 100644 --- a/docs/alfred/fm/ai21.md +++ b/docs/alfred/fm/ai21.md @@ -24,4 +24,6 @@ This class provides a wrapper for the OpenAI API for generating completions. class AI21Model(APIAccessFoundationModel): def __init__(self, model_string: str = "j1-large", api_key: Optional[str] = None): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/anthropic.md b/docs/alfred/fm/anthropic.md index 6d047ab..b64b19f 100644 --- a/docs/alfred/fm/anthropic.md +++ b/docs/alfred/fm/anthropic.md @@ -38,4 +38,6 @@ Launch an interactive chat session with the Anthropic API. ```python def chat(self, **kwargs: Any): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/cohere.md b/docs/alfred/fm/cohere.md index 03b6b87..8e73833 100644 --- a/docs/alfred/fm/cohere.md +++ b/docs/alfred/fm/cohere.md @@ -24,4 +24,6 @@ This class provides a wrapper for the OpenAI API for generating completions. class CohereModel(APIAccessFoundationModel): def __init__(self, model_string: str = "xlarge", api_key: Optional[str] = None): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/dummy.md b/docs/alfred/fm/dummy.md index b40d198..454f0ba 100644 --- a/docs/alfred/fm/dummy.md +++ b/docs/alfred/fm/dummy.md @@ -29,4 +29,6 @@ class DummyModel(LocalAccessFoundationModel): #### See also -- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) \ No newline at end of file +- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) + + diff --git a/docs/alfred/fm/flexgen.md b/docs/alfred/fm/flexgen.md new file mode 100644 index 0000000..d30481b --- /dev/null +++ b/docs/alfred/fm/flexgen.md @@ -0,0 +1,44 @@ +# Flexgen + +[Alfred Index](../../README.md#alfred-index) / +[Alfred](../index.md#alfred) / +[Fm](./index.md#fm) / +Flexgen + +> Auto-generated documentation for [alfred.fm.flexgen](../../../alfred/fm/flexgen.py) module. + +- [Flexgen](#flexgen) + - [FlexGenModel](#flexgenmodel) + +## FlexGenModel + +[Show source in flexgen.py:13](../../../alfred/fm/flexgen.py#L13) + +FlexGenModel wraps a FlexGen model. FlexGen is used for High-throughput generative inference with single GPU. + +Currently, FlexGen supports OPT style models. + +source: https://github.com/FMInference/FlexGen +paper: https://arxiv.org/pdf/2303.06865.pdf + +#### Signature + +```python +class FlexGenModel(LocalAccessFoundationModel): + def __init__( + self, + model: str, + local_dir: str, + model_string: str, + policy: Union[List, Policy] = (100, 0, 100, 0, 100, 0), + offload_dir: str = "./flexgen_offload_cache", + **kwargs: Any + ): + ... +``` + +#### See also + +- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) + + diff --git a/docs/alfred/fm/huggingface.md b/docs/alfred/fm/huggingface.md index 104c723..b5d6d16 100644 --- a/docs/alfred/fm/huggingface.md +++ b/docs/alfred/fm/huggingface.md @@ -45,4 +45,6 @@ class HuggingFaceModel(LocalAccessFoundationModel): #### See also -- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) \ No newline at end of file +- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) + + diff --git a/docs/alfred/fm/huggingfacevlm.md b/docs/alfred/fm/huggingfacevlm.md index 51473ce..77a1677 100644 --- a/docs/alfred/fm/huggingfacevlm.md +++ b/docs/alfred/fm/huggingfacevlm.md @@ -33,4 +33,6 @@ class HuggingFaceCLIPModel(LocalAccessFoundationModel): #### See also -- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) \ No newline at end of file +- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) + + diff --git a/docs/alfred/fm/model.md b/docs/alfred/fm/model.md index 710a22c..1778933 100644 --- a/docs/alfred/fm/model.md +++ b/docs/alfred/fm/model.md @@ -286,4 +286,6 @@ class LocalAccessFoundationModel(FoundationModel): #### See also -- [FoundationModel](#foundationmodel) \ No newline at end of file +- [FoundationModel](#foundationmodel) + + diff --git a/docs/alfred/fm/onnx.md b/docs/alfred/fm/onnx.md index 449923b..f21acab 100644 --- a/docs/alfred/fm/onnx.md +++ b/docs/alfred/fm/onnx.md @@ -30,4 +30,6 @@ class ONNXModel(LocalAccessFoundationModel): #### See also -- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) \ No newline at end of file +- [LocalAccessFoundationModel](./model.md#localaccessfoundationmodel) + + diff --git a/docs/alfred/fm/openai.md b/docs/alfred/fm/openai.md index a127a2b..fb23699 100644 --- a/docs/alfred/fm/openai.md +++ b/docs/alfred/fm/openai.md @@ -13,7 +13,7 @@ Openai ## OpenAIModel -[Show source in openai.py:49](../../../alfred/fm/openai.py#L49) +[Show source in openai.py:50](../../../alfred/fm/openai.py#L50) A wrapper for the OpenAI API. @@ -31,7 +31,7 @@ class OpenAIModel(APIAccessFoundationModel): ### OpenAIModel().chat -[Show source in openai.py:228](../../../alfred/fm/openai.py#L228) +[Show source in openai.py:229](../../../alfred/fm/openai.py#L229) Launch an interactive chat session with the OpenAI API. @@ -40,4 +40,6 @@ Launch an interactive chat session with the OpenAI API. ```python def chat(self, **kwargs: Any): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/query/completion_query.md b/docs/alfred/fm/query/completion_query.md index b0e98c8..8339e34 100644 --- a/docs/alfred/fm/query/completion_query.md +++ b/docs/alfred/fm/query/completion_query.md @@ -143,4 +143,6 @@ returns the raw prompt content @property def prompt(self): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/query/query.md b/docs/alfred/fm/query/query.md index acbdcc4..92c0da6 100644 --- a/docs/alfred/fm/query/query.md +++ b/docs/alfred/fm/query/query.md @@ -82,4 +82,6 @@ Type: *str* ```python def serialize(self) -> str: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/query/ranked_query.md b/docs/alfred/fm/query/ranked_query.md index 57bd2ec..c1c68f1 100644 --- a/docs/alfred/fm/query/ranked_query.md +++ b/docs/alfred/fm/query/ranked_query.md @@ -195,4 +195,6 @@ returns the raw prompt content @property def prompt(self): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/remote/grpc.md b/docs/alfred/fm/remote/grpc.md index dfa9880..b921839 100644 --- a/docs/alfred/fm/remote/grpc.md +++ b/docs/alfred/fm/remote/grpc.md @@ -132,4 +132,6 @@ def restart(self): ```python def serve(self, credentials: Optional[grpc.ServerCredentials] = None): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/remote/protos/query_pb2.md b/docs/alfred/fm/remote/protos/query_pb2.md index 014445b..841a283 100644 --- a/docs/alfred/fm/remote/protos/query_pb2.md +++ b/docs/alfred/fm/remote/protos/query_pb2.md @@ -8,4 +8,5 @@ Query Pb2 > Auto-generated documentation for [alfred.fm.remote.protos.query_pb2](../../../../../alfred/fm/remote/protos/query_pb2.py) module. + - [Query Pb2](#query-pb2) diff --git a/docs/alfred/fm/remote/protos/query_pb2_grpc.md b/docs/alfred/fm/remote/protos/query_pb2_grpc.md index 781e262..b1fb984 100644 --- a/docs/alfred/fm/remote/protos/query_pb2_grpc.md +++ b/docs/alfred/fm/remote/protos/query_pb2_grpc.md @@ -146,4 +146,6 @@ class QueryServiceStub(object): ```python def add_QueryServiceServicer_to_server(servicer, server): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/remote/utils.md b/docs/alfred/fm/remote/utils.md index f97bf67..ed39c98 100644 --- a/docs/alfred/fm/remote/utils.md +++ b/docs/alfred/fm/remote/utils.md @@ -76,4 +76,6 @@ def port_finder(port: int) -> int: ```python def tensor_to_bytes(tensor): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/response/completion_response.md b/docs/alfred/fm/response/completion_response.md index 3277ba5..cdfac42 100644 --- a/docs/alfred/fm/response/completion_response.md +++ b/docs/alfred/fm/response/completion_response.md @@ -120,4 +120,6 @@ Type: *float* @property def score(self) -> Dict: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/response/ranked_response.md b/docs/alfred/fm/response/ranked_response.md index cf1e961..4c9e97f 100644 --- a/docs/alfred/fm/response/ranked_response.md +++ b/docs/alfred/fm/response/ranked_response.md @@ -137,4 +137,6 @@ Type: *dict* @property def scores(self) -> Dict: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/response/response.md b/docs/alfred/fm/response/response.md index 389184c..55d556e 100644 --- a/docs/alfred/fm/response/response.md +++ b/docs/alfred/fm/response/response.md @@ -100,4 +100,6 @@ Type: *str* ```python def serialize(self) -> str: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/fm/utils.md b/docs/alfred/fm/utils.md index 3825d7c..4c543ad 100644 --- a/docs/alfred/fm/utils.md +++ b/docs/alfred/fm/utils.md @@ -47,7 +47,7 @@ class DynamicBatcher: ### DynamicBatcher().batch -[Show source in utils.py:375](../../../alfred/fm/utils.py#L375) +[Show source in utils.py:377](../../../alfred/fm/utils.py#L377) Batch a list of instances into a list of batches. If the instances are of different sizes, they will be sorted by size @@ -67,7 +67,7 @@ def batch(self) -> List: ### DynamicBatcher().merge_rank_response -[Show source in utils.py:289](../../../alfred/fm/utils.py#L289) +[Show source in utils.py:291](../../../alfred/fm/utils.py#L291) Merge a list of responses with raw logit into a single RankedResponse Assumption: Candidate Order is the same across all ranked queries @@ -95,7 +95,7 @@ def merge_rank_response( ### DynamicBatcher().reorder -[Show source in utils.py:334](../../../alfred/fm/utils.py#L334) +[Show source in utils.py:336](../../../alfred/fm/utils.py#L336) Reordering the responses according to the original order of the queries @@ -361,4 +361,6 @@ Type: *List[int]* ```python def tokenize(inst, tokenizer, max_length=512): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/labeling/flyingsquid.md b/docs/alfred/labeling/flyingsquid.md index eec789f..73dae10 100644 --- a/docs/alfred/labeling/flyingsquid.md +++ b/docs/alfred/labeling/flyingsquid.md @@ -34,4 +34,6 @@ class FlyingSquid(LabelModel): ```python def label(self, votes: np.ndarray) -> np.ndarray: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/labeling/labelmodel.md b/docs/alfred/labeling/labelmodel.md index bea20d8..291f46e 100644 --- a/docs/alfred/labeling/labelmodel.md +++ b/docs/alfred/labeling/labelmodel.md @@ -49,4 +49,6 @@ def __call__(self, votes): @abc.abstractmethod def label(self, votes): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/labeling/majority_vote.md b/docs/alfred/labeling/majority_vote.md index 94294e2..56dbf7e 100644 --- a/docs/alfred/labeling/majority_vote.md +++ b/docs/alfred/labeling/majority_vote.md @@ -36,4 +36,6 @@ returns the majority vote for each response row ```python def label(self, votes: np.ndarray) -> np.ndarray: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/labeling/naive_bayes.md b/docs/alfred/labeling/naive_bayes.md index 5b24189..9662eab 100644 --- a/docs/alfred/labeling/naive_bayes.md +++ b/docs/alfred/labeling/naive_bayes.md @@ -47,4 +47,6 @@ Type: *np.ndarray* ```python def label(self, votes: np.ndarray) -> np.ndarray: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/labeling/nplm.md b/docs/alfred/labeling/nplm.md index 132833a..6d7be6e 100644 --- a/docs/alfred/labeling/nplm.md +++ b/docs/alfred/labeling/nplm.md @@ -52,4 +52,6 @@ Type: *np.ndarray* ```python def label(self, votes: np.ndarray) -> np.ndarray: ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/run_client_api.md b/docs/alfred/run_client_api.md index d1ea282..82faf32 100644 --- a/docs/alfred/run_client_api.md +++ b/docs/alfred/run_client_api.md @@ -366,4 +366,6 @@ async def set_alfred_server_webhook_port(request: Request): @alfred_app.get("/status") async def status(): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/run_server.md b/docs/alfred/run_server.md index 9b75516..42ea36d 100644 --- a/docs/alfred/run_server.md +++ b/docs/alfred/run_server.md @@ -43,4 +43,6 @@ Wrapper function to start gRPC Server. ```python def start_server(args: argparse.Namespace): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/template/image_template.md b/docs/alfred/template/image_template.md index b3f7c99..7f57260 100644 --- a/docs/alfred/template/image_template.md +++ b/docs/alfred/template/image_template.md @@ -293,4 +293,6 @@ returns the template type @property def type(self): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/template/string_template.md b/docs/alfred/template/string_template.md index cde3ca0..8790f42 100644 --- a/docs/alfred/template/string_template.md +++ b/docs/alfred/template/string_template.md @@ -334,4 +334,6 @@ returns the template type @property def type(self): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/template/template.md b/docs/alfred/template/template.md index 1377eb9..9f17393 100644 --- a/docs/alfred/template/template.md +++ b/docs/alfred/template/template.md @@ -219,4 +219,6 @@ returns the type of the template @abc.abstractmethod def type(self): ... -``` \ No newline at end of file +``` + + diff --git a/docs/alfred/voter/voter.md b/docs/alfred/voter/voter.md index 760c81e..b61a0c4 100644 --- a/docs/alfred/voter/voter.md +++ b/docs/alfred/voter/voter.md @@ -114,4 +114,6 @@ def vote( #### See also -- [Response](../fm/response/response.md#response) \ No newline at end of file +- [Response](../fm/response/response.md#response) + + diff --git a/requirements.txt b/requirements.txt index 9c692d8..e1d6999 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,9 +2,9 @@ datasets~=2.4.0 paramiko>=2.7.2 pyarrow>=3.0.0 torch>=1.8.0 -accelerate~=0.20.3 -transformers>=4.20.0 +transformers +accelerate numpy>=1.21.0 tqdm>=4.62.3 pandas>=1.3.5