Skip to content

Commit 779c7b9

Browse files
authored
Support HuggingFace's inference API (#352)
* Support HuggingFace's inference API * Use positive temperature for prompt parser
1 parent 5ca2858 commit 779c7b9

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

prompt2model/prompt_parser/instr_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def parse_from_prompt(self, prompt: str) -> None:
9393
response: openai.ChatCompletion | Exception = (
9494
chat_api.generate_one_completion(
9595
parsing_prompt_for_chatgpt,
96-
temperature=0,
96+
temperature=0.01,
9797
presence_penalty=0,
9898
frequency_penalty=0,
9999
)

prompt2model/utils/api_tools.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,19 @@ def __init__(
4545
self,
4646
model_name: str = "gpt-3.5-turbo",
4747
max_tokens: int | None = None,
48+
api_base: str | None = None,
4849
):
4950
"""Initialize APIAgent with model_name and max_tokens.
5051
5152
Args:
5253
model_name: Name of the model to use (by default, gpt-3.5-turbo).
5354
max_tokens: The maximum number of tokens to generate. Defaults to the max
5455
value for the model if available through litellm.
56+
api_base: Custom endpoint for Hugging Face's inference API.
5557
"""
5658
self.model_name = model_name
5759
self.max_tokens = max_tokens
60+
self.api_base = api_base
5861
if max_tokens is None:
5962
try:
6063
self.max_tokens = litellm.utils.get_max_tokens(model_name)
@@ -99,6 +102,7 @@ def generate_one_completion(
99102
messages=[
100103
{"role": "user", "content": f"{prompt}"},
101104
],
105+
api_base=self.api_base,
102106
temperature=temperature,
103107
presence_penalty=presence_penalty,
104108
frequency_penalty=frequency_penalty,
@@ -144,6 +148,7 @@ async def _throttled_completion_acreate(
144148
return await acompletion(
145149
model=model,
146150
messages=messages,
151+
api_base=self.api_base,
147152
temperature=temperature,
148153
max_tokens=max_tokens,
149154
n=n,

0 commit comments

Comments (0)