Add 'model' command and bring back the hugging face download
radare committed Oct 2, 2023
1 parent d5a88f6 commit edeadec
Showing 3 changed files with 37 additions and 195 deletions.
26 changes: 11 additions & 15 deletions r2ai/local/interpreter/get_hf_llm.py
@@ -32,21 +32,16 @@


def get_hf_llm(repo_id, debug_mode, context_window):

if "TheBloke/CodeLlama-" not in repo_id:
# ^ This means it was prob through the old --local, so we have already displayed this message.
# Hacky. Not happy with this
print('', Markdown(f"**Open Interpreter** will use `{repo_id}` for local execution. Use your arrow keys to set up the model."), '')

print("Getting the model from hugging face")
raw_models = list_gguf_files(repo_id)

if not raw_models:
print(f"Failed. Are you sure there are GGUF files in `{repo_id}`?")
return None

# print(raw_models)
combined_models = group_and_combine_splits(raw_models)
# print (combined_models)

selected_model = "Medium"
selected_model = None #"Medium"

# First we give them a simple small medium large option. If they want to see more, they can.

@@ -59,23 +54,23 @@ def get_hf_llm(repo_id, debug_mode, context_window):
format_quality_choice(combined_models[-1], "Large"),
"See More"
]
# questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
# answers = inquirer.prompt(questions)
answers = {"selected_model": "Small"}
questions = [inquirer.List('selected_model', message="Quality (smaller is faster)", choices=choices)]
answers = inquirer.prompt(questions)
#answers = {"selected_model": "Small"}
if answers["selected_model"].startswith("Small"):
selected_model = combined_models[0]["filename"]
elif answers["selected_model"].startswith("Medium"):
selected_model = combined_models[len(combined_models) // 2]["filename"]
elif answers["selected_model"].startswith("Large"):
selected_model = combined_models[-1]["filename"]

if selected_model == None:
if selected_model != None:
# This means they either selected See More,
# Or the model only had 1 or 2 options

# Display to user
choices = [format_quality_choice(model) for model in combined_models]
questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
questions = [inquirer.List('selected_model', message="Quality (smaller is faster)", choices=choices)]
answers = inquirer.prompt(questions)
for model in combined_models:
if format_quality_choice(model) == answers["selected_model"]:
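
After the user picks a filename, the download step that the commit title refers to ("bring back the hugging face download") would typically go through hf_hub_download; a hedged sketch of that step, outside what this hunk shows, with an illustrative repo and filename rather than values taken from the code:

# Hedged sketch: fetch the selected GGUF quantization into the local HF cache.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/CodeLlama-7B-Instruct-GGUF",   # example repo used elsewhere in this project
    filename="codellama-7b-instruct.Q4_K_M.gguf",    # illustrative pick, e.g. the "Medium" option
)
print(model_path)  # absolute path of the cached file, ready to hand to the loader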
@@ -381,7 +376,8 @@ def new_get_hf_llm(repo_id, debug_mode, context_window):
if not os.path.exists(repo_id):
return get_hf_llm(repo_id, debug_mode, context_window)
print("LOADING FILE: " + repo_id)
n_gpu_layers = -1
n_gpu_layers = -1 # = 0 to use cpu
n_gpu_layers = 0
# Third stage: GPU confirm
#if confirm_action("Use GPU? (Large models might crash on GPU, but will run more quickly)"):
## n_gpu_layers = -1
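
The n_gpu_layers change above pins inference to the CPU. For reference, a hedged sketch of how that parameter reaches llama-cpp-python when the GGUF file is loaded, assuming llama_cpp is the backend (which the GGUF files and the commented GPU prompt suggest); the model path is a placeholder:

# Hedged sketch: loading a GGUF model on CPU with llama-cpp-python.
from llama_cpp import Llama

llm = Llama(
    model_path="/path/to/model.gguf",  # local file or the cached HF download
    n_ctx=2048,                        # context window; the real code passes context_window
    n_gpu_layers=0,                    # 0 = CPU only (this commit's choice), -1 = offload every layer to the GPU
)
out = llm("Explain what this does: int x = y >> 2;", max_tokens=64)
print(out["choices"][0]["text"])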
178 changes: 9 additions & 169 deletions r2ai/local/interpreter/interpreter.py
@@ -18,6 +18,7 @@
- killian
"""

import builtins
from .cli import cli
from .utils import merge_deltas, parse_partial_json
from .message_block import MessageBlock
@@ -305,12 +306,6 @@ def handle_command(self, user_input):
action(arguments) # Execute the function

def chat(self, message=None, return_messages=False):

# Connect to an LLM (a large language model)
if not self.local:
# gpt-4
self.verify_api_key()

# ^ verify_api_key may set self.local to True, so we run this as an 'if', not 'elif':
if self.local:

@@ -319,28 +314,14 @@

# Find or install Code-Llama
try:
# self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window)
#self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window)
self.llama_instance = new_get_hf_llm(self.model, self.debug_mode, self.context_window)
if self.llama_instance == None:
# They cancelled.
print("Cannot find the model")
return
except:
traceback.print_exc()
# If it didn't work, apologize and switch to GPT-4

print(Markdown("".join([
f"> Failed to install `{self.model}`.",
f"\n\n**Common Fixes:** You can follow our simple setup docs at the link below to resolve common errors.\n\n```\nhttps://github.com/KillianLucas/open-interpreter/tree/main/docs\n```",
f"\n\n**If you've tried that and you're still getting an error, we have likely not built the proper `{self.model}` support for your system.**",
"\n\n*( Running language models locally is a difficult task!* If you have insight into the best way to implement this across platforms/architectures, please join the Open Interpreter community Discord and consider contributing the project's development. )",
"\n\nPress enter to switch to `GPT-4` (recommended)."
])))
input()

# Switch to GPT-4
self.local = False
self.model = "gpt-4"
self.verify_api_key()

# Display welcome message
welcome_message = ""
@@ -419,147 +400,6 @@
if return_messages:
return self.messages

def verify_api_key(self):
"""
Makes sure we have an AZURE_API_KEY or OPENAI_API_KEY.
"""
if self.use_azure:
all_env_available = (
('AZURE_API_KEY' in os.environ or 'OPENAI_API_KEY' in os.environ) and
'AZURE_API_BASE' in os.environ and
'AZURE_API_VERSION' in os.environ and
'AZURE_DEPLOYMENT_NAME' in os.environ)
if all_env_available:
self.api_key = os.environ.get('AZURE_API_KEY') or os.environ['OPENAI_API_KEY']
self.azure_api_base = os.environ['AZURE_API_BASE']
self.azure_api_version = os.environ['AZURE_API_VERSION']
self.azure_deployment_name = os.environ['AZURE_DEPLOYMENT_NAME']
self.azure_api_type = os.environ.get('AZURE_API_TYPE', 'azure')
else:
# This is probably their first time here!
self._print_welcome_message()
time.sleep(1)

print(Rule(style="white"))

print(Markdown(missing_azure_info_message), '', Rule(style="white"), '')
response = input("Azure OpenAI API key: ")

if response == "":
# User pressed `enter`, requesting Code-Llama

print(Markdown(
"> Switching to `Code-Llama`...\n\n**Tip:** Run `interpreter --local` to automatically use `Code-Llama`."),
'')
time.sleep(2)
print(Rule(style="white"))



# Temporarily, for backwards (behavioral) compatibility, we've moved this part of llama_2.py here.
# AND BELOW.
# This way, when folks hit interpreter --local, they get the same experience as before.
import inquirer

print('', Markdown("**Open Interpreter** will use `Code Llama` for local execution. Use your arrow keys to set up the model."), '')

models = {
'7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
'13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
'34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
}

parameter_choices = list(models.keys())
questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
answers = inquirer.prompt(questions)
chosen_param = answers['param']

# THIS is more in line with the future. You just say the model you want by name:
self.model = models[chosen_param]
self.local = True




return

else:
self.api_key = response
self.azure_api_base = input("Azure OpenAI API base: ")
self.azure_deployment_name = input("Azure OpenAI deployment name of GPT: ")
self.azure_api_version = input("Azure OpenAI API version: ")
print('', Markdown(
"**Tip:** To save this key for later, run `export AZURE_API_KEY=your_api_key AZURE_API_BASE=your_api_base AZURE_API_VERSION=your_api_version AZURE_DEPLOYMENT_NAME=your_gpt_deployment_name` on Mac/Linux or `setx AZURE_API_KEY your_api_key AZURE_API_BASE your_api_base AZURE_API_VERSION your_api_version AZURE_DEPLOYMENT_NAME your_gpt_deployment_name` on Windows."),
'')
time.sleep(2)
print(Rule(style="white"))

litellm.api_type = self.azure_api_type
litellm.api_base = self.azure_api_base
litellm.api_version = self.azure_api_version
litellm.api_key = self.api_key
else:
if self.api_key == None:
if 'OPENAI_API_KEY' in os.environ:
self.api_key = os.environ['OPENAI_API_KEY']
else:
# This is probably their first time here!
self._print_welcome_message()
time.sleep(1)

print(Rule(style="white"))

print(Markdown(missing_api_key_message), '', Rule(style="white"), '')
response = input("OpenAI API key: ")

if response == "":
# User pressed `enter`, requesting Code-Llama

print(Markdown(
"> Switching to `Code-Llama`...\n\n**Tip:** Run `interpreter --local` to automatically use `Code-Llama`."),
'')
time.sleep(2)
print(Rule(style="white"))



# Temporarily, for backwards (behavioral) compatibility, we've moved this part of llama_2.py here.
# AND ABOVE.
# This way, when folks hit interpreter --local, they get the same experience as before.
import inquirer

print('', Markdown("**Open Interpreter** will use `Code Llama` for local execution. Use your arrow keys to set up the model."), '')

models = {
'7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
'13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
'34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
}

parameter_choices = list(models.keys())
questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
answers = inquirer.prompt(questions)
chosen_param = answers['param']

# THIS is more in line with the future. You just say the model you want by name:
self.model = models[chosen_param]
self.local = True




return

else:
self.api_key = response
print('', Markdown("**Tip:** To save this key for later, run `export OPENAI_API_KEY=your_api_key` on Mac/Linux or `setx OPENAI_API_KEY your_api_key` on Windows."), '')
time.sleep(2)
print(Rule(style="white"))

litellm.api_key = self.api_key
if self.api_base:
litellm.api_base = self.api_base

def end_active_block(self):
if self.active_block:
self.active_block.end()
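
For reference, the deleted verify_api_key expected the remote configuration to come from environment variables. A condensed sketch of the checks it performed follows, mirroring the removed code rather than anything this commit adds; the helper name has_remote_config is hypothetical:

# Hedged sketch of the environment the removed verify_api_key looked for.
import os

def has_remote_config(use_azure=False):
    if use_azure:
        return (("AZURE_API_KEY" in os.environ or "OPENAI_API_KEY" in os.environ)
                and "AZURE_API_BASE" in os.environ
                and "AZURE_API_VERSION" in os.environ
                and "AZURE_DEPLOYMENT_NAME" in os.environ)
    return "OPENAI_API_KEY" in os.environ  # plain OpenAI path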
@@ -673,23 +513,24 @@ def messages_to_prompt(messages):

# Extracting the system prompt and initializing the formatted string with it.
system_prompt = messages[0]['content']
formatted_messages = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n"
formatted_messages = f"<s>[INST]<<SYS>>\n{system_prompt}\n<</SYS>>\n"
# Loop starting from the first user message
for index, item in enumerate(messages[1:]):
role = item['role']
content = item['content']

if role == 'user':
formatted_messages += f"{content} [/INST] "
formatted_messages += f"{content}[/INST] "
elif role == 'function':
formatted_messages += f"Output: {content} [/INST] "
formatted_messages += f"Output: {content}[/INST] "
elif role == 'assistant':
formatted_messages += f"{content} </s><s>[INST] "

# Remove the trailing '<s>[INST] ' from the final output
if formatted_messages.endswith("<s>[INST] "):
if formatted_messages.endswith("<s>[INST]"):
formatted_messages = formatted_messages[:-10]

# DEBUG DEBUG DEBUG AGAIN AGAIN AGAIN
# builtins.print(formatted_messages)
return formatted_messages

prompt = messages_to_prompt(messages)
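
To make the CodeLlama instruct template above concrete, here is an illustrative conversation and the string the post-change code would build from it; the messages themselves are made up and the output is shown as comments, with its trailing space noted:

# Hedged worked example for messages_to_prompt after this change.
messages = [
    {"role": "system",    "content": "You are a helpful assistant."},
    {"role": "user",      "content": "What does strcpy do?"},
    {"role": "assistant", "content": "It copies a NUL-terminated string."},
    {"role": "user",      "content": "Is it safe?"},
]
# Expected result (a single string; the last line ends with a space):
# <s>[INST]<<SYS>>
# You are a helpful assistant.
# <</SYS>>
# What does strcpy do?[/INST] It copies a NUL-terminated string. </s><s>[INST] Is it safe?[/INST]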
@@ -704,7 +545,6 @@ def messages_to_prompt(messages):

if self.debug_mode:
# we have to use builtins bizarrely! because rich.print interprets "[INST]" as something meaningful
import builtins
builtins.print("TEXT PROMPT SEND TO LLM:\n", prompt)

# Run Code-Llama
28 changes: 17 additions & 11 deletions r2ai/local/main.py
@@ -29,25 +29,22 @@
#interpreter.model = "models/models/guanaco-7b-uncensored.Q2_K.gguf"
#interpreter.model = "models/models/ggml-model-q4_0.gguf" # tinysmall -- very bad results

#interpreter.model = "models/models/mistral-7b-v0.1.Q4_K_M.gguf"
#interpreter.model = "models/models/mistral-7b-instruct-v0.1.Q2_K.gguf"
#interpreter.model = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
builtins.print("TheBloke/Mistral-7B-Instruct-v0.1-GGUF")

dir_path = os.path.dirname(os.path.realpath(__file__))
interpreter.model = dir_path + "/" + interpreter.model
model_path = dir_path + "/" + interpreter.model
if os.path.exists(model_path):
interpreter.model = model_path

def slurp(f):
fd = open(f)
res = fd.read()
fd.close()
return "" + res

# script = slurp("/tmp/util.c")
# usertext = "Describe the purpose of this C function in a single sentence and add it as a comment on top: [CODE]" + script + "[/CODE]"
# usertext = "Add comments in the following code to make it easier to understand: [CODE]" + script + "[/CODE]"
# usertext = "Tell me what's the use case for this function and when it should not be used: [CODE]" + script + "[/CODE]"
# usertext = "Digues en Català i en una sola frase si aquesta funció modifica els continguts dels arguments que reb: [CODE]" + script + "[/CODE]"
# usertext = "Tell me what's not obvious or wrong in this function: [CODE]" + script + "[/CODE]"
# usertext = "How to bind this function from Python? [CODE]" + script + "[/CODE]"
# interpreter.chat(usertext)
# exit()

r2 = None
try:
import r2pipe
@@ -64,6 +61,8 @@ def slurp(f):
#questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
#inquirer.prompt(questions)

# TheBloke/Mistral-7B-Instruct-v0.1-GGUF

help_message = """
Usage: [!r2command] | [chat-query] | [command]
Examples:
@@ -75,6 +74,7 @@
$system prompt -> define the role of the conversation
which instruction corresponds to this description? -> the query for the chat model
reset -> reset the chat (same as pressing enter with an empty line)
model [file/repo] -> select model from huggingface repository or local file
clear -> clear the screen
q -> quit/exit/^C
"""
@@ -86,6 +86,12 @@ def runline(usertext):
builtins.print(help_message)
elif usertext == "clear":
builtins.print("\x1b[2J\x1b[0;0H\r")
elif usertext.startswith("model"):
words = usertext.split(" ")
if len(words) > 1:
interpreter.model = words[1]
else:
builtins.print(interpreter.model)
elif usertext == "reset":
builtins.print("Forgot")
interpreter.reset()
Expand Down
