Add 'model' command and bring back the hugging face download
radare committed Oct 2, 2023
1 parent d5a88f6 commit edeadec
Showing 3 changed files with 37 additions and 195 deletions.
26 changes: 11 additions & 15 deletions r2ai/local/interpreter/get_hf_llm.py
@@ -32,21 +32,16 @@


def get_hf_llm(repo_id, debug_mode, context_window):

if "TheBloke/CodeLlama-" not in repo_id:
# ^ This means it was prob through the old --local, so we have already displayed this message.
# Hacky. Not happy with this
print('', Markdown(f"**Open Interpreter** will use `{repo_id}` for local execution. Use your arrow keys to set up the model."), '')

print("Getting the model from hugging face")
raw_models = list_gguf_files(repo_id)

if not raw_models:
print(f"Failed. Are you sure there are GGUF files in `{repo_id}`?")
return None

# print(raw_models)
combined_models = group_and_combine_splits(raw_models)
# print (combined_models)

selected_model = "Medium"
selected_model = None #"Medium"

# First we give them a simple small medium large option. If they want to see more, they can.

@@ -59,23 +54,23 @@ def get_hf_llm(repo_id, debug_mode, context_window):
format_quality_choice(combined_models[-1], "Large"),
"See More"
]
# questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
# answers = inquirer.prompt(questions)
answers = {"selected_model": "Small"}
questions = [inquirer.List('selected_model', message="Quality (smaller is faster)", choices=choices)]
answers = inquirer.prompt(questions)
#answers = {"selected_model": "Small"}
if answers["selected_model"].startswith("Small"):
selected_model = combined_models[0]["filename"]
elif answers["selected_model"].startswith("Medium"):
selected_model = combined_models[len(combined_models) // 2]["filename"]
elif answers["selected_model"].startswith("Large"):
selected_model = combined_models[-1]["filename"]

if selected_model == None:
if selected_model != None:
# This means they either selected See More,
# Or the model only had 1 or 2 options

# Display to user
choices = [format_quality_choice(model) for model in combined_models]
questions = [inquirer.List('selected_model', message="Quality (smaller is faster, larger is more capable)", choices=choices)]
questions = [inquirer.List('selected_model', message="Quality (smaller is faster)", choices=choices)]
answers = inquirer.prompt(questions)
for model in combined_models:
if format_quality_choice(model) == answers["selected_model"]:
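
After the user picks a filename, the download step that the commit title refers to ("bring back the hugging face download") would typically go through hf_hub_download; a hedged sketch of that step, outside what this hunk shows, with an illustrative repo and filename rather than values taken from the code:

# Hedged sketch: fetch the selected GGUF quantization into the local HF cache.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/CodeLlama-7B-Instruct-GGUF",   # example repo used elsewhere in this project
    filename="codellama-7b-instruct.Q4_K_M.gguf",    # illustrative pick, e.g. the "Medium" option
)
print(model_path)  # absolute path of the cached file, ready to hand to the loader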
@@ -381,7 +376,8 @@ def new_get_hf_llm(repo_id, debug_mode, context_window):
if not os.path.exists(repo_id):
return get_hf_llm(repo_id, debug_mode, context_window)
print("LOADING FILE: " + repo_id)
n_gpu_layers = -1
n_gpu_layers = -1 # = 0 to use cpu
n_gpu_layers = 0
# Third stage: GPU confirm
#if confirm_action("Use GPU? (Large models might crash on GPU, but will run more quickly)"):
## n_gpu_layers = -1
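
The n_gpu_layers change above pins inference to the CPU. For reference, a hedged sketch of how that parameter reaches llama-cpp-python when the GGUF file is loaded, assuming llama_cpp is the backend (which the GGUF files and the commented GPU prompt suggest); the model path is a placeholder:

# Hedged sketch: loading a GGUF model on CPU with llama-cpp-python.
from llama_cpp import Llama

llm = Llama(
    model_path="/path/to/model.gguf",  # local file or the cached HF download
    n_ctx=2048,                        # context window; the real code passes context_window
    n_gpu_layers=0,                    # 0 = CPU only (this commit's choice), -1 = offload every layer to the GPU
)
out = llm("Explain what this does: int x = y >> 2;", max_tokens=64)
print(out["choices"][0]["text"])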
178 changes: 9 additions & 169 deletions r2ai/local/interpreter/interpreter.py
@@ -18,6 +18,7 @@
- killian
"""

import builtins
from .cli import cli
from .utils import merge_deltas, parse_partial_json
from .message_block import MessageBlock
@@ -305,12 +306,6 @@ def handle_command(self, user_input):
action(arguments) # Execute the function

def chat(self, message=None, return_messages=False):

# Connect to an LLM (a large language model)
if not self.local:
# gpt-4
self.verify_api_key()

# ^ verify_api_key may set self.local to True, so we run this as an 'if', not 'elif':
if self.local:

@@ -319,28 +314,14 @@

# Find or install Code-Llama
try:
# self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window)
#self.llama_instance = get_hf_llm(self.model, self.debug_mode, self.context_window)
self.llama_instance = new_get_hf_llm(self.model, self.debug_mode, self.context_window)
if self.llama_instance == None:
# They cancelled.
print("Cannot find the model")
return
except:
traceback.print_exc()
# If it didn't work, apologize and switch to GPT-4

print(Markdown("".join([
f"> Failed to install `{self.model}`.",
f"\n\n**Common Fixes:** You can follow our simple setup docs at the link below to resolve common errors.\n\n```\nhttps://github.com/KillianLucas/open-interpreter/tree/main/docs\n```",
f"\n\n**If you've tried that and you're still getting an error, we have likely not built the proper `{self.model}` support for your system.**",
"\n\n*( Running language models locally is a difficult task!* If you have insight into the best way to implement this across platforms/architectures, please join the Open Interpreter community Discord and consider contributing the project's development. )",
"\n\nPress enter to switch to `GPT-4` (recommended)."
])))
input()

# Switch to GPT-4
self.local = False
self.model = "gpt-4"
self.verify_api_key()

# Display welcome message
welcome_message = ""
@@ -419,147 +400,6 @@
if return_messages:
return self.messages

def verify_api_key(self):
"""
Makes sure we have an AZURE_API_KEY or OPENAI_API_KEY.
"""
if self.use_azure:
all_env_available = (
('AZURE_API_KEY' in os.environ or 'OPENAI_API_KEY' in os.environ) and
'AZURE_API_BASE' in os.environ and
'AZURE_API_VERSION' in os.environ and
'AZURE_DEPLOYMENT_NAME' in os.environ)
if all_env_available:
self.api_key = os.environ.get('AZURE_API_KEY') or os.environ['OPENAI_API_KEY']
self.azure_api_base = os.environ['AZURE_API_BASE']
self.azure_api_version = os.environ['AZURE_API_VERSION']
self.azure_deployment_name = os.environ['AZURE_DEPLOYMENT_NAME']
self.azure_api_type = os.environ.get('AZURE_API_TYPE', 'azure')
else:
# This is probably their first time here!
self._print_welcome_message()
time.sleep(1)

print(Rule(style="white"))

print(Markdown(missing_azure_info_message), '', Rule(style="white"), '')
response = input("Azure OpenAI API key: ")

if response == "":
# User pressed `enter`, requesting Code-Llama

print(Markdown(
"> Switching to `Code-Llama`...\n\n**Tip:** Run `interpreter --local` to automatically use `Code-Llama`."),
'')
time.sleep(2)
print(Rule(style="white"))



# Temporarily, for backwards (behavioral) compatibility, we've moved this part of llama_2.py here.
# AND BELOW.
# This way, when folks hit interpreter --local, they get the same experience as before.
import inquirer

print('', Markdown("**Open Interpreter** will use `Code Llama` for local execution. Use your arrow keys to set up the model."), '')

models = {
'7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
'13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
'34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
}

parameter_choices = list(models.keys())
questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
answers = inquirer.prompt(questions)
chosen_param = answers['param']

# THIS is more in line with the future. You just say the model you want by name:
self.model = models[chosen_param]
self.local = True




return

else:
self.api_key = response
self.azure_api_base = input("Azure OpenAI API base: ")
self.azure_deployment_name = input("Azure OpenAI deployment name of GPT: ")
self.azure_api_version = input("Azure OpenAI API version: ")
print('', Markdown(
"**Tip:** To save this key for later, run `export AZURE_API_KEY=your_api_key AZURE_API_BASE=your_api_base AZURE_API_VERSION=your_api_version AZURE_DEPLOYMENT_NAME=your_gpt_deployment_name` on Mac/Linux or `setx AZURE_API_KEY your_api_key AZURE_API_BASE your_api_base AZURE_API_VERSION your_api_version AZURE_DEPLOYMENT_NAME your_gpt_deployment_name` on Windows."),
'')
time.sleep(2)
print(Rule(style="white"))

litellm.api_type = self.azure_api_type
litellm.api_base = self.azure_api_base
litellm.api_version = self.azure_api_version
litellm.api_key = self.api_key
else:
if self.api_key == None:
if 'OPENAI_API_KEY' in os.environ:
self.api_key = os.environ['OPENAI_API_KEY']
else:
# This is probably their first time here!
self._print_welcome_message()
time.sleep(1)

print(Rule(style="white"))

print(Markdown(missing_api_key_message), '', Rule(style="white"), '')
response = input("OpenAI API key: ")

if response == "":
# User pressed `enter`, requesting Code-Llama

print(Markdown(
"> Switching to `Code-Llama`...\n\n**Tip:** Run `interpreter --local` to automatically use `Code-Llama`."),
'')
time.sleep(2)
print(Rule(style="white"))



# Temporarily, for backwards (behavioral) compatibility, we've moved this part of llama_2.py here.
# AND ABOVE.
# This way, when folks hit interpreter --local, they get the same experience as before.
import inquirer

print('', Markdown("**Open Interpreter** will use `Code Llama` for local execution. Use your arrow keys to set up the model."), '')

models = {
'7B': 'TheBloke/CodeLlama-7B-Instruct-GGUF',
'13B': 'TheBloke/CodeLlama-13B-Instruct-GGUF',
'34B': 'TheBloke/CodeLlama-34B-Instruct-GGUF'
}

parameter_choices = list(models.keys())
questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
answers = inquirer.prompt(questions)
chosen_param = answers['param']

# THIS is more in line with the future. You just say the model you want by name:
self.model = models[chosen_param]
self.local = True




return

else:
self.api_key = response
print('', Markdown("**Tip:** To save this key for later, run `export OPENAI_API_KEY=your_api_key` on Mac/Linux or `setx OPENAI_API_KEY your_api_key` on Windows."), '')
time.sleep(2)
print(Rule(style="white"))

litellm.api_key = self.api_key
if self.api_base:
litellm.api_base = self.api_base

def end_active_block(self):
if self.active_block:
self.active_block.end()
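
For reference, the deleted verify_api_key expected the remote configuration to come from environment variables. A condensed sketch of the checks it performed follows, mirroring the removed code rather than anything this commit adds; the helper name has_remote_config is hypothetical:

# Hedged sketch of the environment the removed verify_api_key looked for.
import os

def has_remote_config(use_azure=False):
    if use_azure:
        return (("AZURE_API_KEY" in os.environ or "OPENAI_API_KEY" in os.environ)
                and "AZURE_API_BASE" in os.environ
                and "AZURE_API_VERSION" in os.environ
                and "AZURE_DEPLOYMENT_NAME" in os.environ)
    return "OPENAI_API_KEY" in os.environ  # plain OpenAI path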
@@ -673,23 +513,24 @@ def messages_to_prompt(messages):

# Extracting the system prompt and initializing the formatted string with it.
system_prompt = messages[0]['content']
formatted_messages = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n"
formatted_messages = f"<s>[INST]<<SYS>>\n{system_prompt}\n<</SYS>>\n"
# Loop starting from the first user message
for index, item in enumerate(messages[1:]):
role = item['role']
content = item['content']

if role == 'user':
formatted_messages += f"{content} [/INST] "
formatted_messages += f"{content}[/INST] "
elif role == 'function':
formatted_messages += f"Output: {content} [/INST] "
formatted_messages += f"Output: {content}[/INST] "
elif role == 'assistant':
formatted_messages += f"{content} </s><s>[INST] "

# Remove the trailing '<s>[INST] ' from the final output
if formatted_messages.endswith("<s>[INST] "):
if formatted_messages.endswith("<s>[INST]"):
formatted_messages = formatted_messages[:-10]

# DEBUG DEBUG DEBUG AGAIN AGAIN AGAIN
# builtins.print(formatted_messages)
return formatted_messages

prompt = messages_to_prompt(messages)
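
To make the CodeLlama instruct template above concrete, here is an illustrative conversation and the string the post-change code would build from it; the messages themselves are made up and the output is shown as comments, with its trailing space noted:

# Hedged worked example for messages_to_prompt after this change.
messages = [
    {"role": "system",    "content": "You are a helpful assistant."},
    {"role": "user",      "content": "What does strcpy do?"},
    {"role": "assistant", "content": "It copies a NUL-terminated string."},
    {"role": "user",      "content": "Is it safe?"},
]
# Expected result (a single string; the last line ends with a space):
# <s>[INST]<<SYS>>
# You are a helpful assistant.
# <</SYS>>
# What does strcpy do?[/INST] It copies a NUL-terminated string. </s><s>[INST] Is it safe?[/INST]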
@@ -704,7 +545,6 @@ def messages_to_prompt(messages):

if self.debug_mode:
# we have to use builtins bizarrely! because rich.print interprets "[INST]" as something meaningful
import builtins
builtins.print("TEXT PROMPT SEND TO LLM:\n", prompt)

# Run Code-Llama
28 changes: 17 additions & 11 deletions r2ai/local/main.py
@@ -29,25 +29,22 @@
#interpreter.model = "models/models/guanaco-7b-uncensored.Q2_K.gguf"
#interpreter.model = "models/models/ggml-model-q4_0.gguf" # tinysmall -- very bad results

#interpreter.model = "models/models/mistral-7b-v0.1.Q4_K_M.gguf"
#interpreter.model = "models/models/mistral-7b-instruct-v0.1.Q2_K.gguf"
#interpreter.model = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
builtins.print("TheBloke/Mistral-7B-Instruct-v0.1-GGUF")

dir_path = os.path.dirname(os.path.realpath(__file__))
interpreter.model = dir_path + "/" + interpreter.model
model_path = dir_path + "/" + interpreter.model
if os.path.exists(model_path):
interpreter.model = model_path

def slurp(f):
fd = open(f)
res = fd.read()
fd.close()
return "" + res

# script = slurp("/tmp/util.c")
# usertext = "Describe the purpose of this C function in a single sentence and add it as a comment on top: [CODE]" + script + "[/CODE]"
# usertext = "Add comments in the following code to make it easier to understand: [CODE]" + script + "[/CODE]"
# usertext = "Tell me what's the use case for this function and when it should not be used: [CODE]" + script + "[/CODE]"
# usertext = "Digues en Català i en una sola frase si aquesta funció modifica els continguts dels arguments que reb: [CODE]" + script + "[/CODE]"
# usertext = "Tell me what's not obvious or wrong in this function: [CODE]" + script + "[/CODE]"
# usertext = "How to bind this function from Python? [CODE]" + script + "[/CODE]"
# interpreter.chat(usertext)
# exit()

r2 = None
try:
import r2pipe
@@ -64,6 +61,8 @@ def slurp(f):
#questions = [inquirer.List('param', message="Parameter count (smaller is faster, larger is more capable)", choices=parameter_choices)]
#inquirer.prompt(questions)

# TheBloke/Mistral-7B-Instruct-v0.1-GGUF

help_message = """
Usage: [!r2command] | [chat-query] | [command]
Examples:
@@ -75,6 +74,7 @@
$system prompt -> define the role of the conversation
which instruction corresponds to this description? -> the query for the chat model
reset -> reset the chat (same as pressing enter with an empty line)
model [file/repo] -> select model from huggingface repository or local file
clear -> clear the screen
q -> quit/exit/^C
"""
@@ -86,6 +86,12 @@ def runline(usertext):
builtins.print(help_message)
elif usertext == "clear":
builtins.print("\x1b[2J\x1b[0;0H\r")
elif usertext.startswith("model"):
words = usertext.split(" ")
if len(words) > 1:
interpreter.model = words[1]
else:
builtins.print(interpreter.model)
elif usertext == "reset":
builtins.print("Forgot")
interpreter.reset()
Expand Down
