From 1c30d49c0e9dc8e310380423273936d56ff2a5cc Mon Sep 17 00:00:00 2001
From: gpetters-amd <159576198+gpetters-amd@users.noreply.github.com>
Date: Mon, 29 Apr 2024 13:18:16 -0400
Subject: [PATCH] Fix Llama2 on CPU (#2133)

---
 apps/shark_studio/api/llm.py     | 6 +++++-
 apps/shark_studio/web/ui/chat.py | 2 ++
 requirements.txt                 | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/apps/shark_studio/api/llm.py b/apps/shark_studio/api/llm.py
index 83ccb81de3..8ba816f353 100644
--- a/apps/shark_studio/api/llm.py
+++ b/apps/shark_studio/api/llm.py
@@ -347,7 +347,11 @@ def llm_chat_api(InputData: dict):
     else:
         print(f"prompt : {InputData['prompt']}")
 
-    model_name = InputData["model"] if "model" in InputData.keys() else "llama2_7b"
+    model_name = (
+        InputData["model"]
+        if "model" in InputData.keys()
+        else "meta-llama/Llama-2-7b-chat-hf"
+    )
     model_path = llm_model_map[model_name]
     device = InputData["device"] if "device" in InputData.keys() else "cpu"
     precision = "fp16"
diff --git a/apps/shark_studio/web/ui/chat.py b/apps/shark_studio/web/ui/chat.py
index f23244ed4b..54ae4a139f 100644
--- a/apps/shark_studio/web/ui/chat.py
+++ b/apps/shark_studio/web/ui/chat.py
@@ -9,6 +9,7 @@
     llm_model_map,
     LanguageModel,
 )
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
 import apps.shark_studio.web.utils.globals as global_obj
 
 B_SYS, E_SYS = "<s>", "</s>"
@@ -64,6 +65,7 @@ def chat_fn(
             external_weights="safetensors",
             use_system_prompt=prompt_prefix,
             streaming_llm=streaming_llm,
+            hf_auth_token=cmd_opts.hf_auth_token,
         )
         history[-1][-1] = "Getting the model ready... Done"
         yield history, ""
diff --git a/requirements.txt b/requirements.txt
index f985b4b60f..e831092f0a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -36,6 +36,7 @@ safetensors==0.3.1
 py-cpuinfo
 pydantic==2.4.1 # pin until pyinstaller-hooks-contrib works with beta versions
 mpmath==1.3.0
+optimum
 
 # Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
 pefile
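
A quick sketch of how the patched default plays out, assuming the request
shape implied by the keys the handler reads in the first hunk ("prompt",
"model", "device"); the prompt text and the smoke-test framing are
illustrative, not part of the patch. Omitting "model" now falls back to
the Hugging Face repo id "meta-llama/Llama-2-7b-chat-hf", which (per the
hunk) is looked up in llm_model_map — presumably the stale "llama2_7b"
alias no longer matched a key there, breaking the CPU path.

    # Hypothetical smoke test for the patched default (illustrative only).
    from apps.shark_studio.api.llm import llm_chat_api

    payload = {
        "prompt": "What is the capital of Canada?",
        # "model" intentionally omitted: the fallback is now the repo id
        # "meta-llama/Llama-2-7b-chat-hf", a valid llm_model_map key.
        "device": "cpu",
    }

    response = llm_chat_api(payload)
    print(response)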