0.3.4

Benjoyo · Benjoyo · commit 9cb513ef9c25 · 2024-05-03T23:31:08.000+02:00
diff --git a/bpm_ai_inference/llm/llama_cpp/_constants.py b/bpm_ai_inference/llm/llama_cpp/_constants.py
@@ -1,7 +1,7 @@
 
-DEFAULT_MODEL = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
+DEFAULT_MODEL = "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF"
 DEFAULT_QUANT_LARGE = "*Q8_0.gguf"
-DEFAULT_QUANT_BALANCED = "*Q4_K_M.gguf"
+DEFAULT_QUANT_BALANCED = "*Q5_K_M.gguf"
 DEFAULT_QUANT_SMALL = "*Q2_K.gguf"
 DEFAULT_TEMPERATURE = 0.0
 DEFAULT_MAX_RETRIES = 8
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bpm-ai-inference"
-version = "0.3.3"
+version = "0.3.4"
 description = "Inference and server for local AI implementations of bpm-ai-core abstractions."
 authors = ["Bennet Krause <bennet.krause@holisticon.de>"]
 repository = "https://github.com/holunda-io/bpm-ai-inference"
@@ -23,9 +23,7 @@ optimum = {extras = ["onnxruntime"], version = "^1.18.0"}
 gliner = "^0.1.6"
 scipy = "1.10.1"
 py-cpuinfo = "^9.0.0"
-llama-cpp-python = "^0.2.65"
-#fast-fit = "^1.2.0"
-#jupyter = "^1.0.0"
+llama-cpp-python = "^0.2.69"
 
 
 [tool.poetry.group.test.dependencies]