llama.cpp updated + dynatemp params
mgonzs13 committed Jan 27, 2024
1 parent 00edb1d commit 476810d
Showing 3 changed files with 7 additions and 1 deletion.
2 changes: 2 additions & 0 deletions llama_msgs/msg/SamplingConfig.msg
@@ -5,6 +5,8 @@ bool ignore_eos false # ignore end of stream token and continue generating
 LogitBiasArray logit_bias # logit bias for specific tokens
 
 float32 temp 0.80 # temperature
+float32 dynatemp_range 0.0 # 0.0 = disabled
+float32 dynatemp_exponent 1.0 # controls how entropy maps to temperature in dynamic temperature sampler
 
 int32 top_k 40 # top-k sampling (0.0 = disabled)
 float32 top_p 0.95 # top-p sampling (1.0 = disabled)
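
For context on the two new fields: in the upstream llama.cpp dynamic temperature sampler, the effective temperature is chosen per step from the entropy of the candidate distribution, varying between temp - dynatemp_range and temp + dynatemp_range, with dynatemp_exponent shaping how entropy maps to temperature. The Python sketch below is illustrative only and not part of this commit; the function and variable names are my own, and the mapping follows my reading of the upstream entropy sampler.

import math

def dynamic_temperature(probs, temp, dynatemp_range, dynatemp_exponent):
    """Illustrative sketch of entropy-based dynamic temperature.

    probs: candidate token probabilities (already softmaxed).
    Returns the effective temperature to apply before sampling.
    """
    if dynatemp_range <= 0.0:
        return temp  # 0.0 = disabled, plain temperature sampling

    min_temp = max(0.0, temp - dynatemp_range)
    max_temp = temp + dynatemp_range

    # Shannon entropy of the candidates, normalized by the maximum
    # possible entropy (uniform distribution over the candidates).
    entropy = -sum(p * math.log(p) for p in probs if p > 0.0)
    max_entropy = math.log(len(probs)) if len(probs) > 1 else 1.0
    normalized = entropy / max_entropy

    # dynatemp_exponent controls how entropy maps to temperature:
    # values > 1 keep the temperature low until entropy is high,
    # values < 1 raise it quickly even for moderate entropy.
    return min_temp + (max_temp - min_temp) * (normalized ** dynatemp_exponent)
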
2 changes: 1 addition & 1 deletion llama_ros/llama_cpp
Submodule llama_cpp updated 48 files
+26 −0 .devops/main-intel.Dockerfile
+3 −0 .devops/nix/package.nix
+6 −6 .github/workflows/build.yml
+1 −0 .github/workflows/docker.yml
+1 −18 .gitignore
+44 −35 CMakeLists.txt
+6 −3 Makefile
+2 −1 README.md
+70 −11 ci/run.sh
+4 −6 common/common.cpp
+12 −1 common/sampling.cpp
+2 −0 common/sampling.h
+1 −1 examples/infill/infill.cpp
+1 −0 examples/llama.android/app/build.gradle.kts
+4 −1 examples/llama.vim
+82 −36 examples/llava/clip.cpp
+29 −3 examples/llava/llava-cli.cpp
+63 −12 examples/perplexity/perplexity.cpp
+6 −37 examples/pydantic-models-to-grammar-examples.py
+82 −84 examples/pydantic_models_to_grammar.py
+1 −1 examples/server/CMakeLists.txt
+2 −1 examples/server/README.md
+208 −0 examples/server/oai.hpp
+294 −707 examples/server/server.cpp
+508 −0 examples/server/utils.hpp
+5 −3 ggml-alloc.c
+3 −1 ggml-backend.c
+62 −52 ggml-cuda.cu
+26 −63 ggml-metal.m
+84 −3 ggml-opencl.cpp
+1 −0 ggml-opencl.h
+23 −6 ggml.c
+0 −1 ggml.h
+1 −1 gguf-py/gguf/gguf_reader.py
+299 −195 llama.cpp
+8 −0 llama.h
+0 −1 pocs/vdot/vdot.cpp
+50 −0 scripts/ci-run.sh
+1 −1 scripts/run-with-preset.py
+1 −1 scripts/sync-ggml.last
+3 −0 tests/.gitignore
+11 −3 tests/CMakeLists.txt
+21 −0 tests/get-model.cpp
+2 −0 tests/get-model.h
+4 −8 tests/test-autorelease.cpp
+0 −1 tests/test-backend-ops.cpp
+0 −1 tests/test-llama-grammar.cpp
+27 −0 tests/test-model-load-cancel.cpp
4 changes: 4 additions & 0 deletions llama_ros/llama_ros/langchain/llama_ros.py
@@ -47,6 +47,8 @@ class LlamaROS(LLM):
     logit_bias: Dict[int, float] = {}
 
     temp: float = 0.80
+    dynatemp_range: float = 0.0
+    dynatemp_exponent: float = 1.0
 
     top_k: int = 40
     top_p: float = 0.95
@@ -122,6 +124,8 @@ def _call(
             goal.sampling_config.logit_bias.data.append(lb)
 
         goal.sampling_config.temp = self.temp
+        goal.sampling_config.dynatemp_range = self.dynatemp_range
+        goal.sampling_config.dynatemp_exponent = self.dynatemp_exponent
 
         goal.sampling_config.top_k = self.top_k
         goal.sampling_config.top_p = self.top_p
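
With these fields exposed on the LangChain wrapper, the new parameters can be set directly when constructing it. A minimal usage sketch, assuming the llama_ros llama node is already launched and that the class is importable from the package path shown in this commit; the prompt and parameter values are placeholders.

from llama_ros.langchain import LlamaROS

# Illustrative values: enable dynamic temperature around temp=0.8.
# Depending on the setup, ROS 2 may need to be initialized separately.
llm = LlamaROS(
    temp=0.8,
    dynatemp_range=0.5,      # effective temperature varies around temp
    dynatemp_exponent=1.0,   # linear mapping from entropy to temperature
)

print(llm.invoke("Tell me about ROS 2."))
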
