From 3f5298932c038d748f50b1069eef09fa802682fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20=C3=81ngel=20Gonz=C3=A1lez=20Santamarta?=
Date: Sun, 3 Mar 2024 19:36:15 +0100
Subject: [PATCH] llama.cpp updated + pooling_type param

---
 llama_bringup/launch/base.launch.py  |  5 +++--
 llama_bringup/llama_bringup/utils.py |  8 +++++---
 llama_ros/llama_cpp                  |  2 +-
 llama_ros/src/llama_node.cpp         | 20 ++++++++++++++++++--
 4 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/llama_bringup/launch/base.launch.py b/llama_bringup/launch/base.launch.py
index 4faea2f5..28e389e3 100644
--- a/llama_bringup/launch/base.launch.py
+++ b/llama_bringup/launch/base.launch.py
@@ -41,7 +41,7 @@ def generate_launch_description():
         "n_batch": LaunchConfiguration("n_batch", default=8),
 
         "n_gpu_layers": LaunchConfiguration("n_gpu_layers", default=0),
-        "split_mode": LaunchConfiguration("split_mode", default="none"),
+        "split_mode": LaunchConfiguration("split_mode", default="layer"),
         "main_gpu": LaunchConfiguration("main_gpu", default=0),
         "tensor_split": LaunchConfiguration("tensor_split", default="[0.0]"),
 
@@ -50,7 +50,7 @@
 
         "rope_freq_base": LaunchConfiguration("rope_freq_base", default=0.0),
         "rope_freq_scale": LaunchConfiguration("rope_freq_scale", default=0.0),
-        "rope_scaling_type": LaunchConfiguration("rope_scaling_type", default="none"),
+        "rope_scaling_type": LaunchConfiguration("rope_scaling_type", default=""),
 
         "yarn_ext_factor": LaunchConfiguration("yarn_ext_factor", default=-1.0),
         "yarn_attn_factor": LaunchConfiguration("yarn_attn_factor", default=1.0),
@@ -77,6 +77,7 @@
         "lora_adapter": LaunchConfiguration("lora_adapter", default=""),
         "lora_base": LaunchConfiguration("lora_base", default=""),
         "numa": LaunchConfiguration("numa", default="none"),
+        "pooling_type": LaunchConfiguration("pooling_type", default=""),
 
         "prefix": ParameterValue(LaunchConfiguration("prefix", default=""), value_type=str),
         "suffix": ParameterValue(LaunchConfiguration("suffix", default=""), value_type=str),
diff --git a/llama_bringup/llama_bringup/utils.py b/llama_bringup/llama_bringup/utils.py
index c9cf6764..c8183f81 100644
--- a/llama_bringup/llama_bringup/utils.py
+++ b/llama_bringup/llama_bringup/utils.py
@@ -62,7 +62,7 @@ def create_llama_launch(
     n_batch: int = 8,
 
     n_gpu_layers: int = 0,
-    split_mode: str = "none",
+    split_mode: str = "layer",
     main_gpu: int = 0,
     tensor_split: str = "[0.0]",
 
@@ -71,7 +71,7 @@
 
     rope_freq_base: float = 0.0,
     rope_freq_scale: float = 0.0,
-    rope_scaling_type: str = "none",
+    rope_scaling_type: str = "",
 
     yarn_ext_factor: float = -1.0,
     yarn_attn_factor: float = 1.0,
@@ -101,6 +101,7 @@
     lora_base_filename: str = "",
 
     numa: str = "none",
+    pooling_type: str = "",
 
     prefix: str = "",
     suffix: str = "",
@@ -152,7 +153,8 @@
         "model": download_model(model_repo, model_filename),
         "lora_base": download_model(lora_base_repo, lora_base_filename),
 
-        "numa": str(numa),
+        "numa": numa,
+        "pooling_type": pooling_type,
 
         "prefix": prefix,
         "suffix": suffix,
diff --git a/llama_ros/llama_cpp b/llama_ros/llama_cpp
index bbde6eb2..67be2ce1 160000
--- a/llama_ros/llama_cpp
+++ b/llama_ros/llama_cpp
@@ -1 +1 @@
-Subproject commit bbde6eb2561153aabbdfac5001c690fe00cad639
+Subproject commit 67be2ce1015d070b3b2cd488bcb041eefb61de72
diff --git a/llama_ros/src/llama_node.cpp b/llama_ros/src/llama_node.cpp
index 574fe97d..8413dd7b 100644
--- a/llama_ros/src/llama_node.cpp
+++ b/llama_ros/src/llama_node.cpp
@@ -74,6 +74,7 @@ void LlamaNode::load_params(struct gpt_params &params) {
   std::string split_mode;
   std::string rope_scaling_type;
   std::string numa;
+  std::string pooling_type;
 
   std::vector<double> tensor_split;
 
@@ -98,9 +99,10 @@ void LlamaNode::load_params(struct gpt_params &params) {
       {"model", ""},
       {"lora_adapter", ""},
       {"lora_base", ""},
-      {"split_mode", "none"},
-      {"rope_scaling_type", "none"},
+      {"split_mode", "layer"},
+      {"rope_scaling_type", ""},
       {"numa", "none"},
+      {"pooling_type", ""},
       {"cache_type_k", "f16"},
       {"cache_type_v", "f16"},
       {"prompt", ""},
@@ -172,6 +174,7 @@ void LlamaNode::load_params(struct gpt_params &params) {
   this->get_parameter("lora_adapter", lora_adapter);
   this->get_parameter("lora_base", params.lora_base);
   this->get_parameter("numa", numa);
+  this->get_parameter("pooling_type", pooling_type);
 
   this->get_parameter("n_parallel", params.n_parallel);
   this->get_parameter("n_sequences", params.n_sequences);
@@ -215,6 +218,8 @@ void LlamaNode::load_params(struct gpt_params &params) {
     params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR;
   } else if (rope_scaling_type == "yarn") {
     params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN;
+  } else {
+    params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
   }
 
   // numa
@@ -230,6 +235,17 @@ void LlamaNode::load_params(struct gpt_params &params) {
     params.numa = GGML_NUMA_STRATEGY_MIRROR;
   }
 
+  // pooling
+  if (pooling_type == "none") {
+    params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+  } else if (pooling_type == "mean") {
+    params.pooling_type = LLAMA_POOLING_TYPE_MEAN;
+  } else if (pooling_type == "cls") {
+    params.pooling_type = LLAMA_POOLING_TYPE_CLS;
+  } else {
+    params.pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED;
+  }
+
   // initial prompt
   if (!file_path.empty()) {
     std::ifstream file(file_path.c_str());
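
Usage sketch (not part of the patch): a minimal launch file setting the new
pooling_type parameter, assuming create_llama_launch() returns a launch action
that can be placed directly in a LaunchDescription, as llama_bringup's own
launch files suggest. The model repo and filename below are placeholders.

    from launch import LaunchDescription
    from llama_bringup.utils import create_llama_launch


    def generate_launch_description():
        return LaunchDescription([
            create_llama_launch(
                # placeholder GGUF model; any repo/file pair served by
                # download_model() would work here
                model_repo="some-org/some-model-GGUF",
                model_filename="some-model.Q4_K_M.gguf",
                # "none", "mean" or "cls"; any other value, including the
                # default "", falls through to LLAMA_POOLING_TYPE_UNSPECIFIED
                # in llama_node.cpp above
                pooling_type="mean",
            )
        ])

Leaving pooling_type at its default ("") selects LLAMA_POOLING_TYPE_UNSPECIFIED,
which defers the pooling choice to llama.cpp.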