llama.cpp updated + pooling_type param
mgonzs13 committed Mar 3, 2024
1 parent bce4565 commit 3f52989
Showing 4 changed files with 27 additions and 8 deletions.
5 changes: 3 additions & 2 deletions llama_bringup/launch/base.launch.py
@@ -41,7 +41,7 @@ def generate_launch_description():
"n_batch": LaunchConfiguration("n_batch", default=8),

"n_gpu_layers": LaunchConfiguration("n_gpu_layers", default=0),
"split_mode": LaunchConfiguration("split_mode", default="none"),
"split_mode": LaunchConfiguration("split_mode", default="layer"),
"main_gpu": LaunchConfiguration("main_gpu", default=0),
"tensor_split": LaunchConfiguration("tensor_split", default="[0.0]"),

@@ -50,7 +50,7 @@

"rope_freq_base": LaunchConfiguration("rope_freq_base", default=0.0),
"rope_freq_scale": LaunchConfiguration("rope_freq_scale", default=0.0),
"rope_scaling_type": LaunchConfiguration("rope_scaling_type", default="none"),
"rope_scaling_type": LaunchConfiguration("rope_scaling_type", default=""),

"yarn_ext_factor": LaunchConfiguration("yarn_ext_factor", default=-1.0),
"yarn_attn_factor": LaunchConfiguration("yarn_attn_factor", default=1.0),
@@ -77,6 +77,7 @@ def generate_launch_description():
"lora_adapter": LaunchConfiguration("lora_adapter", default=""),
"lora_base": LaunchConfiguration("lora_base", default=""),
"numa": LaunchConfiguration("numa", default="none"),
"pooling_type": LaunchConfiguration("pooling_type", default=""),

"prefix": ParameterValue(LaunchConfiguration("prefix", default=""), value_type=str),
"suffix": ParameterValue(LaunchConfiguration("suffix", default=""), value_type=str),
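Usage sketch (not from this commit): the new and changed defaults above are plain LaunchConfiguration defaults, so they can be overridden via launch_arguments when base.launch.py is included from another launch file. The usual share/llama_bringup/launch install path is assumed here.

import os

from ament_index_python.packages import get_package_share_directory
from launch import LaunchDescription
from launch.actions import IncludeLaunchDescription
from launch.launch_description_sources import PythonLaunchDescriptionSource


def generate_launch_description():
    return LaunchDescription([
        IncludeLaunchDescription(
            PythonLaunchDescriptionSource(
                os.path.join(
                    get_package_share_directory("llama_bringup"),
                    "launch", "base.launch.py")),
            # Override the defaults shown above; any argument not passed here
            # keeps the default declared in base.launch.py.
            launch_arguments={
                "split_mode": "layer",
                "pooling_type": "mean",
            }.items(),
        )
    ])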
8 changes: 5 additions & 3 deletions llama_bringup/llama_bringup/utils.py
@@ -62,7 +62,7 @@ def create_llama_launch(
n_batch: int = 8,

n_gpu_layers: int = 0,
- split_mode: str = "none",
+ split_mode: str = "layer",
main_gpu: int = 0,
tensor_split: str = "[0.0]",

@@ -71,7 +71,7 @@

rope_freq_base: float = 0.0,
rope_freq_scale: float = 0.0,
- rope_scaling_type: str = "none",
+ rope_scaling_type: str = "",

yarn_ext_factor: float = -1.0,
yarn_attn_factor: float = 1.0,
@@ -101,6 +101,7 @@
lora_base_filename: str = "",

numa: str = "none",
+ pooling_type: str = "",

prefix: str = "",
suffix: str = "",
@@ -152,7 +153,8 @@

"model": download_model(model_repo, model_filename),
"lora_base": download_model(lora_base_repo, lora_base_filename),
"numa": str(numa),
"numa": numa,
"pooling_type": pooling_type,

"prefix": prefix,
"suffix": suffix,
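Usage sketch (not from this commit): a launch file calling create_llama_launch and passing the new pooling_type keyword through to the node. The model repo and filename below are placeholders, and create_llama_launch is assumed to return a launch action that includes base.launch.py with these values.

from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch


def generate_launch_description():
    return LaunchDescription([
        create_llama_launch(
            model_repo="your-hf-user/your-model-GGUF",    # placeholder repo
            model_filename="your-model.Q4_K_M.gguf",      # placeholder file
            pooling_type="mean",  # new parameter; "" keeps llama.cpp's own choice
        )
    ])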
20 changes: 18 additions & 2 deletions llama_ros/src/llama_node.cpp
@@ -74,6 +74,7 @@ void LlamaNode::load_params(struct gpt_params &params) {
std::string split_mode;
std::string rope_scaling_type;
std::string numa;
+ std::string pooling_type;

std::vector<double> tensor_split;

@@ -98,9 +99,10 @@ void LlamaNode::load_params(struct gpt_params &params) {
{"model", ""},
{"lora_adapter", ""},
{"lora_base", ""},
{"split_mode", "none"},
{"rope_scaling_type", "none"},
{"split_mode", "layer"},
{"rope_scaling_type", ""},
{"numa", "none"},
{"pooling_type", ""},
{"cache_type_k", "f16"},
{"cache_type_v", "f16"},
{"prompt", ""},
@@ -172,6 +174,7 @@ void LlamaNode::load_params(struct gpt_params &params) {
this->get_parameter("lora_adapter", lora_adapter);
this->get_parameter("lora_base", params.lora_base);
this->get_parameter("numa", numa);
this->get_parameter("pooling_type", pooling_type);

this->get_parameter("n_parallel", params.n_parallel);
this->get_parameter("n_sequences", params.n_sequences);
@@ -215,6 +218,8 @@ void LlamaNode::load_params(struct gpt_params &params) {
params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR;
} else if (rope_scaling_type == "yarn") {
params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN;
+ } else {
+ params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
}

// numa
@@ -230,6 +235,17 @@ void LlamaNode::load_params(struct gpt_params &params) {
params.numa = GGML_NUMA_STRATEGY_MIRROR;
}

+ // pooling
+ if (pooling_type == "none") {
+ params.pooling_type = LLAMA_POOLING_TYPE_NONE;
+ } else if (pooling_type == "mean") {
+ params.pooling_type = LLAMA_POOLING_TYPE_MEAN;
+ } else if (pooling_type == "cls") {
+ params.pooling_type = LLAMA_POOLING_TYPE_CLS;
+ } else {
+ params.pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED;
+ }

// initial prompt
if (!file_path.empty()) {
std::ifstream file(file_path.c_str());
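With this change the node maps the pooling_type string onto the llama.cpp enum: "none", "mean" and "cls" select the corresponding pooling type, and any other value, including the empty default, falls back to LLAMA_POOLING_TYPE_UNSPECIFIED so llama.cpp decides. A parameter-level sketch (not from this commit), assuming the package and executable are named llama_ros and llama_node; the executable name is not shown in this diff:

from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    return LaunchDescription([
        Node(
            package="llama_ros",
            executable="llama_node",             # assumed executable name
            parameters=[{
                "model": "/path/to/model.gguf",  # placeholder path
                "split_mode": "layer",
                "pooling_type": "mean",          # "none" | "mean" | "cls", else unspecified
            }],
        )
    ])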
