
Commit bce4565

llama.cpp updated
mul_mat_q removed; GGML_USE_CUBLAS removed
1 parent 3abe0b3 commit bce4565

File tree

6 files changed: +3 -14 lines changed


README.md

Lines changed: 1 addition & 2 deletions
````diff
@@ -29,11 +29,10 @@ $ colcon build
 
 ### CUDA
 
-To run llama_ros with CUDA, you have to install the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) and the following lines in the [CMakeLists.txt](llama_ros/CMakeLists.txt) must be uncommented:
+To run llama_ros with CUDA, you have to install the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) and the following line in the [CMakeLists.txt](llama_ros/CMakeLists.txt) must be uncommented:
 
 ```
 option(LLAMA_CUBLAS "llama: use cuBLAS" ON)
-add_compile_definitions(GGML_USE_CUBLAS)
 ```
 
 ## Usage
````

llama_bringup/launch/base.launch.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -58,7 +58,6 @@ def generate_launch_description():
         "yarn_beta_slow": LaunchConfiguration("yarn_beta_slow", default=1.0),
         "yarn_orig_ctx": LaunchConfiguration("yarn_orig_ctx", default=0),
 
-        "mul_mat_q": LaunchConfiguration("mul_mat_q", default=True),
         "embedding": LaunchConfiguration("embedding", default=True),
         "logits_all": LaunchConfiguration("logits_all", default=False),
         "use_mmap": LaunchConfiguration("use_mmap", default=True),
```

llama_bringup/llama_bringup/utils.py

Lines changed: 0 additions & 2 deletions
```diff
@@ -80,7 +80,6 @@ def create_llama_launch(
     yarn_orig_ctx: float = 0,
 
     embedding: bool = True,
-    mul_mat_q: bool = True,
     logits_all: bool = False,
     use_mmap: bool = True,
     use_mlock: bool = False,
@@ -136,7 +135,6 @@ def create_llama_launch(
         "yarn_orig_ctx": str(yarn_orig_ctx),
         "rope_scaling_type": str(rope_scaling_type),
 
-        "mul_mat_q": str(mul_mat_q),
         "embedding": str(embedding),
         "logits_all": str(logits_all),
         "use_mmap": str(use_mmap),
```

llama_ros/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
```diff
@@ -7,7 +7,6 @@ endif()
 
 # cuBLAS
 # option(LLAMA_CUBLAS "llama: use cuBLAS" ON)
-# add_compile_definitions(GGML_USE_CUBLAS)
 
 # find dependencies
 find_package(ament_cmake REQUIRED)
```

llama_ros/llama_cpp (submodule pointer updated to the newer llama.cpp commit; no content diff shown)

llama_ros/src/llama_node.cpp

Lines changed: 1 addition & 7 deletions
```diff
@@ -121,7 +121,6 @@ void LlamaNode::load_params(struct gpt_params &params) {
                                   std::vector<double>({0.0}));
   this->declare_parameters<bool>("", {
                                          {"debug", true},
-                                         {"mul_mat_q", true},
                                          {"embedding", true},
                                          {"logits_all", false},
                                          {"use_mmap", true},
@@ -140,7 +139,6 @@ void LlamaNode::load_params(struct gpt_params &params) {
   this->get_parameter("main_gpu", params.main_gpu);
   this->get_parameter("tensor_split", tensor_split);
 
-  this->get_parameter("mul_mat_q", params.mul_mat_q);
   this->get_parameter("embedding", params.embedding);
   this->get_parameter("logits_all", params.logits_all);
   this->get_parameter("use_mmap", params.use_mmap);
@@ -243,8 +241,7 @@ void LlamaNode::load_params(struct gpt_params &params) {
                  std::istreambuf_iterator<char>(), back_inserter(params.prompt));
   }
 
-  // cublas
-#ifdef GGML_USE_CUBLAS
+  // split tensors
   GGML_ASSERT(tensor_split.size() <= llama_max_devices());
   for (size_t i = 0; i < llama_max_devices(); ++i) {
     if (i < tensor_split.size()) {
@@ -253,9 +250,6 @@ void LlamaNode::load_params(struct gpt_params &params) {
       params.tensor_split[i] = 0.0f;
     }
   }
-
-  params.mul_mat_q = false;
-#endif
 }
 
 void LlamaNode::tokenize_service_callback(
```
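As an illustration only, the tensor-split handling that this hunk makes unconditional (it previously ran only under GGML_USE_CUBLAS) can be sketched in Python; `max_devices` here stands in for `llama_max_devices()`:

```python
# Sketch only: a Python rendering of the tensor-split loop shown in the C++ hunk
# above, which now runs unconditionally instead of behind GGML_USE_CUBLAS.
def fill_tensor_split(tensor_split: list[float], max_devices: int) -> list[float]:
    # Mirrors the GGML_ASSERT in the C++ code.
    assert len(tensor_split) <= max_devices
    # Copy the user-provided per-device fractions, zero-filling the remaining slots.
    return [
        tensor_split[i] if i < len(tensor_split) else 0.0
        for i in range(max_devices)
    ]


# Example: split the model 60/40 across two of four visible devices.
print(fill_tensor_split([0.6, 0.4], 4))  # [0.6, 0.4, 0.0, 0.0]
```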
