From 1db2a6dde915e906ef749342fcbcade6b376946f Mon Sep 17 00:00:00 2001 From: Adam Debreceni Date: Tue, 17 Feb 2026 15:05:52 +0100 Subject: [PATCH 1/2] MINIFICPP-2719 - Add multimodal capability to llama.cpp processor --- cmake/LlamaCpp.cmake | 15 +- extensions/llamacpp/CMakeLists.txt | 2 +- .../processors/DefaultLlamaContext.cpp | 148 ++++++++++++++++-- .../llamacpp/processors/DefaultLlamaContext.h | 8 +- extensions/llamacpp/processors/LlamaContext.h | 2 +- .../processors/RunLlamaCppInference.cpp | 21 ++- .../processors/RunLlamaCppInference.h | 10 ++ .../tests/RunLlamaCppInferenceTests.cpp | 2 +- thirdparty/llamacpp/cpp-23-fixes.patch | 24 --- thirdparty/llamacpp/lu8_macro_fix.patch | 17 -- 10 files changed, 174 insertions(+), 75 deletions(-) delete mode 100644 thirdparty/llamacpp/cpp-23-fixes.patch delete mode 100644 thirdparty/llamacpp/lu8_macro_fix.patch diff --git a/cmake/LlamaCpp.cmake b/cmake/LlamaCpp.cmake index f78101c6f9..1494b137e4 100644 --- a/cmake/LlamaCpp.cmake +++ b/cmake/LlamaCpp.cmake @@ -21,6 +21,8 @@ set(BUILD_SHARED_LIBS "OFF" CACHE STRING "" FORCE) set(LLAMA_BUILD_TESTS "OFF" CACHE STRING "" FORCE) set(LLAMA_BUILD_EXAMPLES "OFF" CACHE STRING "" FORCE) set(LLAMA_BUILD_SERVER "OFF" CACHE STRING "" FORCE) +set(LLAMA_BUILD_COMMON "ON" CACHE STRING "" FORCE) +set(LLAMA_BUILD_TOOLS "ON" CACHE STRING "" FORCE) set(GGML_OPENMP "OFF" CACHE STRING "" FORCE) set(GGML_METAL "OFF" CACHE STRING "" FORCE) set(GGML_BLAS "OFF" CACHE STRING "" FORCE) @@ -30,17 +32,9 @@ else() set(GGML_NATIVE "ON" CACHE STRING "" FORCE) endif() -set(PATCH_FILE_1 "${CMAKE_SOURCE_DIR}/thirdparty/llamacpp/lu8_macro_fix.patch") # https://github.com/ggml-org/llama.cpp/issues/12740 -set(PATCH_FILE_2 "${CMAKE_SOURCE_DIR}/thirdparty/llamacpp/cpp-23-fixes.patch") - -set(PC ${Bash_EXECUTABLE} -c "set -x &&\ - (\\\"${Patch_EXECUTABLE}\\\" -p1 -R -s -f --dry-run -i \\\"${PATCH_FILE_1}\\\" || \\\"${Patch_EXECUTABLE}\\\" -p1 -N -i \\\"${PATCH_FILE_1}\\\") &&\ - (\\\"${Patch_EXECUTABLE}\\\" -p1 -R -s -f --dry-run -i \\\"${PATCH_FILE_2}\\\" || \\\"${Patch_EXECUTABLE}\\\" -p1 -N -i \\\"${PATCH_FILE_2}\\\")") - FetchContent_Declare(llamacpp - URL https://github.com/ggml-org/llama.cpp/archive/refs/tags/b5958.tar.gz - URL_HASH SHA256=4e8a2abd83092aa446cd13556f6fe8777139da7b191bdaa0e1b79fe9740b36a6 - PATCH_COMMAND "${PC}" + URL https://github.com/ggml-org/llama.cpp/archive/refs/tags/b7836.tar.gz + URL_HASH SHA256=3d384e7e8b3bc3cd31abddedf684a6e201405c1d932cafb3c4a5277d872b0614 SYSTEM ) @@ -49,5 +43,6 @@ FetchContent_MakeAvailable(llamacpp) set(LLAMACPP_INCLUDE_DIRS "${llamacpp_SOURCE_DIR}/include" "${llamacpp_SOURCE_DIR}/ggml/include" + "${llamacpp_SOURCE_DIR}/tools" CACHE STRING "" FORCE ) diff --git a/extensions/llamacpp/CMakeLists.txt b/extensions/llamacpp/CMakeLists.txt index 421143f692..6312a92d41 100644 --- a/extensions/llamacpp/CMakeLists.txt +++ b/extensions/llamacpp/CMakeLists.txt @@ -31,7 +31,7 @@ add_minifi_library(minifi-llamacpp SHARED ${SOURCES}) target_include_directories(minifi-llamacpp PUBLIC "${CMAKE_SOURCE_DIR}/extensions/llamacpp") target_include_directories(minifi-llamacpp PUBLIC "${LLAMACPP_INCLUDE_DIRS}") -target_link_libraries(minifi-llamacpp minifi-cpp-extension-lib llama) +target_link_libraries(minifi-llamacpp minifi-cpp-extension-lib llama mtmd) register_c_api_extension(minifi-llamacpp "LLAMACPP EXTENSION" LLAMACPP-EXTENSION "Provides llama.cpp support" "extensions/llamacpp/tests") diff --git a/extensions/llamacpp/processors/DefaultLlamaContext.cpp b/extensions/llamacpp/processors/DefaultLlamaContext.cpp index cc8fb017df..7dc869640f 100644 --- a/extensions/llamacpp/processors/DefaultLlamaContext.cpp +++ b/extensions/llamacpp/processors/DefaultLlamaContext.cpp @@ -16,8 +16,12 @@ */ #include "DefaultLlamaContext.h" + +#include + #include "minifi-cpp/Exception.h" #include "fmt/format.h" +#include "mtmd/mtmd-helper.h" namespace org::apache::nifi::minifi::extensions::llamacpp::processors { @@ -41,7 +45,8 @@ constexpr size_t DEFAULT_BUFFER_SIZE = 4096; } // namespace -DefaultLlamaContext::DefaultLlamaContext(const std::filesystem::path& model_path, const LlamaSamplerParams& llama_sampler_params, const LlamaContextParams& llama_ctx_params) { +DefaultLlamaContext::DefaultLlamaContext(const std::filesystem::path& model_path, const std::optional& multimodal_model_path, + const LlamaSamplerParams& llama_sampler_params, const LlamaContextParams& llama_ctx_params, const std::shared_ptr& logger) { llama_model_ = llama_model_load_from_file(model_path.string().c_str(), llama_model_default_params()); // NOLINT(cppcoreguidelines-prefer-member-initializer) if (!llama_model_) { throw Exception(ExceptionType::PROCESS_SCHEDULE_EXCEPTION, fmt::format("Failed to load model from '{}'", model_path.string())); @@ -54,7 +59,7 @@ DefaultLlamaContext::DefaultLlamaContext(const std::filesystem::path& model_path ctx_params.n_seq_max = llama_ctx_params.n_seq_max; ctx_params.n_threads = llama_ctx_params.n_threads; ctx_params.n_threads_batch = llama_ctx_params.n_threads_batch; - ctx_params.flash_attn = false; + ctx_params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED; llama_ctx_ = llama_init_from_model(llama_model_, ctx_params); auto sparams = llama_sampler_chain_default_params(); @@ -73,9 +78,27 @@ DefaultLlamaContext::DefaultLlamaContext(const std::filesystem::path& model_path llama_sampler_chain_add(llama_sampler_, llama_sampler_init_temp(*llama_sampler_params.temperature)); } llama_sampler_chain_add(llama_sampler_, llama_sampler_init_dist(LLAMA_DEFAULT_SEED)); + + if (!multimodal_model_path) { + logger->log_info("No multimodal model path provided"); + return; + } + + mtmd_context_params mparams = mtmd_context_params_default(); + mparams.use_gpu = false; + mparams.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED; + + multimodal_ctx_ = mtmd_init_from_file(multimodal_model_path->string().c_str(), llama_model_, mparams); + if (!multimodal_ctx_) { + throw Exception(ExceptionType::PROCESS_SCHEDULE_EXCEPTION, fmt::format("Failed to load multimodal model from '{}'", multimodal_model_path->string())); + } + + logger->log_info("Successfully loaded multimodal model from '{}'", multimodal_model_path->string()); } DefaultLlamaContext::~DefaultLlamaContext() { + mtmd_free(multimodal_ctx_); + multimodal_ctx_ = nullptr; llama_sampler_free(llama_sampler_); llama_sampler_ = nullptr; llama_free(llama_ctx_); @@ -108,24 +131,100 @@ std::optional DefaultLlamaContext::applyTemplate(const std::vector< return text; } -nonstd::expected DefaultLlamaContext::generate(const std::string& input, std::function token_handler) { +namespace { + +struct mtmd_bitmap_deleter { + void operator()(mtmd_bitmap* val) { mtmd_bitmap_free(val); } +}; +using unique_bitmap_ptr = std::unique_ptr; + +struct mtmd_input_chunks_deleter { + void operator()(mtmd_input_chunks* val) { mtmd_input_chunks_free(val); } +}; +using unique_mtmd_input_chunks_ptr = std::unique_ptr; + +class unique_llama_batch { + public: + explicit unique_llama_batch(std::optional batch = std::nullopt): batch_(std::move(batch)) {} + + unique_llama_batch(unique_llama_batch&&) = default; + unique_llama_batch& operator=(unique_llama_batch&&) = default; + unique_llama_batch(const unique_llama_batch&) = delete; + unique_llama_batch& operator=(const unique_llama_batch&) = delete; + + std::optional get() const { + return batch_; + } + + std::optional& operator->() { + return batch_; + } + + void reset(std::optional batch = std::nullopt) { + if (batch_) { + llama_batch_free(batch_.value()); + } + batch_ = std::move(batch); + } + + ~unique_llama_batch() { + if (batch_) { + llama_batch_free(batch_.value()); + } + batch_.reset(); + } + + private: + std::optional batch_; +}; + +} // namespace + +nonstd::expected DefaultLlamaContext::generate(const std::string& prompt, const std::vector>& files, std::function token_handler) { GenerationResult result{}; auto start_time = std::chrono::steady_clock::now(); + llama_memory_seq_rm(llama_get_memory(llama_ctx_), 0, -1, -1); const llama_vocab * vocab = llama_model_get_vocab(llama_model_); - std::vector tokenized_input = tokenizeInput(vocab, input); - result.num_tokens_in = gsl::narrow(tokenized_input.size()); + llama_pos n_past = 0; + std::vector tokenized_input; + unique_llama_batch batch; + int32_t decode_status = 0; + if (multimodal_ctx_) { + gsl_Assert(!files.empty()); + std::vector bitmaps; + for (auto& file : files) { + unique_bitmap_ptr bitmap{mtmd_helper_bitmap_init_from_buf(multimodal_ctx_, reinterpret_cast(file.data()), file.size())}; + if (!bitmap) { + throw Exception(PROCESSOR_EXCEPTION, "Failed to create multimodal bitmap from buffer"); + } + bitmaps.push_back(std::move(bitmap)); + } + mtmd_input_text inp_txt = { + .text = prompt.c_str(), + .add_special = true, + .parse_special = true, + }; + unique_mtmd_input_chunks_ptr chunks{mtmd_input_chunks_init()}; + auto bitmap_c_ptrs = bitmaps | std::views::transform([] (auto& ptr) {return ptr.get();}) | std::ranges::to>(); + auto tokenized = mtmd_tokenize(multimodal_ctx_, chunks.get(), &inp_txt, bitmap_c_ptrs.data(), bitmap_c_ptrs.size()); + if (tokenized != 0) { + throw Exception(PROCESSOR_EXCEPTION, fmt::format("Failed to tokenize multimodal prompt, error: {}", tokenized)); + } + auto status = mtmd_helper_eval_chunks(multimodal_ctx_, llama_ctx_, chunks.get(), 0, 0, 1, true, &n_past); + if (status != 0) { + throw Exception(PROCESSOR_EXCEPTION, fmt::format("Failed to eval multimodal chunks, error: {}", status)); + } + } else { + gsl_Assert(files.empty()); + tokenized_input = tokenizeInput(vocab, prompt); + n_past = gsl::narrow(tokenized_input.size()); + result.num_tokens_in = gsl::narrow(tokenized_input.size()); + decode_status = llama_decode(llama_ctx_, llama_batch_get_one(tokenized_input.data(), n_past)); + } - llama_batch batch = llama_batch_get_one(tokenized_input.data(), gsl::narrow(tokenized_input.size())); llama_token new_token_id = 0; bool first_token_generated = false; - while (true) { - int32_t res = llama_decode(llama_ctx_, batch); - if (res == 1) { - return nonstd::make_unexpected("Could not find a KV slot for the batch (try reducing the size of the batch or increase the context)"); - } else if (res < 0) { - return nonstd::make_unexpected("Error occurred while executing llama decode"); - } - + while (decode_status == 0) { new_token_id = llama_sampler_sample(llama_sampler_, llama_ctx_, -1); if (!first_token_generated) { result.time_to_first_token = std::chrono::duration_cast(std::chrono::steady_clock::now() - start_time); @@ -147,8 +246,27 @@ nonstd::expected DefaultLlamaContext::generate(co gsl_Assert(len < 128); std::string_view token_str{buf.data(), gsl::narrow(len)}; - batch = llama_batch_get_one(&new_token_id, 1); + batch.reset(llama_batch_init(1, 0, 1)); + batch->n_tokens = 1; + batch->token[0] = new_token_id; + batch->pos[0] = n_past; + batch->n_seq_id[0] = 1; + batch->seq_id[0][0] = 0; + batch->logits[0] = true; + ++n_past; token_handler(token_str); + + decode_status = llama_decode(llama_ctx_, batch.get().value()); + } + + if (decode_status == 1) { + return nonstd::make_unexpected("Could not find a KV slot for the batch (try reducing the size of the batch or increase the context)"); + } + if (decode_status == 2) { + return nonstd::make_unexpected("Llama decode aborted"); + } + if (decode_status < 0) { + return nonstd::make_unexpected("Error occurred while executing llama decode"); } result.tokens_per_second = diff --git a/extensions/llamacpp/processors/DefaultLlamaContext.h b/extensions/llamacpp/processors/DefaultLlamaContext.h index 2d2bdf562f..8478e3f38f 100644 --- a/extensions/llamacpp/processors/DefaultLlamaContext.h +++ b/extensions/llamacpp/processors/DefaultLlamaContext.h @@ -19,12 +19,15 @@ #include "LlamaContext.h" #include "llama.h" #include "LlamaBackendInitializer.h" +#include "mtmd/mtmd.h" +#include "minifi-cpp/core/logging/Logger.h" namespace org::apache::nifi::minifi::extensions::llamacpp::processors { class DefaultLlamaContext : public LlamaContext { public: - DefaultLlamaContext(const std::filesystem::path& model_path, const LlamaSamplerParams& llama_sampler_params, const LlamaContextParams& llama_ctx_params); + DefaultLlamaContext(const std::filesystem::path& model_path, const std::optional& multimodal_model_path, + const LlamaSamplerParams& llama_sampler_params, const LlamaContextParams& llama_ctx_params, const std::shared_ptr& logger); DefaultLlamaContext(const DefaultLlamaContext&) = delete; DefaultLlamaContext(DefaultLlamaContext&&) = delete; DefaultLlamaContext& operator=(const DefaultLlamaContext&) = delete; @@ -32,12 +35,13 @@ class DefaultLlamaContext : public LlamaContext { ~DefaultLlamaContext() override; std::optional applyTemplate(const std::vector& messages) override; - nonstd::expected generate(const std::string& input, std::function token_handler) override; + nonstd::expected generate(const std::string& prompt, const std::vector>& files, std::function token_handler) override; private: const LlamaBackendInitializer& llama_context_initializer_ = LlamaBackendInitializer::get(); llama_model* llama_model_{}; llama_context* llama_ctx_{}; + mtmd_context* multimodal_ctx_{}; llama_sampler* llama_sampler_{}; }; diff --git a/extensions/llamacpp/processors/LlamaContext.h b/extensions/llamacpp/processors/LlamaContext.h index 3c107c2c52..871109692a 100644 --- a/extensions/llamacpp/processors/LlamaContext.h +++ b/extensions/llamacpp/processors/LlamaContext.h @@ -59,7 +59,7 @@ struct GenerationResult { class LlamaContext { public: virtual std::optional applyTemplate(const std::vector& messages) = 0; - virtual nonstd::expected generate(const std::string& input, std::function token_handler) = 0; + virtual nonstd::expected generate(const std::string& prompt, const std::vector>& files, std::function token_handler) = 0; virtual ~LlamaContext() = default; }; diff --git a/extensions/llamacpp/processors/RunLlamaCppInference.cpp b/extensions/llamacpp/processors/RunLlamaCppInference.cpp index 5927a0199e..2150651d81 100644 --- a/extensions/llamacpp/processors/RunLlamaCppInference.cpp +++ b/extensions/llamacpp/processors/RunLlamaCppInference.cpp @@ -31,7 +31,9 @@ namespace org::apache::nifi::minifi::extensions::llamacpp::processors { MinifiStatus RunLlamaCppInference::onScheduleImpl(api::core::ProcessContext& context) { model_path_.clear(); model_path_ = api::utils::parseProperty(context, ModelPath); + multimodal_model_path_ = api::utils::parseOptionalProperty(context, MultiModalModelPath); system_prompt_ = context.getProperty(SystemPrompt).value_or(""); + output_attribute_ = api::utils::parseOptionalProperty(context, OutputAttributeName); LlamaSamplerParams llama_sampler_params; llama_sampler_params.temperature = api::utils::parseOptionalFloatProperty(context, Temperature); @@ -53,7 +55,7 @@ MinifiStatus RunLlamaCppInference::onScheduleImpl(api::core::ProcessContext& con if (llama_context_provider_) { llama_ctx_ = llama_context_provider_(model_path_, llama_sampler_params, llama_ctx_params); } else { - llama_ctx_ = std::make_unique(model_path_, llama_sampler_params, llama_ctx_params); + llama_ctx_ = std::make_unique(model_path_, multimodal_model_path_, llama_sampler_params, llama_ctx_params, logger_); } return MINIFI_STATUS_SUCCESS; @@ -76,10 +78,16 @@ MinifiStatus RunLlamaCppInference::onTriggerImpl(api::core::ProcessContext& cont auto prompt = context.getProperty(Prompt, &flow_file).value_or(""); auto read_result = session.readBuffer(flow_file); + std::vector> files; std::string input_data_and_prompt; if (!read_result.empty()) { input_data_and_prompt.append("Input data (or flow file content):\n"); - input_data_and_prompt.append({reinterpret_cast(read_result.data()), read_result.size()}); + if (multimodal_model_path_) { + input_data_and_prompt.append(mtmd_default_marker()); + files.push_back(std::move(read_result)); + } else { + input_data_and_prompt.append({reinterpret_cast(read_result.data()), read_result.size()}); + } input_data_and_prompt.append("\n\n"); } input_data_and_prompt.append(prompt); @@ -111,7 +119,7 @@ MinifiStatus RunLlamaCppInference::onTriggerImpl(api::core::ProcessContext& cont auto start_time = std::chrono::steady_clock::now(); std::string text; - auto generation_result = llama_ctx_->generate(*input, [&] (std::string_view token) { + auto generation_result = llama_ctx_->generate(*input, files, [&] (std::string_view token) { text += token; }); @@ -133,7 +141,12 @@ MinifiStatus RunLlamaCppInference::onTriggerImpl(api::core::ProcessContext& cont session.setAttribute(flow_file, LlamaCppTimeToFirstToken.name, std::to_string(generation_result->time_to_first_token.count()) + " ms"); session.setAttribute(flow_file, LlamaCppTokensPerSecond.name, fmt::format("{:.2f}", generation_result->tokens_per_second)); - session.writeBuffer(flow_file, text); + if (output_attribute_) { + session.setAttribute(flow_file, output_attribute_.value(), text); + } else { + session.writeBuffer(flow_file, text); + } + session.transfer(std::move(flow_file), Success); return MINIFI_STATUS_SUCCESS; diff --git a/extensions/llamacpp/processors/RunLlamaCppInference.h b/extensions/llamacpp/processors/RunLlamaCppInference.h index 76ae3a1f65..bf9b7d3151 100644 --- a/extensions/llamacpp/processors/RunLlamaCppInference.h +++ b/extensions/llamacpp/processors/RunLlamaCppInference.h @@ -59,6 +59,12 @@ class RunLlamaCppInference : public api::core::ProcessorImpl { .withDescription("The filesystem path of the model file in gguf format.") .isRequired(true) .build(); + EXTENSIONAPI static constexpr auto OutputAttributeName = core::PropertyDefinitionBuilder<>::createProperty("Output Attribute Name") + .withDescription("Specify the attribute to use as output, if not provided, the content is overridden instead.") + .build(); + EXTENSIONAPI static constexpr auto MultiModalModelPath = core::PropertyDefinitionBuilder<>::createProperty("MultiModal Model Path") + .withDescription("The filesystem path of the multimodal model (visual, audio) file in gguf format.") + .build(); EXTENSIONAPI static constexpr auto Temperature = core::PropertyDefinitionBuilder<>::createProperty("Temperature") .withDescription("The temperature to use for sampling.") .withDefaultValue("0.8") @@ -128,6 +134,8 @@ class RunLlamaCppInference : public api::core::ProcessorImpl { EXTENSIONAPI static constexpr auto Properties = std::to_array({ ModelPath, + OutputAttributeName, + MultiModalModelPath, Temperature, TopK, TopP, @@ -167,7 +175,9 @@ class RunLlamaCppInference : public api::core::ProcessorImpl { void increaseTokensOut(uint64_t token_count); std::string model_path_; + std::optional multimodal_model_path_; std::string system_prompt_; + std::optional output_attribute_; LlamaContextProvider llama_context_provider_; std::unique_ptr llama_ctx_; diff --git a/extensions/llamacpp/tests/RunLlamaCppInferenceTests.cpp b/extensions/llamacpp/tests/RunLlamaCppInferenceTests.cpp index ec507af33a..fe660aa863 100644 --- a/extensions/llamacpp/tests/RunLlamaCppInferenceTests.cpp +++ b/extensions/llamacpp/tests/RunLlamaCppInferenceTests.cpp @@ -37,7 +37,7 @@ class MockLlamaContext : public processors::LlamaContext { return "Test input"; } - nonstd::expected generate(const std::string& input, std::function token_handler) override { + nonstd::expected generate(const std::string& input, const std::vector>& /*files*/, std::function token_handler) override { if (fail_generation_) { return nonstd::make_unexpected("Generation failed"); } diff --git a/thirdparty/llamacpp/cpp-23-fixes.patch b/thirdparty/llamacpp/cpp-23-fixes.patch deleted file mode 100644 index 0e84e43956..0000000000 --- a/thirdparty/llamacpp/cpp-23-fixes.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 072bd8ce7e10a0fffb1e2bc755c2964e472909ed Mon Sep 17 00:00:00 2001 -From: Martin Zink -Date: Tue, 22 Jul 2025 12:49:42 +0200 -Subject: [PATCH] c++23 fixes - ---- - src/llama-hparams.cpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp -index c6c67d26..db36de4d 100644 ---- a/src/llama-hparams.cpp -+++ b/src/llama-hparams.cpp -@@ -1,5 +1,7 @@ - #include "llama-hparams.h" - -+#include -+ - #include "ggml.h" - - void llama_hparams::set_swa_pattern(uint32_t n_pattern) { --- -2.39.5 (Apple Git-154) - diff --git a/thirdparty/llamacpp/lu8_macro_fix.patch b/thirdparty/llamacpp/lu8_macro_fix.patch deleted file mode 100644 index a1b92d28b3..0000000000 --- a/thirdparty/llamacpp/lu8_macro_fix.patch +++ /dev/null @@ -1,17 +0,0 @@ -diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp -index dd27a381..47550954 100644 ---- a/src/llama-chat.cpp -+++ b/src/llama-chat.cpp -@@ -6,11 +6,7 @@ - #include - #include - --#if __cplusplus >= 202000L -- #define LU8(x) (const char*)(u8##x) --#else -- #define LU8(x) u8##x --#endif -+#define LU8(x) reinterpret_cast(u8##x) - - // trim whitespace from the beginning and end of a string - static std::string trim(const std::string & str) { From b207ec47dcea6188a16b3e6de29b8227b7dab87b Mon Sep 17 00:00:00 2001 From: Adam Debreceni Date: Wed, 18 Feb 2026 13:38:19 +0100 Subject: [PATCH 2/2] MINIFICPP-2719 - Do not build executable tools --- cmake/LlamaCpp.cmake | 8 +++++++- thirdparty/llamacpp/mtmd-fix.patch | 31 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 thirdparty/llamacpp/mtmd-fix.patch diff --git a/cmake/LlamaCpp.cmake b/cmake/LlamaCpp.cmake index 1494b137e4..9d6f23b542 100644 --- a/cmake/LlamaCpp.cmake +++ b/cmake/LlamaCpp.cmake @@ -22,7 +22,6 @@ set(LLAMA_BUILD_TESTS "OFF" CACHE STRING "" FORCE) set(LLAMA_BUILD_EXAMPLES "OFF" CACHE STRING "" FORCE) set(LLAMA_BUILD_SERVER "OFF" CACHE STRING "" FORCE) set(LLAMA_BUILD_COMMON "ON" CACHE STRING "" FORCE) -set(LLAMA_BUILD_TOOLS "ON" CACHE STRING "" FORCE) set(GGML_OPENMP "OFF" CACHE STRING "" FORCE) set(GGML_METAL "OFF" CACHE STRING "" FORCE) set(GGML_BLAS "OFF" CACHE STRING "" FORCE) @@ -32,9 +31,16 @@ else() set(GGML_NATIVE "ON" CACHE STRING "" FORCE) endif() +set(PATCH_FILE_1 "${CMAKE_SOURCE_DIR}/thirdparty/llamacpp/mtmd-fix.patch") + +set(PC ${Bash_EXECUTABLE} -c "set -x &&\ + (\\\"${Patch_EXECUTABLE}\\\" -p1 -R -s -f --dry-run -i \\\"${PATCH_FILE_1}\\\" || \\\"${Patch_EXECUTABLE}\\\" -p1 -N -i \\\"${PATCH_FILE_1}\\\")") + + FetchContent_Declare(llamacpp URL https://github.com/ggml-org/llama.cpp/archive/refs/tags/b7836.tar.gz URL_HASH SHA256=3d384e7e8b3bc3cd31abddedf684a6e201405c1d932cafb3c4a5277d872b0614 + PATCH_COMMAND "${PC}" SYSTEM ) diff --git a/thirdparty/llamacpp/mtmd-fix.patch b/thirdparty/llamacpp/mtmd-fix.patch new file mode 100644 index 0000000000..b462347ad8 --- /dev/null +++ b/thirdparty/llamacpp/mtmd-fix.patch @@ -0,0 +1,31 @@ +diff --color=auto -rupN llamacpp-src-original/CMakeLists.txt llamacpp-src-patched/CMakeLists.txt +--- llamacpp-src-original/CMakeLists.txt 2026-01-25 21:19:47 ++++ llamacpp-src-patched/CMakeLists.txt 2026-02-18 13:15:46 +@@ -212,6 +212,7 @@ add_subdirectory(src) + # + + add_subdirectory(src) ++add_subdirectory(tools/mtmd) + + # + # utils, programs, examples and tests +diff --color=auto -rupN llamacpp-src-original/tools/mtmd/CMakeLists.txt llamacpp-src-patched/tools/mtmd/CMakeLists.txt +--- llamacpp-src-original/tools/mtmd/CMakeLists.txt 2026-01-25 21:19:47 ++++ llamacpp-src-patched/tools/mtmd/CMakeLists.txt 2026-02-18 13:13:40 +@@ -80,16 +80,3 @@ endif() + endif() + endif() + +-add_executable(llama-llava-cli deprecation-warning.cpp) +-add_executable(llama-gemma3-cli deprecation-warning.cpp) +-add_executable(llama-minicpmv-cli deprecation-warning.cpp) +-add_executable(llama-qwen2vl-cli deprecation-warning.cpp) +- +-set(TARGET llama-mtmd-cli) +-add_executable (${TARGET} mtmd-cli.cpp) +-set_target_properties (${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli) +-if(LLAMA_TOOLS_INSTALL) +- install(TARGETS ${TARGET} RUNTIME) +-endif() +-target_link_libraries (${TARGET} PRIVATE common mtmd Threads::Threads) +-target_compile_features(${TARGET} PRIVATE cxx_std_17)