Commit 245020f: correct get load time

zkh2016 committed Sep 18, 2024
1 parent 1c445e4

Showing 3 changed files with 24 additions and 13 deletions.
29 changes: 18 additions & 11 deletions examples/llava/minicpmv-cli.cpp
@@ -75,6 +75,18 @@ static struct llava_context * llava_init_context(gpt_params * params) {
    } else {
        ctx_params.n_ctx = params->n_ctx;
    }

    llama_model * model2 = nullptr;
    if (params->skip_model.size() > 0 && params->skip_layers > 0) {
        // load last model
        llama_model_params model_params = llama_model_params_from_gpt_params(*params);
        model_params.init_time = false;
        //llama_model * model2 = llama_load_model_from_file(params->model.c_str(), model_params);
        //llama_model * model2 = llama_load_model_from_file("/Users/zkh/Downloads/last_16/ggml-model-Q4_0.gguf", model_params);
        model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
        llama_set_model_skip_layers(model2, params->skip_layers);
        //llama_add_model_load_times(model, model2);
    }

    llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);

@@ -83,6 +95,10 @@ static struct llava_context * llava_init_context(gpt_params * params) {
        return NULL;
    }

    if (params->skip_model.size() > 0 && params->skip_layers > 0) {
        llama_set_model2(ctx_llama, model2);
    }

    for (unsigned int i = 0; i < params->lora_adapter.size(); ++i) {
        const std::string & lora_adapter = std::get<0>(params->lora_adapter[i]);
        float lora_scale = std::get<1>(params->lora_adapter[i]);
@@ -101,16 +117,6 @@ static struct llava_context * llava_init_context(gpt_params * params) {

    auto ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));

    {
        // load last model
        llama_model_params model_params = llama_model_params_from_gpt_params(*params);
        //llama_model * model2 = llama_load_model_from_file(params->model.c_str(), model_params);
        //llama_model * model2 = llama_load_model_from_file("/Users/zkh/Downloads/last_16/ggml-model-Q4_0.gguf", model_params);
        llama_model * model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
        llama_set_model_skip_layers(model2, params->skip_layers);
        llama_set_model2(ctx_llama, model2);
    }

    ctx_llava->ctx_llama = ctx_llama;
    ctx_llava->ctx_clip = ctx_clip;
    ctx_llava->model = model;
@@ -341,7 +347,8 @@ int main(int argc, char ** argv) {
    if (params.image.size() > 0) {
        auto image = params.image;
        ctx_llava = minicpmv_init(&params, image, n_past);

        // release vit memory
        clip_free(ctx_llava->ctx_clip);
        if (!params.prompt.empty()) {
            LOG_TEE("<user>%s\n", params.prompt.c_str());
            LOG_TEE("<assistant>");
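
Taken together, the hunks above move the secondary "skip" model load ahead of context creation and opt that second load out of timer initialization, which is what keeps the reported load time correct. The following is a condensed sketch only, assuming this fork's llama.h helpers (llama_set_model_skip_layers, llama_set_model2) and the gpt_params fields skip_model / skip_layers behave as shown in the diff:

    // Sketch: condensed init path for the skip-layer model (not the literal code in the file).
    static llama_context * init_with_skip_model(gpt_params * params,
                                                llama_model * model,
                                                llama_context_params ctx_params) {
        llama_model * model2 = nullptr;
        if (params->skip_model.size() > 0 && params->skip_layers > 0) {
            llama_model_params model_params = llama_model_params_from_gpt_params(*params);
            model_params.init_time = false; // second load: do not reset the ggml timer
            model2 = llama_load_model_from_file(params->skip_model.c_str(), model_params);
            llama_set_model_skip_layers(model2, params->skip_layers);
        }

        llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
        if (ctx_llama == NULL) {
            return NULL;
        }
        if (model2 != nullptr) {
            llama_set_model2(ctx_llama, model2); // attach the skip model after the context exists
        }
        return ctx_llama;
    }
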
7 changes: 5 additions & 2 deletions llama.cpp
@@ -14849,6 +14849,7 @@ struct llama_model_params llama_model_default_params() {
        /*.vocab_only =*/ false,
        /*.use_mmap =*/ true,
        /*.use_mlock =*/ false,
        /*.init_time =*/ true,
    };

#ifdef GGML_USE_METAL
@@ -14983,8 +14984,10 @@ void llama_set_model_skip_layers(
struct llama_model * llama_load_model_from_file(
        const char * path_model,
        struct llama_model_params params) {
    ggml_time_init();

    if (params.init_time) {
        ggml_time_init();
    }

    llama_model * model = new llama_model;

    unsigned cur_percentage = 0;
1 change: 1 addition & 0 deletions llama.h
@@ -235,6 +235,7 @@ extern "C" {
        bool vocab_only; // only load the vocabulary, no weights
        bool use_mmap;   // use mmap if possible
        bool use_mlock;  // force system to keep model in RAM
        bool init_time;  // call ggml_time_init() when loading the model (the default params set this to true)
    };

    struct llama_context_params {
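
The new field ties the llama.h and llama.cpp changes together: only a load that leaves init_time set keeps calling ggml_time_init(), so a second model loaded for the skip-layer path no longer resets the timer and skews the measured load time. A minimal caller-side sketch, assuming this fork's llama.h; path_skip_model is a hypothetical variable holding the path to the secondary model:

    // Sketch: load a secondary model without re-initializing the ggml timer.
    llama_model_params mparams = llama_model_default_params(); // init_time defaults to true
    mparams.init_time = false; // the first model load already called ggml_time_init()
    llama_model * model2 = llama_load_model_from_file(path_skip_model, mparams);
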
