
Commit: load lora
zkh2016 committed Sep 14, 2024
1 parent 2a16030 commit bafd61f
Showing 2 changed files with 29 additions and 4 deletions.
13 changes: 10 additions & 3 deletions convert-lora-to-ggml.py
@@ -60,7 +60,7 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
 output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin")

 if os.path.exists(input_model):
-    model = torch.load(input_model, map_location="cpu")
+    model = torch.load(input_model, map_location="cpu", weights_only=True)
 else:
     input_model = os.path.join(sys.argv[1], "adapter_model.safetensors")
     # lazy import load_file only if lora is in safetensors format.
@@ -75,7 +75,6 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty

 arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
 name_map = gguf.TensorNameMap(arch, 200) # 200 layers ought to be enough for anyone
-
 with open(input_json, "r") as f:
     params = json.load(f)

@@ -103,6 +102,7 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
     write_file_header(fout, params)
     for k, v in model.items():
         orig_k = k
+        print(orig_k)
         if k.endswith(".default.weight"):
             k = k.replace(".default.weight", ".weight")
         if k in ["llama_proj.weight", "llama_proj.bias"]:
@@ -117,18 +117,25 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
         t = v.detach().numpy()

         prefix = "base_model.model."
         if k.startswith(prefix):
             k = k[len(prefix) :]
+        prefix = "llm."
+        if k.startswith(prefix):
+            k = k[len(prefix) :]

-        lora_suffixes = (".lora_A.weight", ".lora_B.weight")
+        lora_suffixes = (".lora_A.weight", ".lora_B.weight", ".lora_A.default.weight", ".lora_B.default.weight")
+        print("k:", k)
         if k.endswith(lora_suffixes):
             suffix = k[-len(lora_suffixes[0]):]
             k = k[: -len(lora_suffixes[0])]
         else:
             print(f"Error: unrecognized tensor name {orig_k}")
             sys.exit(1)
+        print("k:", k)

         tname = name_map.get_name(k)
+        #model.layers.139.self_attn.v_proj'
+        #llm.model.layers.0.self_attn.q_proj
         if tname is None:
             print(f"Error: could not map tensor name {orig_k}")
             print("  Note: the arch parameter must be specified if the model is not llama")
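For orientation, a minimal sketch (not part of the commit) of the name handling these converter changes extend: a PEFT-style tensor name is stripped of the base_model.model. prefix and the newly handled llm. prefix, the .default.weight spelling is folded into .weight, and the .lora_A/.lora_B suffix is split off before the remaining key is looked up with name_map.get_name(). The helper below and its sample input (modeled on the comments in the diff) are hypothetical.

# Hypothetical sketch of the key normalization performed in the conversion loop above.
def normalize_lora_key(k: str) -> tuple[str, str]:
    """Return (base_name, lora_suffix) for a PEFT-style LoRA tensor name."""
    for prefix in ("base_model.model.", "llm."):
        if k.startswith(prefix):
            k = k[len(prefix):]
    if k.endswith(".default.weight"):
        k = k.replace(".default.weight", ".weight")
    for suffix in (".lora_A.weight", ".lora_B.weight"):
        if k.endswith(suffix):
            return k[:-len(suffix)], suffix
    raise ValueError(f"unrecognized tensor name {k}")

# Sample name modeled on the comment in the diff above:
print(normalize_lora_key(
    "base_model.model.llm.model.layers.0.self_attn.q_proj.lora_A.default.weight"))
# -> ('model.layers.0.self_attn.q_proj', '.lora_A.weight')

The base name produced this way is what the script passes to name_map.get_name(k); the converter itself reads the adapter directory given as sys.argv[1] and writes ggml-adapter-model.bin into the same directory.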
20 changes: 19 additions & 1 deletion examples/llava/minicpmv-cli.cpp
@@ -77,12 +77,30 @@ static struct llava_context * llava_init_context(gpt_params * params) {
     }

     llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);

     if (ctx_llama == NULL) {
         LOG_TEE("%s: error: failed to create the llama_context\n" , __func__);
         return NULL;
     }

+    for (unsigned int i = 0; i < params->lora_adapter.size(); ++i) {
+        const std::string & lora_adapter = std::get<0>(params->lora_adapter[i]);
+        float lora_scale = std::get<1>(params->lora_adapter[i]);
+        int err = llama_model_apply_lora_from_file(model,
+                                                   lora_adapter.c_str(),
+                                                   lora_scale,
+                                                   ((i > 0) || params->lora_base.empty())
+                                                       ? NULL
+                                                       : params->lora_base.c_str(),
+                                                   params->n_threads);
+        if (err != 0) {
+            fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
+            // llama_free(lctx);
+            // llama_free_model(model);
+            // return std::make_tuple(nullptr, nullptr);
+        }
+    }
+
     auto ctx_llava = (struct llava_context *)malloc(sizeof(llava_context));

     ctx_llava->ctx_llama = ctx_llama;
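The commit keeps the failure path non-fatal: when llama_model_apply_lora_from_file returns non-zero, the loop only prints an error, and the cleanup lines (apparently carried over from code that owns an lctx and returns a tuple) remain commented out. A stricter variant is sketched below; it is not part of the commit and assumes llava_init_context may simply return NULL, as it already does when the llama_context cannot be created. The helper name apply_lora_adapters is hypothetical.

// Hypothetical helper (not in the commit): apply every adapter from gpt_params
// and report failure so the caller can clean up instead of continuing.
static bool apply_lora_adapters(llama_model * model, gpt_params * params) {
    for (unsigned int i = 0; i < params->lora_adapter.size(); ++i) {
        const std::string & lora_adapter = std::get<0>(params->lora_adapter[i]);
        float lora_scale = std::get<1>(params->lora_adapter[i]);
        int err = llama_model_apply_lora_from_file(model,
                                                   lora_adapter.c_str(),
                                                   lora_scale,
                                                   ((i > 0) || params->lora_base.empty())
                                                       ? NULL
                                                       : params->lora_base.c_str(),
                                                   params->n_threads);
        if (err != 0) {
            fprintf(stderr, "%s: error: failed to apply lora adapter '%s'\n",
                    __func__, lora_adapter.c_str());
            return false;
        }
    }
    return true;
}

In llava_init_context this would be called as if (!apply_lora_adapters(model, params)) { llama_free(ctx_llama); llama_free_model(model); return NULL; }, mirroring the commented-out cleanup. The converted ggml-adapter-model.bin reaches params->lora_adapter through the common --lora / --lora-scaled options, assuming this tree's argument parser still wires those flags up the way upstream llama.cpp did at the time.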
