Skip to content

Commit

Permalink
model : fix order kvq -> qkv
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Feb 19, 2025
1 parent 2eacb4c commit f95b04a
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 55 deletions.
12 changes: 6 additions & 6 deletions src/llama-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2572,9 +2572,9 @@ ggml_tensor * llama_context_kv_self::build_attn(
ggml_cgraph * gf,
ggml_tensor * wo,
ggml_tensor * wo_b,
+ ggml_tensor * q_cur,
  ggml_tensor * k_cur,
  ggml_tensor * v_cur,
- ggml_tensor * q_cur,
int32_t n_tokens,
float kq_scale,
int il,
Expand Down Expand Up @@ -2617,9 +2617,6 @@ ggml_tensor * llama_context_kv_self::build_attn(
ggml_build_forward_expand(gf, ggml_cpy(ctx0, v_cur, v_cache_view));
}

- const auto & n_embd_head_k = hparams.n_embd_head_k;
- const auto & n_embd_head_v = hparams.n_embd_head_v;
-
// TODO: improve
bool is_sliding = false;

Expand Down Expand Up @@ -2648,8 +2645,11 @@ ggml_tensor * llama_context_kv_self::build_attn(

const auto n_kv = worst_case ? kv_self.size : kv_self.n;

- const int64_t n_head = hparams.n_head(il);
- const int64_t n_head_kv = hparams.n_head_kv(il);
+ const int64_t n_head = hparams.n_head(il);
+ const int64_t n_head_kv = hparams.n_head_kv(il);
+
+ const auto & n_embd_head_k = hparams.n_embd_head_k;
+ const auto & n_embd_head_v = hparams.n_embd_head_v;

struct ggml_tensor * q = ggml_permute(ctx0, q_cur, 0, 2, 1, 3);
//cb(q, "q", il);
Expand Down
2 changes: 1 addition & 1 deletion src/llama-context.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,9 @@ class llama_context_kv_self : public llama_context {
ggml_cgraph * gf,
ggml_tensor * wo,
ggml_tensor * wo_b,
+ ggml_tensor * q_cur,
  ggml_tensor * k_cur,
  ggml_tensor * v_cur,
- ggml_tensor * q_cur,
int32_t n_tokens,
float kq_scale,
int il,
Expand Down
2 changes: 1 addition & 1 deletion src/llama-graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ class llama_graph_i {
ggml_cgraph * gf,
ggml_tensor * wo,
ggml_tensor * wo_b,
+ ggml_tensor * q_cur,
  ggml_tensor * k_cur,
  ggml_tensor * v_cur,
- ggml_tensor * q_cur,
int32_t n_tokens,
float kq_scale,
int il,
Expand Down
Loading

0 comments on commit f95b04a

Please sign in to comment.