Compare commits

...

3 Commits

Author SHA1 Message Date
Gabriel Larson 4762ad7316 model : make rope_yarn_log_mul optional for deepseek2 (#14896)
* make rope_yarn_log_mul optional for deepseek2

* default rope_yarn_log_mul = 0.0f
2025-07-27 11:18:37 +03:00
Shunta Saito 1dc9614e06 llama : fix kq_scale for the attention layers of PLaMo2 (#14892)
* Fix dimensions for expand

* Change dimensions to copy states to cache

* Fix the default value for plamo2 conversion

* Fix scale given to build_attn

* Update src/llama-model.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update src/llama-model.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update src/llama-model.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
2025-07-27 09:38:44 +02:00
Aman Gupta 446595b9b3 Docs: add instructions for adding backends (#14889) 2025-07-27 09:36:43 +08:00
8 changed files with 14756 additions and 13085 deletions
+2 -2
View File
@@ -3791,7 +3791,7 @@ class Plamo2Model(TextModel):
self.gguf_writer.add_block_count(block_count)
self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1000000.0))
self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 10000))
# Mamba parameters
self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
@@ -3802,7 +3802,7 @@ class Plamo2Model(TextModel):
self.gguf_writer.add_ssm_group_count(0)
# MLP feed forward parameters (for attention layers)
self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 16384))
self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 13312))
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+12 -4
View File
@@ -2,6 +2,11 @@
List of GGML operations and backend support status.
## How to add a backend to this table:
1. Run `test-backend-ops support --output csv` with your backend name and redirect output to a csv file in `docs/ops/` (e.g., `docs/ops/CUDA.csv`)
2. Regenerate `/docs/ops.md` via `./scripts/create_ops_docs.py`
Legend:
- ✅ Fully supported by this backend
- 🟡 Partially supported by this backend
@@ -18,7 +23,8 @@ Legend:
| ARGSORT | ❌ | ✅ | ✅ | ✅ |
| CLAMP | ❌ | ✅ | ✅ | 🟡 |
| CONCAT | ❌ | ✅ | 🟡 | ✅ |
| CONT | ❌ | ✅ | 🟡 | ✅ |
| CONT | ❌ | ✅ | | ✅ |
| CONV_2D | ❌ | ✅ | ❌ | ❌ |
| CONV_2D_DW | ❌ | ✅ | ✅ | ❌ |
| CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ |
| CONV_TRANSPOSE_2D | ❌ | ✅ | ✅ | ❌ |
@@ -30,7 +36,7 @@ Legend:
| DIAG_MASK_INF | ❌ | ✅ | ✅ | 🟡 |
| DIV | ❌ | ✅ | ✅ | 🟡 |
| DUP | ❌ | ✅ | 🟡 | 🟡 |
| ELU | ❌ | ✅ | | 🟡 |
| ELU | ❌ | ✅ | 🟡 | 🟡 |
| EXP | ❌ | ✅ | 🟡 | ❌ |
| FLASH_ATTN_EXT | ❌ | ✅ | 🟡 | 🟡 |
| GATED_LINEAR_ATTN | ❌ | ✅ | ✅ | ❌ |
@@ -66,14 +72,16 @@ Legend:
| REPEAT_BACK | ❌ | ✅ | ✅ | ❌ |
| RMS_NORM | ❌ | ✅ | ✅ | 🟡 |
| RMS_NORM_BACK | ❌ | ✅ | ✅ | ❌ |
| RMS_NORM_MUL | ❌ | | | ✅ |
| RMS_NORM_MUL | ❌ | | | ✅ |
| RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ❌ |
| ROLL | ❌ | ✅ | ❌ | ❌ |
| ROPE | ❌ | ✅ | ✅ | ✅ |
| ROPE_BACK | ❌ | ✅ | ✅ | ❌ |
| RWKV_WKV6 | ❌ | ✅ | ✅ | ✅ |
| RWKV_WKV7 | ❌ | ✅ | ✅ | ✅ |
| SCALE | ❌ | ✅ | ✅ | ✅ |
| SET | ❌ | ✅ | ❌ | ✅ |
| SET_ROWS | ❌ | 🟡 | | 🟡 |
| SET_ROWS | ❌ | 🟡 | 🟡 | 🟡 |
| SGN | ❌ | ✅ | 🟡 | ❌ |
| SIGMOID | ❌ | ✅ | 🟡 | 🟡 |
| SILU | ❌ | ✅ | 🟡 | 🟡 |
+7349 -6534
View File
File diff suppressed because it is too large Load Diff
+7349 -6534
View File
File diff suppressed because it is too large Load Diff
+5
View File
@@ -112,6 +112,11 @@ class DocsGenerator:
lines.append("")
lines.append("List of GGML operations and backend support status.")
lines.append("")
lines.append("## How to add a backend to this table:")
lines.append("")
lines.append("1. Run `test-backend-ops support --output csv` with your backend name and redirect output to a csv file in `docs/ops/` (e.g., `docs/ops/CUDA.csv`)")
lines.append("2. Regenerate `/docs/ops.md` via `./scripts/create_ops_docs.py`")
lines.append("")
lines.append("Legend:")
lines.append("- ✅ Fully supported by this backend")
lines.append("- 🟡 Partially supported by this backend")
+1 -1
View File
@@ -98,7 +98,7 @@ struct llama_hparams {
float rope_freq_scale_train;
float rope_freq_scale_train_swa;
uint32_t n_ctx_orig_yarn;
float rope_yarn_log_mul;
float rope_yarn_log_mul = 0.0f;
std::array<int, 4> rope_sections;
+9 -8
View File
@@ -1369,7 +1369,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
// that have no expert_gating_func model parameter set
hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
}
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
switch (hparams.n_layer) {
case 27: type = LLM_TYPE_16B; break;
@@ -16191,7 +16191,7 @@ private:
{
// PLaMo-2 uses combined QKV tensor
ggml_tensor * qkv = build_lora_mm(model.layers[il].wqkv, cur);
cb(qkv, "qkv", il);
cb(qkv, "wqkv", il);
// split QKV tensor into Q, K, V
const int64_t n_embd_head_q = hparams.n_embd_head_k;
@@ -16231,7 +16231,7 @@ private:
ext_factor, attn_factor, beta_fast, beta_slow
);
cur = build_attn(inp, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f, il);
cur = build_attn(inp, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f/sqrtf(float(n_embd_head_v)), il);
}
cb(cur, "attn_out", il);
@@ -16306,8 +16306,9 @@ private:
ggml_build_forward_expand(gf,
ggml_cpy(ctx0, last_conv,
ggml_view_1d(ctx0, conv_states_all,
(d_conv - 1)*(d_inner)*(n_seqs),
kv_head*(d_conv - 1)*(d_inner)*ggml_element_size(conv_states_all))));
(d_conv - 1)*(d_inner + 2*n_group*d_state)*(n_seqs),
kv_head*(d_conv - 1)*(d_inner + 2*n_group*d_state)*ggml_element_size(conv_states_all))));
cb(conv_states_all, "mamba_conv1d_state", il);
// 1D convolution
x = ggml_ssm_conv(ctx0, conv_x, model.layers[il].ssm_conv1d);
@@ -16370,9 +16371,9 @@ private:
// store last states
ggml_build_forward_expand(gf,
ggml_cpy(ctx0,
ggml_view_1d(ctx0, y_ssm, d_state*d_inner*n_seqs, x->nb[3]*x->ne[3]),
ggml_view_1d(ctx0, ssm_states_all, d_state*d_inner*n_seqs,
kv_head*d_state*d_inner*ggml_element_size(ssm_states_all))));
ggml_view_1d(ctx0, y_ssm, n_heads*head_dim*d_state*n_seqs, n_heads*head_dim*n_seq_tokens*n_seqs*ggml_element_size(y_ssm)),
ggml_view_1d(ctx0, ssm_states_all, n_heads*head_dim*d_state*n_seqs, kv_head*n_seqs*n_heads*head_dim*d_state*ggml_element_size(ssm_states_all))));
cb(ssm_states_all, "mamba_ssm_states", il);
ggml_tensor * y = ggml_view_4d(ctx0, y_ssm, head_dim, n_heads, n_seq_tokens, n_seqs, head_dim * ggml_element_size(x), head_dim * n_heads * ggml_element_size(x), head_dim * n_heads * n_seq_tokens * ggml_element_size(x), 0);
cb(y, "mamba_y_view", il);
+29 -2
View File
@@ -868,16 +868,30 @@ struct sql_printer : public printer {
struct csv_printer : public printer {
void print_header() override {
std::vector<std::string> fields = test_result::get_fields();
std::vector<std::string> fields = test_result::get_fields();
std::vector<std::string> fields_csv = get_fields_csv();
for (size_t i = 0; i < fields.size(); i++) {
if (std::find(std::begin(fields_csv), std::end(fields_csv), fields[i]) == std::end(fields_csv)) {
continue;
}
printf("\"%s\"%s", fields[i].c_str(), i < fields.size() - 1 ? "," : "");
}
printf("\n");
}
void print_test_result(const test_result & result) override {
std::vector<std::string> values = result.get_values();
std::vector<std::string> values = result.get_values();
std::vector<std::string> fields = test_result::get_fields();
std::vector<std::string> fields_csv = get_fields_csv();
for (size_t i = 0; i < values.size(); i++) {
if (std::find(std::begin(fields_csv), std::end(fields_csv), fields[i]) == std::end(fields_csv)) {
continue;
}
// Escape quotes and wrap in quotes for CSV
std::string escaped_value = values[i];
size_t pos = 0;
@@ -889,6 +903,19 @@ struct csv_printer : public printer {
}
printf("\n");
}
static std::vector<std::string> get_fields_csv() {
return {
"op_name",
"op_params",
"supported",
"error_message",
"test_mode",
"backend_reg_name",
"backend_name",
};
}
};
static std::unique_ptr<printer> create_printer(output_formats format) {