mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-09 07:16:44 +02:00
model : support for DeepseekV32ForCausalLM with generic DeepSeek Sparse Attention (DSA) implementation (#23346)
* llama : support DeepSeek V3.2 model family (with DSA lightning indexer) * convert : handle DeepseekV32ForCausalLM architecture * ggml : support for f16 GGML_OP_FILL * memory : separate hparams argument in llama_kv_cache constructor * memory : add llama_kv_cache_dsa memory (KV cache + lightning indexer cache) * llama : support for LLM_ARCH_DEEPSEEK32 * model : llama_model_deepseek32 implementation * model : merge two scale operations into one in DSA lightning indexer implementation * chore : remove unused code * model : support NVFP4 in DeepSeek V3.2 Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> * memory : refactoring TODO Co-authored-by: ggerganov <ggerganov@users.noreply.github.com> --------- Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> Co-authored-by: ggerganov <ggerganov@users.noreply.github.com>
This commit is contained in:
@@ -100,6 +100,7 @@ static gguf_context_ptr get_gguf_ctx(const llm_arch arch, const bool moe) {
|
||||
n_ff = 96;
|
||||
n_layer = 22; // hparams.n_layer_kv_from_start = 20 is hardcoded
|
||||
} else if (arch == LLM_ARCH_DEEPSEEK2
|
||||
|| arch == LLM_ARCH_DEEPSEEK32
|
||||
|| arch == LLM_ARCH_GLM_DSA
|
||||
|| arch == LLM_ARCH_KIMI_LINEAR
|
||||
|| arch == LLM_ARCH_MISTRAL4) {
|
||||
@@ -156,6 +157,7 @@ static gguf_context_ptr get_gguf_ctx(const llm_arch arch, const bool moe) {
|
||||
|
||||
ms.add_kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, 8.0f);
|
||||
if (arch == LLM_ARCH_DEEPSEEK2
|
||||
|| arch == LLM_ARCH_DEEPSEEK32
|
||||
|| arch == LLM_ARCH_GLM_DSA
|
||||
|| arch == LLM_ARCH_KIMI_LINEAR
|
||||
|| arch == LLM_ARCH_MISTRAL4) {
|
||||
@@ -332,6 +334,7 @@ static bool moe_mandatory(const llm_arch arch) {
|
||||
case LLM_ARCH_ARCTIC:
|
||||
case LLM_ARCH_DEEPSEEK:
|
||||
case LLM_ARCH_DEEPSEEK2:
|
||||
case LLM_ARCH_DEEPSEEK32:
|
||||
case LLM_ARCH_GLM4_MOE:
|
||||
case LLM_ARCH_GLM_DSA:
|
||||
case LLM_ARCH_EXAONE_MOE:
|
||||
|
||||
Reference in New Issue
Block a user