Mirror of https://github.com/ggml-org/llama.cpp.git (last synced 2026-06-09 07:16:44 +02:00)
convert : fix conversion for Mistral-Medium-3.5-128B (#24268)
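Mistral explicitly sets `moe` and `llama_4_scaling` to `null` in params.json. The keys are therefore present but map to `None`, which breaks the `key in dict` checks during conversion: the check passes and the code then treats `None` as a populated config section. Replace these checks with `dict.get(key) is not None` where this matters. Fixes `convert-hf-to-gguf.py --mistral-format Mistral-Medium-3.5-128B`.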
This commit is contained in:
@@ -105,8 +105,9 @@ class MistralModel(LlamaModel):
             gguf_writer.add_rope_scaling_yarn_log_mul(mscale_all_dim)
             gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
 
-        if "llama_4_scaling" in hparams:
-            gguf_writer.add_attn_temperature_scale(hparams["llama_4_scaling"]["beta"])
+        llama_4_scaling = hparams.get("llama_4_scaling")
+        if llama_4_scaling is not None:
+            gguf_writer.add_attn_temperature_scale(llama_4_scaling["beta"])
 
 
 class MistralMoeModel(DeepseekV2Model):
@@ -238,7 +238,7 @@ def main() -> None:
             assert hparams.get("vision_encoder") is not None, "This model does not support multimodal"
             from conversion.pixtral import PixtralModel
             model_class = PixtralModel
-        elif "moe" in hparams:
+        elif hparams.get("moe") is not None:
             from conversion.mistral import MistralMoeModel
             model_class = MistralMoeModel
         else:
Reference in New Issue
Block a user