ggml alloc: Fix for null dereference on alloc failure (#5200)

* Fix for a null pointer dereference if a Metal GGML buffer fails to be allocated
* Freeing the allocated buffers rather than the pointer in ggml-alloc.c
* Fixed the fix of the fix
kompute : fix fallback to CPU (#5201)
2026-06-17 11:07:39 +02:00 · 2024-01-29 23:19:29 +01:00 · 2024-01-29 17:11:27 -05:00
2 changed files with 3 additions and 3 deletions
@@ -791,7 +791,7 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
        for (size_t i = 0; i < *n_buffers; i++) {
            ggml_backend_buffer_free(*buffers[i]);
        }
-        free(buffers);
+        free(*buffers);
        return false;
    }

@@ -4136,7 +4136,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
        }

 #ifdef GGML_USE_KOMPUTE
-        if (ggml_vk_has_device() && params.n_gpu_layers > 0 && (
+        if (params.n_gpu_layers > 0 && (
            !(model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON)
            || !(
                model.ftype == LLAMA_FTYPE_ALL_F32 ||
@@ -4145,8 +4145,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
                model.ftype == LLAMA_FTYPE_MOSTLY_Q4_1
            )
        )) {
-            // disable Vulkan due to unsupported model architecture or quantization type
            // TODO(cebtenzzre): propagate this error outside of llama_load_model_from_file
+            LLAMA_LOG_WARN("%s: disabling Kompute due to unsupported model arch or quantization\n", __func__);
            params.n_gpu_layers = 0;
        }
 #endif
Author	SHA1	Message	Date
Paul Tsochantaris	ceebbb5b21	ggml alloc: Fix for null dereference on alloc failure (#5200) * Fix for a null pointer dereference if a Metal GGML buffer fails to be allocated * Freeing the allocated buffers rather than the pointer in ggml-alloc.c * Fixed the fix of the fix	2024-01-29 23:19:29 +01:00
Jared Van Bortel	6daa69ee81	kompute : fix fallback to CPU (#5201)	2024-01-29 17:11:27 -05:00