Compare commits

..

2 Commits

Author SHA1 Message Date
Paul Tsochantaris ceebbb5b21 ggml alloc: Fix for null dereference on alloc failure (#5200)
* Fix for a null pointer dereference if a metal GGML buffer fails to be allocated

* Freeing the allocated buffers rather than the pointer in ggml-alloc.c

* Fixed the fix of the fix
2024-01-29 23:19:29 +01:00
Jared Van Bortel 6daa69ee81 kompute : fix fallback to CPU (#5201) 2024-01-29 17:11:27 -05:00
2 changed files with 3 additions and 3 deletions
+1 -1
View File
@@ -791,7 +791,7 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
for (size_t i = 0; i < *n_buffers; i++) {
ggml_backend_buffer_free(*buffers[i]);
}
-free(buffers);
+free(*buffers);
return false;
}
+2 -2
View File
@@ -4136,7 +4136,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
}
#ifdef GGML_USE_KOMPUTE
-if (ggml_vk_has_device() && params.n_gpu_layers > 0 && (
+if (params.n_gpu_layers > 0 && (
!(model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON)
|| !(
model.ftype == LLAMA_FTYPE_ALL_F32 ||
@@ -4145,8 +4145,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
model.ftype == LLAMA_FTYPE_MOSTLY_Q4_1
)
)) {
-// disable Vulkan due to unsupported model architecture or quantization type
// TODO(cebtenzzre): propagate this error outside of llama_load_model_from_file
+LLAMA_LOG_WARN("%s: disabling Kompute due to unsupported model arch or quantization\n", __func__);
params.n_gpu_layers = 0;
}
#endif