Compare commits

..

3 Commits

Author SHA1 Message Date
slaren 47d604fa2d fix issues 2023-11-05 13:20:22 +01:00
slaren 73c0010e18 Merge remote-tracking branch 'origin/master' into fix-tensor-split-zero 2023-11-05 12:42:43 +01:00
Jared Van Bortel 05c51f96fe cuda : fix disabling device with --tensor-split 1,0 2023-11-05 00:56:32 -04:00
+5 -6
View File
@@ -5164,12 +5164,11 @@ static int llama_decode_internal(
// If all tensors can be run on the GPU then using more than 1 thread is detrimental.
const bool full_offload_supported =
model.arch == LLM_ARCH_LLAMA ||
model.arch == LLM_ARCH_BAICHUAN ||
model.arch == LLM_ARCH_FALCON ||
model.arch == LLM_ARCH_REFACT ||
model.arch == LLM_ARCH_MPT ||
model.arch == LLM_ARCH_STARCODER;
model.arch == LLM_ARCH_LLAMA ||
model.arch == LLM_ARCH_BAICHUAN ||
model.arch == LLM_ARCH_FALCON ||
model.arch == LLM_ARCH_REFACT ||
model.arch == LLM_ARCH_MPT;
const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 3;
if (ggml_cpu_has_cublas() && full_offload_supported && fully_offloaded) {