diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 2924fdbe98..9087b6b4ef 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -36,6 +36,7 @@ extern "C" { GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft); GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size); + GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors); GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft); GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft); GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index 3bda9abbe0..0541b0f78e 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -1116,133 +1116,61 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) { // utils -static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) { - for (size_t i = 0; i < *n_buffers; i++) { - ggml_backend_buffer_free((*buffers)[i]); - } - free(*buffers); -} - -static bool alloc_tensor_range(struct ggml_context * ctx, - struct ggml_tensor * first, struct ggml_tensor * last, - ggml_backend_buffer_type_t buft, size_t size, - ggml_backend_buffer_t ** buffers, size_t * n_buffers) { - - ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size); - if (buffer == NULL) { - GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size); - free_buffers(buffers, n_buffers); - return false; - } - - *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1)); - (*buffers)[(*n_buffers)++] = buffer; - - struct ggml_tallocr tallocr = ggml_tallocr_new(buffer); - - for (struct ggml_tensor * t = 
first; t != last; t = ggml_get_next_tensor(ctx, t)) { - enum ggml_status status = GGML_STATUS_SUCCESS; - if (t->data == NULL) { - if (t->view_src == NULL) { - status = ggml_tallocr_alloc(&tallocr, t); - } else if (t->buffer == NULL) { - status = ggml_backend_view_init(t); - } - } else { - if (t->view_src != NULL && t->buffer == NULL) { - // view of a pre-allocated tensor - status = ggml_backend_view_init(t); - } - } - if (status != GGML_STATUS_SUCCESS) { - GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name); - free_buffers(buffers, n_buffers); - return false; - } - } - - return true; -} - static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl( - struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) { + struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { + GGML_ASSERT(ggml_get_no_alloc(ctx) == true); + + // collect tensors into a list + int n_tensors = 0; + for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + n_tensors++; + } + if (n_tensors == 0) { + return NULL; + } + + struct ggml_tensor ** tensors = (struct ggml_tensor **) malloc(n_tensors * sizeof(struct ggml_tensor *)); + int i = 0; + for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + tensors[i++] = t; + } + + ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer_n(buft, tensors, n_tensors); + free(tensors); + return buffer; +} + +ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { + return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft); +} + +size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { GGML_ASSERT(ggml_get_no_alloc(ctx) == true); size_t alignment = ggml_backend_buft_get_alignment(buft); size_t max_size = ggml_backend_buft_get_max_size(buft); - 
ggml_backend_buffer_t * buffers = NULL; - size_t n_buffers = 0; - *nbytes_total = 0; - + size_t nbytes_total = 0; size_t cur_buf_size = 0; - struct ggml_tensor * first = ggml_get_first_tensor(ctx); - for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) { + + for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { size_t this_size = 0; if (t->data == NULL && t->view_src == NULL) { this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment); } if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) { - // allocate tensors in the current buffer - if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) { - return NULL; - } - first = t; - *nbytes_total += cur_buf_size; + nbytes_total += cur_buf_size; cur_buf_size = this_size; } else { cur_buf_size += this_size; } } + nbytes_total += cur_buf_size; - // allocate remaining tensors - if (cur_buf_size > 0) { - *nbytes_total += cur_buf_size; - if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) { - return NULL; - } - } - - if (no_alloc) { - return NULL; - } - - if (n_buffers == 0) { -#ifndef NDEBUG - GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__); -#endif - GGML_ASSERT(!buffers); - return NULL; - } - - ggml_backend_buffer_t buffer; - if (n_buffers == 1) { - buffer = buffers[0]; - } else { - buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers); - } - if (buffers) { - free(buffers); // can be NULL if context is empty or no_alloc - } - return buffer; -} - -size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { - size_t nbytes_total = 0; - ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true); - GGML_ASSERT(!buf); return nbytes_total; } -ggml_backend_buffer_t 
ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { - size_t nbytes_total = 0; - if (ggml_backend_buft_is_meta(buft)) { - return ggml_backend_meta_alloc_ctx_tensors_from_buft(ctx, buft); - } - return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false); -} - ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) { return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend)); } diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index 9c56ec30c5..5cb8216c5c 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -8,7 +8,7 @@ extern "C" { #endif - #define GGML_BACKEND_API_VERSION 2 + #define GGML_BACKEND_API_VERSION 3 // // Backend buffer type @@ -18,6 +18,8 @@ extern "C" { const char * (*get_name) (ggml_backend_buffer_type_t buft); // allocate a buffer of this type ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size); + // (optional) allocate tensors from a list into a buffer of this type (defaults to alloc_buffer + linear allocator) + ggml_backend_buffer_t (*alloc_buffer_n)(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors); // tensor alignment size_t (*get_alignment) (ggml_backend_buffer_type_t buft); // (optional) max buffer size that can be allocated (defaults to SIZE_MAX) @@ -95,9 +97,6 @@ extern "C" { GGML_API size_t ggml_backend_meta_n_backends (ggml_backend_t meta_backend); GGML_API ggml_backend_t ggml_backend_meta_simple_backend(ggml_backend_t meta_backend, size_t index); - // temporary workaround to statically allocate tensors from a context in a deduplicated way: - GGML_API struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); - // // Backend (stream) // diff --git 
a/ggml/src/ggml-backend-meta.cpp b/ggml/src/ggml-backend-meta.cpp index 0a36f09900..14c2926f83 100644 --- a/ggml/src/ggml-backend-meta.cpp +++ b/ggml/src/ggml-backend-meta.cpp @@ -288,6 +288,8 @@ static ggml_backend_buffer_type_t ggml_backend_meta_buft_simple_buft(ggml_backen static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size); +static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors); + static size_t ggml_backend_meta_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft); size_t max_alignment = 1; @@ -329,12 +331,13 @@ static bool ggml_backend_meta_buffer_type_is_host(ggml_backend_buffer_type_t buf } static const struct ggml_backend_buffer_type_i ggml_backend_meta_buffer_type_iface = { - /* .get_name = */ ggml_backend_meta_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_meta_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_meta_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_meta_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_meta_buffer_type_get_alloc_size, - /* .is_host = */ ggml_backend_meta_buffer_type_is_host, + /* .get_name = */ ggml_backend_meta_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_meta_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ ggml_backend_meta_buffer_type_alloc_buffer_n, + /* .get_alignment = */ ggml_backend_meta_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_meta_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_meta_buffer_type_get_alloc_size, + /* .is_host = */ ggml_backend_meta_buffer_type_is_host, }; bool ggml_backend_buft_is_meta(ggml_backend_buffer_type_t buft) { @@ -1517,17 +1520,17 @@ static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_bac return 
ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, buf_ctx, max_size); } -struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { +static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors) { const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft); constexpr size_t compute_headroom = 16; // Maximum number of views per statically allocated tensor that can be created between evals. const ggml_init_params params_static = { - /*.mem_size =*/ ggml_get_mem_size(ctx), + /*.mem_size =*/ n_tensors * ggml_tensor_overhead(), /*.mem_buffer =*/ nullptr, /*.no_alloc =*/ true, }; const ggml_init_params params_compute = { - /*.mem_size =*/ compute_headroom*ggml_get_mem_size(ctx), + /*.mem_size =*/ compute_headroom * n_tensors * ggml_tensor_overhead(), /*.mem_buffer =*/ nullptr, /*.no_alloc =*/ true, }; @@ -1539,7 +1542,8 @@ struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struc ggml_backend_meta_buffer_context * meta_buf_ctx = new ggml_backend_meta_buffer_context(stc_static, stc_compute_0, stc_compute_1, bufs); ggml_backend_buffer_t meta_buf = ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, meta_buf_ctx, 0); - for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) { + for (int i = 0; i < n_tensors; i++) { + ggml_tensor * t = tensors[i]; t->buffer = meta_buf; ggml_backend_meta_buffer_init_tensor_impl(meta_buf_ctx->stc_static, t); t->data = (void *) 0x2000000000000000; // FIXME diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 87615921c0..704c1f226a 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -44,6 +44,107 @@ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t return buft->iface.alloc_buffer(buft, size); } +// default implementation 
of alloc_buffer_n +// allocates tensors from a list into one or more buffers of the given type, starting a new buffer whenever the running padded size would exceed the buft max size; returns NULL on failure or when no tensor needed a buffer, otherwise the single buffer or a multi-buffer wrapping all of them +static ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n_default(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) { + size_t alignment = ggml_backend_buft_get_alignment(buft); + size_t max_size = ggml_backend_buft_get_max_size(buft); + + ggml_backend_buffer_t * buffers = NULL; + size_t n_buffers = 0; + + size_t cur_buf_size = 0; + int first = 0; + for (int i = 0; i <= n_tensors; i++) { + size_t this_size = 0; + if (i < n_tensors) { + struct ggml_tensor * t = tensors[i]; + if (t->data == NULL && t->view_src == NULL) { + this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment); + } + } + + // flush the current buffer if adding this tensor would exceed max_size, or if we are at the end (i == n_tensors is a sentinel iteration with this_size == 0) + bool should_flush = (i == n_tensors) || (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size); + if (should_flush && cur_buf_size > 0) { + // allocate the buffer with the computed size for this range + ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, cur_buf_size); + if (buffer == NULL) { + for (size_t b = 0; b < n_buffers; b++) { + ggml_backend_buffer_free(buffers[b]); + } + free(buffers); + return NULL; + } + struct ggml_tallocr tallocr = ggml_tallocr_new(buffer); + + // allocate tensors [first, i) in the current buffer + bool ok = true; + for (int j = first; j < i; j++) { + struct ggml_tensor * t = tensors[j]; + if (t->data == NULL) { + if (t->view_src == NULL) { + if (ggml_tallocr_alloc(&tallocr, t) != GGML_STATUS_SUCCESS) { + ok = false; + break; + } + } else if (t->buffer == NULL) { + if (ggml_backend_view_init(t) != GGML_STATUS_SUCCESS) { + ok = false; + break; + } + } + } else { + if (t->view_src != NULL && t->buffer == NULL) { + // view of a pre-allocated tensor + if (ggml_backend_view_init(t) != GGML_STATUS_SUCCESS) { + ok = false; + break; + } + } + } + } + if (!ok) { + for (size_t b = 0; b < n_buffers; b++) { +
ggml_backend_buffer_free(buffers[b]); + } + ggml_backend_buffer_free(buffer); + free(buffers); + return NULL; + } + + buffers = (ggml_backend_buffer_t *) realloc(buffers, sizeof(ggml_backend_buffer_t) * (n_buffers + 1)); + buffers[n_buffers++] = buffer; + cur_buf_size = this_size; // tensor i opens the next range (first = i), so its size must seed the next buffer instead of being dropped; this_size is 0 on the end sentinel + first = i; + } else if (i < n_tensors) { + cur_buf_size += this_size; + } + } + + if (n_buffers == 0) { + free(buffers); + return NULL; + } + + ggml_backend_buffer_t result; + if (n_buffers == 1) { + result = buffers[0]; + } else { + result = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers); + } + free(buffers); + return result; +} + +ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) { + GGML_ASSERT(buft); + if (buft->iface.alloc_buffer_n) { + return buft->iface.alloc_buffer_n(buft, tensors, n_tensors); + } + return ggml_backend_buft_alloc_buffer_n_default(buft, tensors, n_tensors); +} + size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) { GGML_ASSERT(buft); return buft->iface.get_alignment(buft); @@ -2328,12 +2429,13 @@ static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) { static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = { /* .iface = */ { - /* .get_name = */ ggml_backend_cpu_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes - /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, + /* .get_name = */ ggml_backend_cpu_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to 
SIZE_MAX + /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes + /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, }, /* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), /* .context = */ NULL, @@ -2351,12 +2453,13 @@ static const char * ggml_backend_cpu_buffer_from_ptr_type_get_name(ggml_backend_ static ggml_backend_buffer_type_t ggml_backend_cpu_buffer_from_ptr_type(void) { static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = { /* .iface = */ { - /* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name, - /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes - /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, + /* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name, + /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes + /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, }, /* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), /* .context = */ NULL, diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 5f51ea3bb3..7669cc3073 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -1595,12 +1595,13 @@ static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buf * memory for CANN buffer types in the GGML backend. 
*/ static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = { - /* .get_name = */ ggml_backend_cann_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size, - /* .is_host = */ ggml_backend_cann_buffer_type_is_host, + /* .get_name = */ ggml_backend_cann_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size, + /* .is_host = */ ggml_backend_cann_buffer_type_is_host, }; /** @@ -1742,12 +1743,13 @@ static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggm ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() { static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = { /* .iface = */ { - /* .get_name = */ ggml_backend_cann_host_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, - /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, + /* .get_name = */ ggml_backend_cann_host_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, + /* .is_host = */ 
ggml_backend_cpu_buffer_type()->iface.is_host, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0), diff --git a/ggml/src/ggml-cpu/amx/amx.cpp b/ggml/src/ggml-cpu/amx/amx.cpp index 1118f7169c..dfae225491 100644 --- a/ggml/src/ggml-cpu/amx/amx.cpp +++ b/ggml/src/ggml-cpu/amx/amx.cpp @@ -228,12 +228,13 @@ static bool ggml_amx_init() { ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() { static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = { /* .iface = */ { - /* .get_name = */ ggml_backend_amx_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment, - /* .get_max_size = */ nullptr, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size, - /* .is_host = */ nullptr, + /* .get_name = */ ggml_backend_amx_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ nullptr, + /* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment, + /* .get_max_size = */ nullptr, // defaults to SIZE_MAX + /* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size, + /* .is_host = */ nullptr, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), /* .context = */ new ggml::cpu::amx::extra_buffer_type(), diff --git a/ggml/src/ggml-cpu/hbm.cpp b/ggml/src/ggml-cpu/hbm.cpp index a4073c15e6..767347f284 100644 --- a/ggml/src/ggml-cpu/hbm.cpp +++ b/ggml/src/ggml-cpu/hbm.cpp @@ -40,12 +40,13 @@ static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_ ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) { static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = { /* .iface = */ { - /* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer, - /* .get_alignment = */ 
ggml_backend_cpu_buffer_type_get_alignment, - /* .get_max_size = */ nullptr, // defaults to SIZE_MAX - /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes - /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, + /* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ nullptr, + /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, + /* .get_max_size = */ nullptr, // defaults to SIZE_MAX + /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes + /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, }, /* .context = */ nullptr, }; diff --git a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp index 9e54b676b9..40f4338e76 100644 --- a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +++ b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp @@ -1506,12 +1506,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void) { static ggml::cpu::kleidiai::extra_buffer_type ctx; static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_kleidiai = { /* .iface = */ { - /* .get_name = */ ggml_backend_cpu_kleidiai_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment, - /* .get_max_size = */ nullptr, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size, - /* .is_host = */ nullptr, + /* .get_name = */ ggml_backend_cpu_kleidiai_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ nullptr, + /* .get_alignment = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment, + /* .get_max_size = */ nullptr, // defaults to SIZE_MAX + /* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size, + /* .is_host = */ nullptr, }, /* .device = */ 
ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), /* .context = */ &ctx, diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index f18758f16b..6d56871425 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -4821,12 +4821,13 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type { ggml_backend_buffer_type_t ggml_backend_cpu_repack_buffer_type(void) { static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_repack = { /* .iface = */ { - /* .get_name = */ ggml_backend_cpu_repack_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cpu_repack_buffer_type_get_alignment, - /* .get_max_size = */ nullptr, // defaults to SIZE_MAX - /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes - /* .is_host = */ nullptr, + /* .get_name = */ ggml_backend_cpu_repack_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ nullptr, + /* .get_alignment = */ ggml_backend_cpu_repack_buffer_type_get_alignment, + /* .get_max_size = */ nullptr, // defaults to SIZE_MAX + /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes + /* .is_host = */ nullptr, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), /* .context = */ new ggml::cpu::repack::extra_buffer_type(), diff --git a/ggml/src/ggml-cpu/spacemit/ime.cpp b/ggml/src/ggml-cpu/spacemit/ime.cpp index 9563ea3e4b..4e8d2d2f4c 100644 --- a/ggml/src/ggml-cpu/spacemit/ime.cpp +++ b/ggml/src/ggml-cpu/spacemit/ime.cpp @@ -1648,12 +1648,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_riscv64_spacemit_buffer_type(void) { static ggml_backend_buffer_type ggml_backend_cpu_buffer_type_riscv64_spacemit = { /* .iface = */ { - /* .get_name = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer, - /* .get_alignment = */ 
ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment, - /* .get_max_size = */ nullptr, - /* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes, - /* .is_host = */ nullptr, + /* .get_name = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment, + /* .get_max_size = */ nullptr, + /* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes, + /* .is_host = */ nullptr, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0), diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 01c29a8c68..1380d64bf6 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -844,12 +844,13 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t } static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = { - /* .get_name = */ ggml_backend_cuda_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size, - /* .is_host = */ NULL, + /* .get_name = */ ggml_backend_cuda_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size, + /* .is_host = */ NULL, }; ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) { @@ -1163,12 +1164,13 @@ static bool ggml_backend_cuda_split_buffer_type_is_host(ggml_backend_buffer_type } static const ggml_backend_buffer_type_i 
ggml_backend_cuda_split_buffer_type_interface = { - /* .get_name = */ ggml_backend_cuda_split_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size, - /* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host, + /* .get_name = */ ggml_backend_cuda_split_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size, + /* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host, }; // Communication context for multi-GPU AllReduce during tensor parallelism. @@ -1568,12 +1570,13 @@ static ggml_backend_buffer_t ggml_backend_cuda_host_buffer_type_alloc_buffer(ggm ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() { static struct ggml_backend_buffer_type ggml_backend_cuda_buffer_type_host = { /* .iface = */ { - /* .get_name = */ ggml_backend_cuda_host_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, - /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, + /* .get_name = */ ggml_backend_cuda_host_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_alloc_size = */ 
ggml_backend_cpu_buffer_type()->iface.get_alloc_size, + /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cuda_reg(), 0), /* .context = */ nullptr, diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index 49bd7e4331..14a4cf80b6 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -1766,21 +1766,23 @@ static bool ggml_backend_hexagon_repack_buffer_type_is_host(ggml_backend_buffer_ } static ggml_backend_buffer_type_i ggml_backend_hexagon_buffer_type_interface = { - /* .get_name = */ ggml_backend_hexagon_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_hexagon_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size, - /* .is_host = */ ggml_backend_hexagon_buffer_type_is_host, + /* .get_name = */ ggml_backend_hexagon_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_hexagon_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size, + /* .is_host = */ ggml_backend_hexagon_buffer_type_is_host, }; static ggml_backend_buffer_type_i ggml_backend_hexagon_repack_buffer_type_interface = { - /* .get_name = */ ggml_backend_hexagon_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size, - /* .is_host = */ ggml_backend_hexagon_repack_buffer_type_is_host, + /* 
.get_name = */ ggml_backend_hexagon_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size, + /* .is_host = */ ggml_backend_hexagon_repack_buffer_type_is_host, }; struct ggml_hexagon_opbatch { diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp index a1003b3acf..681a1306c0 100644 --- a/ggml/src/ggml-metal/ggml-metal.cpp +++ b/ggml/src/ggml-metal/ggml-metal.cpp @@ -300,12 +300,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(int devi ggml_backend_buffer_type buft = { /* .iface = */ { - /* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name, - /* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer, - /* .get_alignment = */ ggml_backend_metal_buffer_type_shared_get_alignment, - /* .get_max_size = */ ggml_backend_metal_buffer_type_shared_get_max_size, - /* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size, - /* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host, + /* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name, + /* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_metal_buffer_type_shared_get_alignment, + /* .get_max_size = */ ggml_backend_metal_buffer_type_shared_get_max_size, + /* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size, + /* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i), /* .context = */ raw_ctx, @@ -375,12 +376,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(int dev ggml_backend_buffer_type buft = { 
/* .iface = */ { - /* .get_name = */ ggml_backend_metal_buffer_type_private_get_name, - /* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer, - /* .get_alignment = */ ggml_backend_metal_buffer_type_private_get_alignment, - /* .get_max_size = */ ggml_backend_metal_buffer_type_private_get_max_size, - /* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size, - /* .is_host = */ ggml_backend_metal_buffer_type_private_is_host, + /* .get_name = */ ggml_backend_metal_buffer_type_private_get_name, + /* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_metal_buffer_type_private_get_alignment, + /* .get_max_size = */ ggml_backend_metal_buffer_type_private_get_max_size, + /* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size, + /* .is_host = */ ggml_backend_metal_buffer_type_private_is_host, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i), /* .context = */ raw_ctx, @@ -453,12 +455,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(int devi // https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2333177099 ggml_backend_buffer_type buft = { /* .iface = */ { - /* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name, - /* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer, - /* .get_alignment = */ ggml_backend_metal_buffer_type_mapped_get_alignment, - /* .get_max_size = */ ggml_backend_metal_buffer_type_mapped_get_max_size, - /* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size, - /* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host, + /* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name, + /* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_metal_buffer_type_mapped_get_alignment, + /* .get_max_size = */ 
ggml_backend_metal_buffer_type_mapped_get_max_size, + /* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size, + /* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i), /* .context = */ raw_ctx, diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index ca2002424d..4758c7bd08 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -8298,12 +8298,13 @@ static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer } static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = { - /* .get_name = */ ggml_backend_opencl_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size, - /* .get_alloc_size = */ NULL, - /* .is_host = */ NULL, + /* .get_name = */ ggml_backend_opencl_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size, + /* .get_alloc_size = */ NULL, + /* .is_host = */ NULL, }; // diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp index 4f3ebf2536..cd4894e869 100644 --- a/ggml/src/ggml-openvino/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino/ggml-openvino.cpp @@ -475,12 +475,13 @@ static size_t ggml_backend_openvino_buffer_type_get_alloc_size(ggml_backend_buff } static const ggml_backend_buffer_type_i ggml_backend_openvino_buffer_type_interface = { - /* .get_name = */ ggml_backend_openvino_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer, - /* .get_alignment = */ 
ggml_backend_openvino_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size, - /* .is_host = */ nullptr, + /* .get_name = */ ggml_backend_openvino_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size, + /* .is_host = */ nullptr, }; // Get buffer type for a specific device @@ -530,12 +531,13 @@ static bool ggml_backend_openvino_host_buffer_type_is_host(ggml_backend_buffer_t } static const ggml_backend_buffer_type_i ggml_backend_openvino_host_buffer_type_interface = { - /* .get_name = */ ggml_backend_openvino_host_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size, - /* .is_host = */ ggml_backend_openvino_host_buffer_type_is_host, + /* .get_name = */ ggml_backend_openvino_host_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size, + /* .is_host = */ ggml_backend_openvino_host_buffer_type_is_host, }; GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_host_buffer_type(int device) { diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index d380577218..33bb647a27 100644 --- 
a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -629,12 +629,13 @@ static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_ty } static ggml_backend_buffer_type_i ggml_backend_rpc_buffer_type_interface = { - /* .get_name = */ ggml_backend_rpc_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_rpc_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_rpc_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_rpc_get_max_size, - /* .get_alloc_size = */ ggml_backend_rpc_buffer_type_get_alloc_size, - /* .is_host = */ NULL, + /* .get_name = */ ggml_backend_rpc_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_rpc_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_rpc_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_rpc_get_max_size, + /* .get_alloc_size = */ ggml_backend_rpc_buffer_type_get_alloc_size, + /* .is_host = */ NULL, }; static const char * ggml_backend_rpc_name(ggml_backend_t backend) { diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index fb8665a02c..e204a10f79 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -809,12 +809,13 @@ static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_t } static const ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = { - /* .get_name = */ ggml_backend_sycl_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size, - /* .is_host = */ NULL, + /* .get_name = */ ggml_backend_sycl_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ 
ggml_backend_sycl_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size, + /* .is_host = */ NULL, }; ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) { @@ -1244,12 +1245,13 @@ static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type } static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = { - /* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX - /* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size, - /* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host, + /* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size, + /* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host, }; ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) { @@ -1339,12 +1341,13 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() { GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n"); static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = { /* .iface = */ { - /* .get_name = */ ggml_backend_sycl_host_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment, - /* .get_max_size = */ NULL, // TODO: return device.maxBufferLength - /* .get_alloc_size = */ 
ggml_backend_cpu_buffer_type()->iface.get_alloc_size, - /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, + /* .get_name = */ ggml_backend_sycl_host_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment, + /* .get_max_size = */ NULL, // TODO: return device.maxBufferLength + /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, + /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), 0), /* .context = */ nullptr, diff --git a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp index 8fa20ff43b..4d84f3b9a4 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp @@ -63,19 +63,21 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff } const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = { - /* .get_name = */ ggml_backend_remoting_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size, - /* .is_host = */ NULL, + /* .get_name = */ ggml_backend_remoting_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size, + /* .is_host = */ NULL, }; const ggml_backend_buffer_type_i 
ggml_backend_remoting_buffer_from_ptr_type_interface = { - /* .get_name = */ ggml_backend_remoting_buffer_type_get_name, - /* .alloc_buffer = */ NULL, - /* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size, - /* .is_host = */ NULL, + /* .get_name = */ ggml_backend_remoting_buffer_type_get_name, + /* .alloc_buffer = */ NULL, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size, + /* .is_host = */ NULL, }; diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 6c149bf097..e81967dcda 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -286,12 +286,13 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft); static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor); static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = { - /* .get_name = */ ggml_backend_vk_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size, - /* .is_host = */ NULL, + /* .get_name = */ ggml_backend_vk_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, + /* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment, + /* .get_max_size = */ 
ggml_backend_vk_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size, + /* .is_host = */ NULL, }; class vk_memory_logger; @@ -14916,12 +14917,13 @@ static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { static struct ggml_backend_buffer_type ggml_backend_vk_buffer_type_host = { /* .iface = */ { - /* .get_name = */ ggml_backend_vk_host_buffer_type_name, - /* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer, - /* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size, - /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, - /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, + /* .get_name = */ ggml_backend_vk_host_buffer_type_name, + /* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ nullptr, + /* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, + /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, }, /* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0), /* .context = */ nullptr, diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 0b605fa86b..8ea6822eec 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -4010,6 +4010,7 @@ static ggml_backend_buffer_type_t ggml_backend_webgpu_device_get_buffer_type(ggm /* .iface = */ { /* .get_name = */ ggml_backend_webgpu_buffer_type_get_name, /* .alloc_buffer = */ ggml_backend_webgpu_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, /* .get_alignment = */ ggml_backend_webgpu_buffer_type_get_alignment, /* .get_max_size = */ 
ggml_backend_webgpu_buffer_type_get_max_size, /* .get_alloc_size = */ ggml_backend_webgpu_buffer_type_get_alloc_size, diff --git a/ggml/src/ggml-zdnn/ggml-zdnn.cpp b/ggml/src/ggml-zdnn/ggml-zdnn.cpp index 639b818d12..e755b58d42 100644 --- a/ggml/src/ggml-zdnn/ggml-zdnn.cpp +++ b/ggml/src/ggml-zdnn/ggml-zdnn.cpp @@ -385,6 +385,7 @@ ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void) { /* .iface = */ { /* .get_name = */ ggml_backend_zdnn_buffer_type_get_name, /* .alloc_buffer = */ ggml_backend_zdnn_buffer_type_alloc_buffer, + /* .alloc_buffer_n = */ NULL, /* .get_alignment = */ ggml_backend_zdnn_buffer_type_get_alignment, /* .get_max_size = */ NULL, /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes