ggml : add alloc_buffer_n to buffer type interface

Add alloc_buffer_n method to ggml_backend_buffer_type_i interface, with a public API ggml_backend_buft_alloc_buffer_n.

- Default implementation in ggml-backend.cpp handles multi-buffer splitting and tensor allocation via ggml_tallocr
- Meta buffer type provides a custom implementation that creates per-device sub-contexts and delegates to the simple buffer types
- ggml_backend_alloc_ctx_tensors_from_buft now collects tensors into a list and delegates to the new API
- Remove temporary ggml_backend_meta_alloc_ctx_tensors_from_buft
- Add NULL alloc_buffer_n to all existing buffer type interfaces (cpu, metal, openvino, hexagon, webgpu, zdnn, virtgpu, repack)

Assisted-by: llama.cpp:local pi
2026-06-30 01:27:42 +02:00 · 2026-05-25 16:50:13 +03:00
parent e3cab403bf
commit e0d7afdf74
22 changed files with 350 additions and 287 deletions
@@ -36,6 +36,7 @@ extern "C" {

    GGML_API const char *          ggml_backend_buft_name          (ggml_backend_buffer_type_t buft);
    GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer  (ggml_backend_buffer_type_t buft, size_t size);
+    GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors);
    GGML_API size_t                ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
    GGML_API size_t                ggml_backend_buft_get_max_size  (ggml_backend_buffer_type_t buft);
    GGML_API size_t                ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
@@ -1116,133 +1116,61 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {

 // utils

-static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
-    for (size_t i = 0; i < *n_buffers; i++) {
-        ggml_backend_buffer_free((*buffers)[i]);
-    }
-    free(*buffers);
-}
-
-static bool alloc_tensor_range(struct ggml_context * ctx,
-        struct ggml_tensor * first, struct ggml_tensor * last,
-        ggml_backend_buffer_type_t buft, size_t size,
-        ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
-
-    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
-    if (buffer == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
-        free_buffers(buffers, n_buffers);
-        return false;
-    }
-
-    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
-    (*buffers)[(*n_buffers)++] = buffer;
-
-    struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
-
-    for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
-        enum ggml_status status = GGML_STATUS_SUCCESS;
-        if (t->data == NULL) {
-            if (t->view_src == NULL) {
-                status = ggml_tallocr_alloc(&tallocr, t);
-            } else if (t->buffer == NULL) {
-                status = ggml_backend_view_init(t);
-            }
-        } else {
-            if (t->view_src != NULL && t->buffer == NULL) {
-                // view of a pre-allocated tensor
-                status = ggml_backend_view_init(t);
-            }
-        }
-        if (status != GGML_STATUS_SUCCESS) {
-            GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
-            free_buffers(buffers, n_buffers);
-            return false;
-        }
-    }
-
-    return true;
-}
-
 static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
-        struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
+        struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    // Gathers every tensor of ctx, in context order, into a temporary array and passes it to
+    // ggml_backend_buft_alloc_buffer_n, which performs the actual allocation.
+    // Returns NULL when the context holds no tensors, when the temporary array cannot be
+    // allocated, or when ggml_backend_buft_alloc_buffer_n returns NULL.
+    GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
+
+    // first pass: count the tensors so the list can be sized exactly
+    int n_tensors = 0;
+    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        n_tensors++;
+    }
+    if (n_tensors == 0) {
+        return NULL;
+    }
+
+    struct ggml_tensor ** tensors = (struct ggml_tensor **) malloc((size_t) n_tensors * sizeof(*tensors));
+    if (tensors == NULL) {
+        GGML_LOG_ERROR("%s: failed to allocate a list of %d tensors\n", __func__, n_tensors);
+        return NULL;
+    }
+
+    // second pass: fill the list in context order
+    int i = 0;
+    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        tensors[i++] = t;
+    }
+
+    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer_n(buft, tensors, n_tensors);
+    free(tensors); // the list is released as soon as the call returns
+    return buffer;
+}
+
+ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft); // public entry point; forwards both arguments unchanged to the static helper
+}
+
+size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) { // sums the buffer sizes needed for the tensors of ctx; nothing is allocated here
    GGML_ASSERT(ggml_get_no_alloc(ctx) == true);

    size_t alignment = ggml_backend_buft_get_alignment(buft);
    size_t max_size = ggml_backend_buft_get_max_size(buft);

-    ggml_backend_buffer_t * buffers = NULL;
-    size_t n_buffers = 0;
-    *nbytes_total = 0;
-
+    size_t nbytes_total = 0;
    size_t cur_buf_size = 0;
-    struct ggml_tensor * first = ggml_get_first_tensor(ctx);
-    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+
+    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        size_t this_size = 0;
        if (t->data == NULL && t->view_src == NULL) { // only tensors without data that are not views contribute to the size
            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
        }

        if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) { // tensor does not fit: close the current buffer and open a new one with this tensor
-            // allocate tensors in the current buffer
-            if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
-                return NULL;
-            }
-            first = t;
-            *nbytes_total += cur_buf_size;
+            nbytes_total += cur_buf_size;
            cur_buf_size = this_size;
        } else {
            cur_buf_size += this_size;
        }
    }
+    nbytes_total += cur_buf_size; // account for the last, still open, buffer

-    // allocate remaining tensors
-    if (cur_buf_size > 0) {
-        *nbytes_total += cur_buf_size;
-        if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
-            return NULL;
-        }
-    }
-
-    if (no_alloc) {
-        return NULL;
-    }
-
-    if (n_buffers == 0) {
-#ifndef NDEBUG
-        GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
-#endif
-        GGML_ASSERT(!buffers);
-        return NULL;
-    }
-
-    ggml_backend_buffer_t buffer;
-    if (n_buffers == 1) {
-        buffer = buffers[0];
-    } else {
-        buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
-    }
-    if (buffers) {
-        free(buffers); // can be NULL if context is empty or no_alloc
-    }
-    return buffer;
-}
-
-size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
-    size_t nbytes_total = 0;
-    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true);
-    GGML_ASSERT(!buf);
    return nbytes_total;
 }

-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
-    size_t nbytes_total = 0;
-    if (ggml_backend_buft_is_meta(buft)) {
-        return ggml_backend_meta_alloc_ctx_tensors_from_buft(ctx, buft);
-    }
-    return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false);
-}
-
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
    return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
@@ -8,7 +8,7 @@
 extern "C" {
 #endif

-    #define GGML_BACKEND_API_VERSION 2
+    #define GGML_BACKEND_API_VERSION 3

    //
    // Backend buffer type
@@ -18,6 +18,8 @@ extern "C" {
        const char *          (*get_name)      (ggml_backend_buffer_type_t buft);
        // allocate a buffer of this type
        ggml_backend_buffer_t (*alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
+        // (optional) allocate tensors from a list into a buffer of this type (defaults to alloc_buffer + linear allocator)
+        ggml_backend_buffer_t (*alloc_buffer_n)(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors);
        // tensor alignment
        size_t                (*get_alignment) (ggml_backend_buffer_type_t buft);
        // (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
@@ -95,9 +97,6 @@ extern "C" {
    GGML_API size_t         ggml_backend_meta_n_backends    (ggml_backend_t meta_backend);
    GGML_API ggml_backend_t ggml_backend_meta_simple_backend(ggml_backend_t meta_backend, size_t index);

-    // temporary workaround to statically allocate tensors from a context in a deduplicated way:
-    GGML_API struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
-
    //
    // Backend (stream)
    //
@@ -288,6 +288,8 @@ static ggml_backend_buffer_type_t ggml_backend_meta_buft_simple_buft(ggml_backen

 static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);

+static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors);
+
 static size_t ggml_backend_meta_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft);
    size_t max_alignment = 1;
@@ -329,12 +331,13 @@ static bool ggml_backend_meta_buffer_type_is_host(ggml_backend_buffer_type_t buf
 }

 static const struct ggml_backend_buffer_type_i ggml_backend_meta_buffer_type_iface = {
-    /* .get_name         = */ ggml_backend_meta_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_meta_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_meta_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_meta_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_meta_buffer_type_get_alloc_size,
-    /* .is_host          = */ ggml_backend_meta_buffer_type_is_host,
+    /* .get_name       = */ ggml_backend_meta_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_meta_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ ggml_backend_meta_buffer_type_alloc_buffer_n,
+    /* .get_alignment  = */ ggml_backend_meta_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_meta_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_meta_buffer_type_get_alloc_size,
+    /* .is_host        = */ ggml_backend_meta_buffer_type_is_host,
 };

 bool ggml_backend_buft_is_meta(ggml_backend_buffer_type_t buft) {
@@ -1517,17 +1520,17 @@ static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_bac
    return ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, buf_ctx, max_size);
 }

-struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors) {
    const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft);

    constexpr size_t compute_headroom = 16; // Maximum number of views per statically allocated tensor that can be created between evals.
    const ggml_init_params params_static = {
-        /*.mem_size   =*/ ggml_get_mem_size(ctx),
+        /*.mem_size   =*/ n_tensors * ggml_tensor_overhead(),
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ true,
    };
    const ggml_init_params params_compute = {
-        /*.mem_size   =*/ compute_headroom*ggml_get_mem_size(ctx),
+        /*.mem_size   =*/ compute_headroom * n_tensors * ggml_tensor_overhead(),
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ true,
    };
@@ -1539,7 +1542,8 @@ struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struc
    ggml_backend_meta_buffer_context * meta_buf_ctx = new ggml_backend_meta_buffer_context(stc_static, stc_compute_0, stc_compute_1, bufs);

    ggml_backend_buffer_t meta_buf = ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, meta_buf_ctx, 0);
-    for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
+    for (int i = 0; i < n_tensors; i++) {
+        ggml_tensor * t = tensors[i];
        t->buffer = meta_buf;
        ggml_backend_meta_buffer_init_tensor_impl(meta_buf_ctx->stc_static, t);
        t->data = (void *) 0x2000000000000000; // FIXME
@@ -44,6 +44,107 @@ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t
    return buft->iface.alloc_buffer(buft, size);
 }

+// frees every buffer in the list and then the list itself (buffers may be NULL when n_buffers == 0)
+static void ggml_backend_buft_free_buffer_list(ggml_backend_buffer_t * buffers, size_t n_buffers) {
+    for (size_t b = 0; b < n_buffers; b++) {
+        ggml_backend_buffer_free(buffers[b]);
+    }
+    free(buffers);
+}
+
+// default implementation of alloc_buffer_n
+//
+// Walks the tensor list in order and groups consecutive tensors into ranges whose padded
+// size fits within the max size of the buffer type. One buffer is allocated per range and
+// the tensors of the range are placed in it with a linear allocator:
+//   - tensors with data == NULL and view_src == NULL get memory from the new buffer
+//   - views (view_src != NULL) that have no buffer yet go through ggml_backend_view_init
+//   - all other tensors are left untouched
+// A trailing range that needs no new memory is not visited.
+//
+// Returns the single allocated buffer, a multi-buffer wrapping all allocated buffers, or
+// NULL when no buffer was needed or when a step failed. On failure every buffer created by
+// this call is freed before returning.
+static ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n_default(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) {
+    const size_t alignment = ggml_backend_buft_get_alignment(buft);
+    const size_t max_size  = ggml_backend_buft_get_max_size(buft);
+
+    ggml_backend_buffer_t * buffers = NULL;
+    size_t n_buffers = 0;
+
+    size_t cur_buf_size = 0; // padded size of the pending range [first, i)
+    int first = 0;
+
+    // the extra iteration with i == n_tensors flushes the last pending range
+    for (int i = 0; i <= n_tensors; i++) {
+        size_t this_size = 0;
+        if (i < n_tensors) {
+            struct ggml_tensor * t = tensors[i];
+            if (t->data == NULL && t->view_src == NULL) {
+                this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
+            }
+        }
+
+        // flush the pending range if adding this tensor would exceed max_size, or if we are at the end
+        const bool should_flush = (i == n_tensors) || (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size);
+        if (!should_flush || cur_buf_size == 0) {
+            cur_buf_size += this_size; // this_size is 0 on the final iteration
+            continue;
+        }
+
+        // allocate the buffer with the computed size for this range
+        ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, cur_buf_size);
+        if (buffer == NULL) {
+            GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), cur_buf_size);
+            ggml_backend_buft_free_buffer_list(buffers, n_buffers);
+            return NULL;
+        }
+
+        // grow the list through a temporary so the existing buffers stay reachable if realloc fails
+        ggml_backend_buffer_t * grown = (ggml_backend_buffer_t *) realloc(buffers, sizeof(ggml_backend_buffer_t) * (n_buffers + 1));
+        if (grown == NULL) {
+            ggml_backend_buffer_free(buffer);
+            ggml_backend_buft_free_buffer_list(buffers, n_buffers);
+            return NULL;
+        }
+        buffers = grown;
+        buffers[n_buffers++] = buffer;
+
+        // place the tensors of the pending range in the new buffer
+        struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
+        for (int j = first; j < i; j++) {
+            struct ggml_tensor * t = tensors[j];
+            enum ggml_status status = GGML_STATUS_SUCCESS;
+            if (t->data == NULL && t->view_src == NULL) {
+                status = ggml_tallocr_alloc(&tallocr, t);
+            } else if (t->view_src != NULL && t->buffer == NULL) {
+                // view (possibly of a pre-allocated tensor) that has not been initialized yet
+                status = ggml_backend_view_init(t);
+            }
+            if (status != GGML_STATUS_SUCCESS) {
+                GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
+                ggml_backend_buft_free_buffer_list(buffers, n_buffers); // the list already owns the new buffer
+                return NULL;
+            }
+        }
+
+        // tensor i (if any) opens the next range, so its size carries over instead of being reset to 0
+        cur_buf_size = this_size;
+        first = i;
+    }
+
+    if (n_buffers == 0) {
+        free(buffers);
+        return NULL;
+    }
+
+    ggml_backend_buffer_t result;
+    if (n_buffers == 1) {
+        result = buffers[0];
+    } else {
+        result = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
+    }
+    free(buffers);
+    return result;
+}
+
+ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) {
+    GGML_ASSERT(buft);
+    // alloc_buffer_n is an optional hook: when the buffer type leaves it unset, the generic implementation is used
+    if (!buft->iface.alloc_buffer_n) {
+        return ggml_backend_buft_alloc_buffer_n_default(buft, tensors, n_tensors);
+    }
+    return buft->iface.alloc_buffer_n(buft, tensors, n_tensors);
+}
+
 size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
    GGML_ASSERT(buft);
    return buft->iface.get_alignment(buft);
@@ -2328,12 +2429,13 @@ static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft
 ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
        /* .iface   = */ {
-            /* .get_name         = */ ggml_backend_cpu_buffer_type_get_name,
-            /* .alloc_buffer     = */ ggml_backend_cpu_buffer_type_alloc_buffer,
-            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
-            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-            /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
-            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
+            /* .get_name       = */ ggml_backend_cpu_buffer_type_get_name,
+            /* .alloc_buffer   = */ ggml_backend_cpu_buffer_type_alloc_buffer,
+            /* .alloc_buffer_n = */ NULL,
+            /* .get_alignment  = */ ggml_backend_cpu_buffer_type_get_alignment,
+            /* .get_max_size   = */ NULL, // defaults to SIZE_MAX
+            /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
+            /* .is_host        = */ ggml_backend_cpu_buffer_type_is_host,
        },
        /* .device  = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ NULL,
@@ -2351,12 +2453,13 @@ static const char * ggml_backend_cpu_buffer_from_ptr_type_get_name(ggml_backend_
 static ggml_backend_buffer_type_t ggml_backend_cpu_buffer_from_ptr_type(void) {
    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
        /* .iface   = */ {
-            /* .get_name         = */ ggml_backend_cpu_buffer_from_ptr_type_get_name,
-            /* .alloc_buffer     = */ ggml_backend_cpu_buffer_type_alloc_buffer,
-            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
-            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-            /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
-            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
+            /* .get_name       = */ ggml_backend_cpu_buffer_from_ptr_type_get_name,
+            /* .alloc_buffer   = */ ggml_backend_cpu_buffer_type_alloc_buffer,
+            /* .alloc_buffer_n = */ NULL,
+            /* .get_alignment  = */ ggml_backend_cpu_buffer_type_get_alignment,
+            /* .get_max_size   = */ NULL, // defaults to SIZE_MAX
+            /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
+            /* .is_host        = */ ggml_backend_cpu_buffer_type_is_host,
        },
        /* .device  = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ NULL,
@@ -1595,12 +1595,13 @@ static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buf
 * memory for CANN buffer types in the GGML backend.
 */
 static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_cann_buffer_type_name,
-    /* .alloc_buffer     = */ ggml_backend_cann_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_cann_buffer_type_get_alignment,
-    /* .get_max_size     = */ NULL,  // defaults to SIZE_MAX
-    /* .get_alloc_size   = */ ggml_backend_cann_buffer_type_get_alloc_size,
-    /* .is_host          = */ ggml_backend_cann_buffer_type_is_host,
+    /* .get_name       = */ ggml_backend_cann_buffer_type_name,
+    /* .alloc_buffer   = */ ggml_backend_cann_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL,  // defaults to alloc_buffer + linear allocator
+    /* .get_alignment  = */ ggml_backend_cann_buffer_type_get_alignment,
+    /* .get_max_size   = */ NULL,  // defaults to SIZE_MAX
+    /* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
+    /* .is_host        = */ ggml_backend_cann_buffer_type_is_host,
 };

 /**
@@ -1742,12 +1743,13 @@ static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggm
 ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
        /* .iface    = */ {
-                           /* .get_name         = */ ggml_backend_cann_host_buffer_type_name,
-                           /* .alloc_buffer     = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
-                           /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
-                           /* .get_max_size     = */ NULL,  // defaults to SIZE_MAX
-            /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
-                           /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
+                           /* .get_name       = */ ggml_backend_cann_host_buffer_type_name,
+                           /* .alloc_buffer   = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
+                           /* .alloc_buffer_n = */ NULL,
+                           /* .get_alignment  = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
+                           /* .get_max_size   = */ NULL,  // defaults to SIZE_MAX
+                           /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
+                           /* .is_host        = */ ggml_backend_cpu_buffer_type()->iface.is_host,
                           },
        /* .device   = */
        ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
@@ -228,12 +228,13 @@ static bool ggml_amx_init() {
 ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
-                        /* .get_name         = */ ggml_backend_amx_buffer_type_get_name,
-                        /* .alloc_buffer     = */ ggml_backend_amx_buffer_type_alloc_buffer,
-                        /* .get_alignment    = */ ggml_backend_amx_buffer_type_get_alignment,
-                        /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX
-                        /* .get_alloc_size   = */ ggml_backend_amx_buffer_type_get_alloc_size,
-                        /* .is_host          = */ nullptr,
+                        /* .get_name       = */ ggml_backend_amx_buffer_type_get_name,
+                        /* .alloc_buffer   = */ ggml_backend_amx_buffer_type_alloc_buffer,
+                        /* .alloc_buffer_n = */ nullptr,
+                        /* .get_alignment  = */ ggml_backend_amx_buffer_type_get_alignment,
+                        /* .get_max_size   = */ nullptr,  // defaults to SIZE_MAX
+                        /* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
+                        /* .is_host        = */ nullptr,
                        },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ new ggml::cpu::amx::extra_buffer_type(),
@@ -40,12 +40,13 @@ static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_
 ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
        /* .iface    = */ {
-                           /* .get_name         = */ ggml_backend_cpu_hbm_buffer_type_get_name,
-                           /* .alloc_buffer     = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
-                           /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
-                           /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX
-                           /* .get_alloc_size   = */ nullptr,  // defaults to ggml_nbytes
-                           /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
+                           /* .get_name       = */ ggml_backend_cpu_hbm_buffer_type_get_name,
+                           /* .alloc_buffer   = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
+                           /* .alloc_buffer_n = */ nullptr,
+                           /* .get_alignment  = */ ggml_backend_cpu_buffer_type_get_alignment,
+                           /* .get_max_size   = */ nullptr,  // defaults to SIZE_MAX
+                           /* .get_alloc_size = */ nullptr,  // defaults to ggml_nbytes
+                           /* .is_host        = */ ggml_backend_cpu_buffer_type_is_host,
                           },
        /* .context  = */ nullptr,
    };
@@ -1506,12 +1506,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void) {
    static ggml::cpu::kleidiai::extra_buffer_type ctx;
    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_kleidiai = {
        /* .iface    = */ {
-                           /* .get_name         = */ ggml_backend_cpu_kleidiai_buffer_type_get_name,
-                           /* .alloc_buffer     = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer,
-                           /* .get_alignment    = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment,
-                           /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX
-                           /* .get_alloc_size   = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size,
-                           /* .is_host          = */ nullptr,
+                           /* .get_name       = */ ggml_backend_cpu_kleidiai_buffer_type_get_name,
+                           /* .alloc_buffer   = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer,
+                           /* .alloc_buffer_n = */ nullptr,
+                           /* .get_alignment  = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment,
+                           /* .get_max_size   = */ nullptr,  // defaults to SIZE_MAX
+                           /* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size,
+                           /* .is_host        = */ nullptr,
                           },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ &ctx,
@@ -4821,12 +4821,13 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
 ggml_backend_buffer_type_t ggml_backend_cpu_repack_buffer_type(void) {
    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_repack = {
        /* .iface    = */ {
-                           /* .get_name         = */ ggml_backend_cpu_repack_buffer_type_get_name,
-                           /* .alloc_buffer     = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer,
-                           /* .get_alignment    = */ ggml_backend_cpu_repack_buffer_type_get_alignment,
-                           /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX
-                           /* .get_alloc_size   = */ nullptr,  // defaults to ggml_nbytes
-                           /* .is_host          = */ nullptr,
+                           /* .get_name       = */ ggml_backend_cpu_repack_buffer_type_get_name,
+                           /* .alloc_buffer   = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer,
+                           /* .alloc_buffer_n = */ nullptr,
+                           /* .get_alignment  = */ ggml_backend_cpu_repack_buffer_type_get_alignment,
+                           /* .get_max_size   = */ nullptr,  // defaults to SIZE_MAX
+                           /* .get_alloc_size = */ nullptr,  // defaults to ggml_nbytes
+                           /* .is_host        = */ nullptr,
                           },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ new ggml::cpu::repack::extra_buffer_type(),
@@ -1648,12 +1648,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_riscv64_spacemit_buffer_type(void) {
    static ggml_backend_buffer_type ggml_backend_cpu_buffer_type_riscv64_spacemit = {
  /* .iface    = */
        {
-         /* .get_name         = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name,
-         /* .alloc_buffer     = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer,
-         /* .get_alignment    = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment,
-         /* .get_max_size     = */ nullptr,
-         /* .get_alloc_size   = */ ggml_backend_cpu_riscv64_spacemit_nbytes,
-         /* .is_host          = */ nullptr,
+         /* .get_name       = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name,
+         /* .alloc_buffer   = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer,
+         /* .alloc_buffer_n = */ NULL,
+         /* .get_alignment  = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment,
+         /* .get_max_size   = */ nullptr,
+         /* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes,
+         /* .is_host        = */ nullptr,
         },
 /* .device  = */
        ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
@@ -844,12 +844,13 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
 }

 static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_cuda_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_cuda_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_cuda_buffer_type_get_alignment,
-    /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-    /* .get_alloc_size   = */ ggml_backend_cuda_buffer_type_get_alloc_size,
-    /* .is_host          = */ NULL,
+    /* .get_name       = */ ggml_backend_cuda_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_cuda_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to alloc_buffer + linear allocator
+    /* .get_alignment  = */ ggml_backend_cuda_buffer_type_get_alignment,
+    /* .get_max_size   = */ NULL, // defaults to SIZE_MAX
+    /* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
+    /* .is_host        = */ NULL,
 };

 ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) {
@@ -1163,12 +1164,13 @@ static bool ggml_backend_cuda_split_buffer_type_is_host(ggml_backend_buffer_type
 }

 static const ggml_backend_buffer_type_i ggml_backend_cuda_split_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_cuda_split_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_cuda_split_buffer_type_get_alignment,
-    /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-    /* .get_alloc_size   = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
-    /* .is_host          = */ ggml_backend_cuda_split_buffer_type_is_host,
+    /* .get_name       = */ ggml_backend_cuda_split_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_cuda_split_buffer_type_get_alignment,
+    /* .get_max_size   = */ NULL, // defaults to SIZE_MAX
+    /* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
+    /* .is_host        = */ ggml_backend_cuda_split_buffer_type_is_host,
 };

 // Communication context for multi-GPU AllReduce during tensor parallelism.
@@ -1568,12 +1570,13 @@ static ggml_backend_buffer_t ggml_backend_cuda_host_buffer_type_alloc_buffer(ggm
 ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_cuda_buffer_type_host = {
        /* .iface    = */ {
-            /* .get_name         = */ ggml_backend_cuda_host_buffer_type_name,
-            /* .alloc_buffer     = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
-            /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
-            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-            /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
-            /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
+            /* .get_name       = */ ggml_backend_cuda_host_buffer_type_name,
+            /* .alloc_buffer   = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
+            /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+            /* .get_alignment  = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
+            /* .get_max_size   = */ NULL, // defaults to SIZE_MAX
+            /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
+            /* .is_host        = */ ggml_backend_cpu_buffer_type()->iface.is_host,
        },
        /* .device   = */ ggml_backend_reg_dev_get(ggml_backend_cuda_reg(), 0),
        /* .context  = */ nullptr,
@@ -1766,21 +1766,23 @@ static bool ggml_backend_hexagon_repack_buffer_type_is_host(ggml_backend_buffer_
 }

 static ggml_backend_buffer_type_i ggml_backend_hexagon_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_hexagon_buffer_type_name,
-    /* .alloc_buffer     = */ ggml_backend_hexagon_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_hexagon_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_hexagon_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
-    /* .is_host          = */ ggml_backend_hexagon_buffer_type_is_host,
+    /* .get_name       = */ ggml_backend_hexagon_buffer_type_name,
+    /* .alloc_buffer   = */ ggml_backend_hexagon_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_hexagon_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_hexagon_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
+    /* .is_host        = */ ggml_backend_hexagon_buffer_type_is_host,
 };

 static ggml_backend_buffer_type_i ggml_backend_hexagon_repack_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_hexagon_buffer_type_name,
-    /* .alloc_buffer     = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_hexagon_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_hexagon_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
-    /* .is_host          = */ ggml_backend_hexagon_repack_buffer_type_is_host,
+    /* .get_name       = */ ggml_backend_hexagon_buffer_type_name,
+    /* .alloc_buffer   = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_hexagon_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_hexagon_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
+    /* .is_host        = */ ggml_backend_hexagon_repack_buffer_type_is_host,
 };

 struct ggml_hexagon_opbatch {
@@ -300,12 +300,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(int devi

            ggml_backend_buffer_type buft = {
                /* .iface = */ {
-                    /* .get_name         = */ ggml_backend_metal_buffer_type_shared_get_name,
-                    /* .alloc_buffer     = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
-                    /* .get_alignment    = */ ggml_backend_metal_buffer_type_shared_get_alignment,
-                    /* .get_max_size     = */ ggml_backend_metal_buffer_type_shared_get_max_size,
-                    /* .get_alloc_size   = */ ggml_backend_metal_buffer_type_shared_get_alloc_size,
-                    /* .is_host          = */ ggml_backend_metal_buffer_type_shared_is_host,
+                    /* .get_name       = */ ggml_backend_metal_buffer_type_shared_get_name,
+                    /* .alloc_buffer   = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
+                    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+                    /* .get_alignment  = */ ggml_backend_metal_buffer_type_shared_get_alignment,
+                    /* .get_max_size   = */ ggml_backend_metal_buffer_type_shared_get_max_size,
+                    /* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size,
+                    /* .is_host        = */ ggml_backend_metal_buffer_type_shared_is_host,
                },
                /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
                /* .context = */ raw_ctx,
@@ -375,12 +376,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(int dev

            ggml_backend_buffer_type buft = {
                /* .iface = */ {
-                    /* .get_name         = */ ggml_backend_metal_buffer_type_private_get_name,
-                    /* .alloc_buffer     = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
-                    /* .get_alignment    = */ ggml_backend_metal_buffer_type_private_get_alignment,
-                    /* .get_max_size     = */ ggml_backend_metal_buffer_type_private_get_max_size,
-                    /* .get_alloc_size   = */ ggml_backend_metal_buffer_type_private_get_alloc_size,
-                    /* .is_host          = */ ggml_backend_metal_buffer_type_private_is_host,
+                    /* .get_name       = */ ggml_backend_metal_buffer_type_private_get_name,
+                    /* .alloc_buffer   = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
+                    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+                    /* .get_alignment  = */ ggml_backend_metal_buffer_type_private_get_alignment,
+                    /* .get_max_size   = */ ggml_backend_metal_buffer_type_private_get_max_size,
+                    /* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size,
+                    /* .is_host        = */ ggml_backend_metal_buffer_type_private_is_host,
                },
                /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
                /* .context = */ raw_ctx,
@@ -453,12 +455,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(int devi
            //       https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2333177099
            ggml_backend_buffer_type buft = {
                /* .iface = */ {
-                    /* .get_name         = */ ggml_backend_metal_buffer_type_mapped_get_name,
-                    /* .alloc_buffer     = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
-                    /* .get_alignment    = */ ggml_backend_metal_buffer_type_mapped_get_alignment,
-                    /* .get_max_size     = */ ggml_backend_metal_buffer_type_mapped_get_max_size,
-                    /* .get_alloc_size   = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size,
-                    /* .is_host          = */ ggml_backend_metal_buffer_type_mapped_is_host,
+                    /* .get_name       = */ ggml_backend_metal_buffer_type_mapped_get_name,
+                    /* .alloc_buffer   = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
+                    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+                    /* .get_alignment  = */ ggml_backend_metal_buffer_type_mapped_get_alignment,
+                    /* .get_max_size   = */ ggml_backend_metal_buffer_type_mapped_get_max_size,
+                    /* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size,
+                    /* .is_host        = */ ggml_backend_metal_buffer_type_mapped_is_host,
                },
                /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
                /* .context = */ raw_ctx,
@@ -8298,12 +8298,13 @@ static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer
 }

 static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_opencl_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_opencl_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_opencl_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_opencl_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ NULL,
-    /* .is_host          = */ NULL,
+    /* .get_name       = */ ggml_backend_opencl_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_opencl_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_opencl_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_opencl_buffer_type_get_max_size,
+    /* .get_alloc_size = */ NULL,
+    /* .is_host        = */ NULL,
 };

 //
@@ -475,12 +475,13 @@ static size_t ggml_backend_openvino_buffer_type_get_alloc_size(ggml_backend_buff
 }

 static const ggml_backend_buffer_type_i ggml_backend_openvino_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_openvino_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_openvino_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_openvino_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_openvino_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_openvino_buffer_type_get_alloc_size,
-    /* .is_host          = */ nullptr,
+    /* .get_name       = */ ggml_backend_openvino_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_openvino_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ nullptr, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_openvino_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_openvino_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
+    /* .is_host        = */ nullptr,
 };

 // Get buffer type for a specific device
@@ -530,12 +531,13 @@ static bool ggml_backend_openvino_host_buffer_type_is_host(ggml_backend_buffer_t
 }

 static const ggml_backend_buffer_type_i ggml_backend_openvino_host_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_openvino_host_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_openvino_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_openvino_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_openvino_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_openvino_buffer_type_get_alloc_size,
-    /* .is_host          = */ ggml_backend_openvino_host_buffer_type_is_host,
+    /* .get_name       = */ ggml_backend_openvino_host_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_openvino_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ nullptr, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_openvino_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_openvino_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
+    /* .is_host        = */ ggml_backend_openvino_host_buffer_type_is_host,
 };

 GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_host_buffer_type(int device) {
@@ -629,12 +629,13 @@ static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_ty
 }

 static ggml_backend_buffer_type_i ggml_backend_rpc_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_rpc_buffer_type_name,
-    /* .alloc_buffer     = */ ggml_backend_rpc_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_rpc_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_rpc_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_rpc_buffer_type_get_alloc_size,
-    /* .is_host          = */ NULL,
+    /* .get_name       = */ ggml_backend_rpc_buffer_type_name,
+    /* .alloc_buffer   = */ ggml_backend_rpc_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_rpc_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_rpc_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_rpc_buffer_type_get_alloc_size,
+    /* .is_host        = */ NULL,
 };

 static const char * ggml_backend_rpc_name(ggml_backend_t backend) {
@@ -809,12 +809,13 @@ static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_t
 }

 static const ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_sycl_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_sycl_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_sycl_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_sycl_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_sycl_buffer_type_get_alloc_size,
-    /* .is_host          = */ NULL,
+    /* .get_name       = */ ggml_backend_sycl_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_sycl_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_sycl_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_sycl_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size,
+    /* .is_host        = */ NULL,
 };

 ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
@@ -1244,12 +1245,13 @@ static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type
 }

 static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_sycl_split_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_sycl_split_buffer_type_get_alignment,
-    /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-    /* .get_alloc_size   = */ ggml_backend_sycl_split_buffer_type_get_alloc_size,
-    /* .is_host          = */ ggml_backend_sycl_split_buffer_type_is_host,
+    /* .get_name       = */ ggml_backend_sycl_split_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_sycl_split_buffer_type_get_alignment,
+    /* .get_max_size   = */ NULL, // defaults to SIZE_MAX
+    /* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size,
+    /* .is_host        = */ ggml_backend_sycl_split_buffer_type_is_host,
 };

 ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
@@ -1339,12 +1341,13 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
    static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
        /* .iface    = */ {
-            /* .get_name         = */ ggml_backend_sycl_host_buffer_type_name,
-            /* .alloc_buffer     = */ ggml_backend_sycl_host_buffer_type_alloc_buffer,
-            /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
-            /* .get_max_size     = */ NULL, // TODO: return device.maxBufferLength
-            /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
-            /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
+            /* .get_name       = */ ggml_backend_sycl_host_buffer_type_name,
+            /* .alloc_buffer   = */ ggml_backend_sycl_host_buffer_type_alloc_buffer,
+            /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+            /* .get_alignment  = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
+            /* .get_max_size   = */ NULL, // TODO: return device.maxBufferLength
+            /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
+            /* .is_host        = */ ggml_backend_cpu_buffer_type()->iface.is_host,
        },
        /* .device   = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), 0),
        /* .context  = */ nullptr,
@@ -63,19 +63,21 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff
 }

 const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_remoting_buffer_type_get_name,
-    /* .alloc_buffer     = */ ggml_backend_remoting_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_remoting_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_remoting_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_remoting_buffer_type_get_alloc_size,
-    /* .is_host          = */ NULL,
+    /* .get_name       = */ ggml_backend_remoting_buffer_type_get_name,
+    /* .alloc_buffer   = */ ggml_backend_remoting_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_remoting_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_remoting_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
+    /* .is_host        = */ NULL,
 };

 const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface = {
-    /* .get_name         = */ ggml_backend_remoting_buffer_type_get_name,
-    /* .alloc_buffer     = */ NULL,
-    /* .get_alignment    = */ ggml_backend_remoting_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_remoting_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_remoting_buffer_type_get_alloc_size,
-    /* .is_host          = */ NULL,
+    /* .get_name       = */ ggml_backend_remoting_buffer_type_get_name,
+    /* .alloc_buffer   = */ NULL,
+    /* .alloc_buffer_n = */ NULL, // NOTE(review): .alloc_buffer is NULL too - confirm the generic path is never reached for this type
+    /* .get_alignment  = */ ggml_backend_remoting_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_remoting_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
+    /* .is_host        = */ NULL,
 };
@@ -286,12 +286,13 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type
 static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft);
 static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor);
 static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = {
-    /* .get_name         = */ ggml_backend_vk_buffer_type_name,
-    /* .alloc_buffer     = */ ggml_backend_vk_buffer_type_alloc_buffer,
-    /* .get_alignment    = */ ggml_backend_vk_buffer_type_get_alignment,
-    /* .get_max_size     = */ ggml_backend_vk_buffer_type_get_max_size,
-    /* .get_alloc_size   = */ ggml_backend_vk_buffer_type_get_alloc_size,
-    /* .is_host          = */ NULL,
+    /* .get_name       = */ ggml_backend_vk_buffer_type_name,
+    /* .alloc_buffer   = */ ggml_backend_vk_buffer_type_alloc_buffer,
+    /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
+    /* .get_alignment  = */ ggml_backend_vk_buffer_type_get_alignment,
+    /* .get_max_size   = */ ggml_backend_vk_buffer_type_get_max_size,
+    /* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size,
+    /* .is_host        = */ NULL,
 };

 class vk_memory_logger;
@@ -14916,12 +14917,13 @@ static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_
 ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_vk_buffer_type_host = {
        /* .iface    = */ {
-            /* .get_name         = */ ggml_backend_vk_host_buffer_type_name,
-            /* .alloc_buffer     = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
-            /* .get_alignment    = */ ggml_backend_vk_host_buffer_type_get_alignment,
-            /* .get_max_size     = */ ggml_backend_vk_host_buffer_type_get_max_size,
-            /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
-            /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
+            /* .get_name       = */ ggml_backend_vk_host_buffer_type_name,
+            /* .alloc_buffer   = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
+            /* .alloc_buffer_n = */ nullptr, // defaults to the generic implementation in ggml-backend.cpp
+            /* .get_alignment  = */ ggml_backend_vk_host_buffer_type_get_alignment,
+            /* .get_max_size   = */ ggml_backend_vk_host_buffer_type_get_max_size,
+            /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
+            /* .is_host        = */ ggml_backend_cpu_buffer_type()->iface.is_host,
        },
        /* .device   = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0),
        /* .context  = */ nullptr,
@@ -4010,6 +4010,7 @@ static ggml_backend_buffer_type_t ggml_backend_webgpu_device_get_buffer_type(ggm
        /* .iface = */ {
                        /* .get_name       = */ ggml_backend_webgpu_buffer_type_get_name,
                        /* .alloc_buffer   = */ ggml_backend_webgpu_buffer_type_alloc_buffer,
+                        /* .alloc_buffer_n = */ NULL, // defaults to the generic implementation in ggml-backend.cpp
                        /* .get_alignment  = */ ggml_backend_webgpu_buffer_type_get_alignment,
                        /* .get_max_size   = */ ggml_backend_webgpu_buffer_type_get_max_size,
                        /* .get_alloc_size = */ ggml_backend_webgpu_buffer_type_get_alloc_size,
@@ -385,6 +385,7 @@ ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void) {
        /* .iface   = */ {
            /* .get_name       = */ ggml_backend_zdnn_buffer_type_get_name,
            /* .alloc_buffer   = */ ggml_backend_zdnn_buffer_type_alloc_buffer,
+            /* .alloc_buffer_n = */ NULL,  // defaults to the generic implementation in ggml-backend.cpp
            /* .get_alignment  = */ ggml_backend_zdnn_buffer_type_get_alignment,
            /* .get_max_size   = */ NULL,
            /* .get_alloc_size = */ NULL,  // defaults to ggml_nbytes