mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-30 01:27:42 +02:00
ggml : add alloc_buffer_n to buffer type interface
Add alloc_buffer_n method to ggml_backend_buffer_type_i interface, with a public API ggml_backend_buft_alloc_buffer_n. - Default implementation in ggml-backend.cpp handles multi-buffer splitting and tensor allocation via ggml_tallocr - Meta buffer type provides custom implementation that creates per-device sub-contexts and delegates to simple buffer types - ggml_backend_alloc_ctx_tensors_from_buft now collects tensors into a list and delegates to the new API - Remove temporary ggml_backend_meta_alloc_ctx_tensors_from_buft - Add NULL alloc_buffer_n to all existing buffer type interfaces (cpu, metal, openvino, hexagon, webgpu, zdnn, virtgpu, repack) Assisted-by: llama.cpp:local pi
This commit is contained in:
@@ -36,6 +36,7 @@ extern "C" {
|
||||
|
||||
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
||||
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors);
|
||||
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
||||
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
||||
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
|
||||
|
||||
+33
-105
@@ -1116,133 +1116,61 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
|
||||
|
||||
// utils
|
||||
|
||||
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
|
||||
for (size_t i = 0; i < *n_buffers; i++) {
|
||||
ggml_backend_buffer_free((*buffers)[i]);
|
||||
}
|
||||
free(*buffers);
|
||||
}
|
||||
|
||||
static bool alloc_tensor_range(struct ggml_context * ctx,
|
||||
struct ggml_tensor * first, struct ggml_tensor * last,
|
||||
ggml_backend_buffer_type_t buft, size_t size,
|
||||
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
|
||||
|
||||
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
|
||||
if (buffer == NULL) {
|
||||
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
|
||||
free_buffers(buffers, n_buffers);
|
||||
return false;
|
||||
}
|
||||
|
||||
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
|
||||
(*buffers)[(*n_buffers)++] = buffer;
|
||||
|
||||
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
|
||||
|
||||
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
|
||||
enum ggml_status status = GGML_STATUS_SUCCESS;
|
||||
if (t->data == NULL) {
|
||||
if (t->view_src == NULL) {
|
||||
status = ggml_tallocr_alloc(&tallocr, t);
|
||||
} else if (t->buffer == NULL) {
|
||||
status = ggml_backend_view_init(t);
|
||||
}
|
||||
} else {
|
||||
if (t->view_src != NULL && t->buffer == NULL) {
|
||||
// view of a pre-allocated tensor
|
||||
status = ggml_backend_view_init(t);
|
||||
}
|
||||
}
|
||||
if (status != GGML_STATUS_SUCCESS) {
|
||||
GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
|
||||
free_buffers(buffers, n_buffers);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft_impl(
|
||||
struct ggml_context * ctx, ggml_backend_buffer_type_t buft, size_t * nbytes_total, bool no_alloc) {
|
||||
struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
|
||||
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
|
||||
|
||||
// collect tensors into a list
|
||||
int n_tensors = 0;
|
||||
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
n_tensors++;
|
||||
}
|
||||
if (n_tensors == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ggml_tensor ** tensors = (struct ggml_tensor **) malloc(n_tensors * sizeof(struct ggml_tensor *));
|
||||
int i = 0;
|
||||
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
tensors[i++] = t;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer_n(buft, tensors, n_tensors);
|
||||
free(tensors);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
|
||||
return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft);
|
||||
}
|
||||
|
||||
size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
|
||||
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
|
||||
|
||||
size_t alignment = ggml_backend_buft_get_alignment(buft);
|
||||
size_t max_size = ggml_backend_buft_get_max_size(buft);
|
||||
|
||||
ggml_backend_buffer_t * buffers = NULL;
|
||||
size_t n_buffers = 0;
|
||||
*nbytes_total = 0;
|
||||
|
||||
size_t nbytes_total = 0;
|
||||
size_t cur_buf_size = 0;
|
||||
struct ggml_tensor * first = ggml_get_first_tensor(ctx);
|
||||
for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
|
||||
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
size_t this_size = 0;
|
||||
if (t->data == NULL && t->view_src == NULL) {
|
||||
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
|
||||
}
|
||||
|
||||
if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
|
||||
// allocate tensors in the current buffer
|
||||
if (!no_alloc && !alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
|
||||
return NULL;
|
||||
}
|
||||
first = t;
|
||||
*nbytes_total += cur_buf_size;
|
||||
nbytes_total += cur_buf_size;
|
||||
cur_buf_size = this_size;
|
||||
} else {
|
||||
cur_buf_size += this_size;
|
||||
}
|
||||
}
|
||||
nbytes_total += cur_buf_size;
|
||||
|
||||
// allocate remaining tensors
|
||||
if (cur_buf_size > 0) {
|
||||
*nbytes_total += cur_buf_size;
|
||||
if (!no_alloc && !alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (no_alloc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (n_buffers == 0) {
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: all tensors in the context are already allocated\n", __func__);
|
||||
#endif
|
||||
GGML_ASSERT(!buffers);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t buffer;
|
||||
if (n_buffers == 1) {
|
||||
buffer = buffers[0];
|
||||
} else {
|
||||
buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
|
||||
}
|
||||
if (buffers) {
|
||||
free(buffers); // can be NULL if context is empty or no_alloc
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
|
||||
size_t nbytes_total = 0;
|
||||
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc=*/ true);
|
||||
GGML_ASSERT(!buf);
|
||||
return nbytes_total;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
|
||||
size_t nbytes_total = 0;
|
||||
if (ggml_backend_buft_is_meta(buft)) {
|
||||
return ggml_backend_meta_alloc_ctx_tensors_from_buft(ctx, buft);
|
||||
}
|
||||
return ggml_backend_alloc_ctx_tensors_from_buft_impl(ctx, buft, &nbytes_total, /*no_alloc =*/ false);
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
|
||||
return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_BACKEND_API_VERSION 2
|
||||
#define GGML_BACKEND_API_VERSION 3
|
||||
|
||||
//
|
||||
// Backend buffer type
|
||||
@@ -18,6 +18,8 @@ extern "C" {
|
||||
const char * (*get_name) (ggml_backend_buffer_type_t buft);
|
||||
// allocate a buffer of this type
|
||||
ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
|
||||
// (optional) allocate tensors from a list into a buffer of this type (defaults to alloc_buffer + linear allocator)
|
||||
ggml_backend_buffer_t (*alloc_buffer_n)(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors);
|
||||
// tensor alignment
|
||||
size_t (*get_alignment) (ggml_backend_buffer_type_t buft);
|
||||
// (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
|
||||
@@ -95,9 +97,6 @@ extern "C" {
|
||||
GGML_API size_t ggml_backend_meta_n_backends (ggml_backend_t meta_backend);
|
||||
GGML_API ggml_backend_t ggml_backend_meta_simple_backend(ggml_backend_t meta_backend, size_t index);
|
||||
|
||||
// temporary workaround to statically allocate tensors from a context in a deduplicated way:
|
||||
GGML_API struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
|
||||
|
||||
//
|
||||
// Backend (stream)
|
||||
//
|
||||
|
||||
@@ -288,6 +288,8 @@ static ggml_backend_buffer_type_t ggml_backend_meta_buft_simple_buft(ggml_backen
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors);
|
||||
|
||||
static size_t ggml_backend_meta_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
|
||||
const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft);
|
||||
size_t max_alignment = 1;
|
||||
@@ -329,12 +331,13 @@ static bool ggml_backend_meta_buffer_type_is_host(ggml_backend_buffer_type_t buf
|
||||
}
|
||||
|
||||
static const struct ggml_backend_buffer_type_i ggml_backend_meta_buffer_type_iface = {
|
||||
/* .get_name = */ ggml_backend_meta_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_meta_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_meta_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_meta_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_meta_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_meta_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_meta_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_meta_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ ggml_backend_meta_buffer_type_alloc_buffer_n,
|
||||
/* .get_alignment = */ ggml_backend_meta_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_meta_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_meta_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_meta_buffer_type_is_host,
|
||||
};
|
||||
|
||||
bool ggml_backend_buft_is_meta(ggml_backend_buffer_type_t buft) {
|
||||
@@ -1517,17 +1520,17 @@ static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_bac
|
||||
return ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, buf_ctx, max_size);
|
||||
}
|
||||
|
||||
struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
|
||||
static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer_n(ggml_backend_buffer_type_t buft, ggml_tensor ** tensors, int n_tensors) {
|
||||
const size_t n_simple_bufts = ggml_backend_meta_buft_n_bufts(buft);
|
||||
|
||||
constexpr size_t compute_headroom = 16; // Maximum number of views per statically allocated tensor that can be created between evals.
|
||||
const ggml_init_params params_static = {
|
||||
/*.mem_size =*/ ggml_get_mem_size(ctx),
|
||||
/*.mem_size =*/ n_tensors * ggml_tensor_overhead(),
|
||||
/*.mem_buffer =*/ nullptr,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
const ggml_init_params params_compute = {
|
||||
/*.mem_size =*/ compute_headroom*ggml_get_mem_size(ctx),
|
||||
/*.mem_size =*/ compute_headroom * n_tensors * ggml_tensor_overhead(),
|
||||
/*.mem_buffer =*/ nullptr,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
@@ -1539,7 +1542,8 @@ struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struc
|
||||
ggml_backend_meta_buffer_context * meta_buf_ctx = new ggml_backend_meta_buffer_context(stc_static, stc_compute_0, stc_compute_1, bufs);
|
||||
|
||||
ggml_backend_buffer_t meta_buf = ggml_backend_buffer_init(buft, ggml_backend_meta_buffer_iface, meta_buf_ctx, 0);
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
|
||||
for (int i = 0; i < n_tensors; i++) {
|
||||
ggml_tensor * t = tensors[i];
|
||||
t->buffer = meta_buf;
|
||||
ggml_backend_meta_buffer_init_tensor_impl(meta_buf_ctx->stc_static, t);
|
||||
t->data = (void *) 0x2000000000000000; // FIXME
|
||||
|
||||
+115
-12
@@ -44,6 +44,107 @@ ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t
|
||||
return buft->iface.alloc_buffer(buft, size);
|
||||
}
|
||||
|
||||
// default implementation of alloc_buffer_n
|
||||
// allocates tensors from a list into one or more buffers of the given type
|
||||
static ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n_default(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) {
|
||||
size_t alignment = ggml_backend_buft_get_alignment(buft);
|
||||
size_t max_size = ggml_backend_buft_get_max_size(buft);
|
||||
|
||||
ggml_backend_buffer_t * buffers = NULL;
|
||||
size_t n_buffers = 0;
|
||||
|
||||
size_t cur_buf_size = 0;
|
||||
int first = 0;
|
||||
for (int i = 0; i <= n_tensors; i++) {
|
||||
size_t this_size = 0;
|
||||
if (i < n_tensors) {
|
||||
struct ggml_tensor * t = tensors[i];
|
||||
if (t->data == NULL && t->view_src == NULL) {
|
||||
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
|
||||
}
|
||||
}
|
||||
|
||||
// flush the current buffer if adding this tensor would exceed max_size, or if we are at the end
|
||||
bool should_flush = (i == n_tensors) || (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size);
|
||||
if (should_flush && cur_buf_size > 0) {
|
||||
// allocate the buffer with the computed size for this range
|
||||
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, cur_buf_size);
|
||||
if (buffer == NULL) {
|
||||
for (size_t b = 0; b < n_buffers; b++) {
|
||||
ggml_backend_buffer_free(buffers[b]);
|
||||
}
|
||||
free(buffers);
|
||||
return NULL;
|
||||
}
|
||||
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
|
||||
|
||||
// allocate tensors in the current buffer
|
||||
bool ok = true;
|
||||
for (int j = first; j < i; j++) {
|
||||
struct ggml_tensor * t = tensors[j];
|
||||
if (t->data == NULL) {
|
||||
if (t->view_src == NULL) {
|
||||
if (ggml_tallocr_alloc(&tallocr, t) != GGML_STATUS_SUCCESS) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
} else if (t->buffer == NULL) {
|
||||
if (ggml_backend_view_init(t) != GGML_STATUS_SUCCESS) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (t->view_src != NULL && t->buffer == NULL) {
|
||||
// view of a pre-allocated tensor
|
||||
if (ggml_backend_view_init(t) != GGML_STATUS_SUCCESS) {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!ok) {
|
||||
for (size_t b = 0; b < n_buffers; b++) {
|
||||
ggml_backend_buffer_free(buffers[b]);
|
||||
}
|
||||
ggml_backend_buffer_free(buffer);
|
||||
free(buffers);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffers = (ggml_backend_buffer_t *) realloc(buffers, sizeof(ggml_backend_buffer_t) * (n_buffers + 1));
|
||||
buffers[n_buffers++] = buffer;
|
||||
cur_buf_size = 0;
|
||||
first = i;
|
||||
} else if (i < n_tensors) {
|
||||
cur_buf_size += this_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (n_buffers == 0) {
|
||||
free(buffers);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t result;
|
||||
if (n_buffers == 1) {
|
||||
result = buffers[0];
|
||||
} else {
|
||||
result = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
|
||||
}
|
||||
free(buffers);
|
||||
return result;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t ggml_backend_buft_alloc_buffer_n(ggml_backend_buffer_type_t buft, struct ggml_tensor ** tensors, int n_tensors) {
|
||||
GGML_ASSERT(buft);
|
||||
if (buft->iface.alloc_buffer_n) {
|
||||
return buft->iface.alloc_buffer_n(buft, tensors, n_tensors);
|
||||
}
|
||||
return ggml_backend_buft_alloc_buffer_n_default(buft, tensors, n_tensors);
|
||||
}
|
||||
|
||||
size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
|
||||
GGML_ASSERT(buft);
|
||||
return buft->iface.get_alignment(buft);
|
||||
@@ -2328,12 +2429,13 @@ static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft
|
||||
ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
|
||||
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||
},
|
||||
/* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
||||
/* .context = */ NULL,
|
||||
@@ -2351,12 +2453,13 @@ static const char * ggml_backend_cpu_buffer_from_ptr_type_get_name(ggml_backend_
|
||||
static ggml_backend_buffer_type_t ggml_backend_cpu_buffer_from_ptr_type(void) {
|
||||
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_cpu_buffer_from_ptr_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||
},
|
||||
/* .device = */ NULL, // FIXME ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
||||
/* .context = */ NULL,
|
||||
|
||||
@@ -1595,12 +1595,13 @@ static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buf
|
||||
* memory for CANN buffer types in the GGML backend.
|
||||
*/
|
||||
static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_cann_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cann_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_cann_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cann_buffer_type_is_host,
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -1742,12 +1743,13 @@ static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggm
|
||||
ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
|
||||
static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
},
|
||||
/* .device = */
|
||||
ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
|
||||
|
||||
@@ -228,12 +228,13 @@ static bool ggml_amx_init() {
|
||||
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
|
||||
static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_amx_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ nullptr,
|
||||
/* .get_name = */ ggml_backend_amx_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ nullptr,
|
||||
/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ nullptr,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
||||
/* .context = */ new ggml::cpu::amx::extra_buffer_type(),
|
||||
|
||||
@@ -40,12 +40,13 @@ static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_
|
||||
ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
|
||||
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ nullptr,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||
},
|
||||
/* .context = */ nullptr,
|
||||
};
|
||||
|
||||
@@ -1506,12 +1506,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void) {
|
||||
static ggml::cpu::kleidiai::extra_buffer_type ctx;
|
||||
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_kleidiai = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cpu_kleidiai_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ nullptr,
|
||||
/* .get_name = */ ggml_backend_cpu_kleidiai_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_kleidiai_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ nullptr,
|
||||
/* .get_alignment = */ ggml_backend_cpu_kleidiai_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_kleidiai_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ nullptr,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
||||
/* .context = */ &ctx,
|
||||
|
||||
@@ -4821,12 +4821,13 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
|
||||
ggml_backend_buffer_type_t ggml_backend_cpu_repack_buffer_type(void) {
|
||||
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_repack = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cpu_repack_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_repack_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
|
||||
/* .is_host = */ nullptr,
|
||||
/* .get_name = */ ggml_backend_cpu_repack_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_repack_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ nullptr,
|
||||
/* .get_alignment = */ ggml_backend_cpu_repack_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
|
||||
/* .is_host = */ nullptr,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
||||
/* .context = */ new ggml::cpu::repack::extra_buffer_type(),
|
||||
|
||||
@@ -1648,12 +1648,13 @@ ggml_backend_buffer_type_t ggml_backend_cpu_riscv64_spacemit_buffer_type(void) {
|
||||
static ggml_backend_buffer_type ggml_backend_cpu_buffer_type_riscv64_spacemit = {
|
||||
/* .iface = */
|
||||
{
|
||||
/* .get_name = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr,
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes,
|
||||
/* .is_host = */ nullptr,
|
||||
/* .get_name = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cpu_riscv64_spacemit_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ nullptr,
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_riscv64_spacemit_nbytes,
|
||||
/* .is_host = */ nullptr,
|
||||
},
|
||||
/* .device = */
|
||||
ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
||||
|
||||
@@ -844,12 +844,13 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
|
||||
}
|
||||
|
||||
static const ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_cuda_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
/* .get_name = */ ggml_backend_cuda_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device) {
|
||||
@@ -1163,12 +1164,13 @@ static bool ggml_backend_cuda_split_buffer_type_is_host(ggml_backend_buffer_type
|
||||
}
|
||||
|
||||
static const ggml_backend_buffer_type_i ggml_backend_cuda_split_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_cuda_split_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_cuda_split_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host,
|
||||
};
|
||||
|
||||
// Communication context for multi-GPU AllReduce during tensor parallelism.
|
||||
@@ -1568,12 +1570,13 @@ static ggml_backend_buffer_t ggml_backend_cuda_host_buffer_type_alloc_buffer(ggm
|
||||
ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
|
||||
static struct ggml_backend_buffer_type ggml_backend_cuda_buffer_type_host = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_cuda_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
/* .get_name = */ ggml_backend_cuda_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cuda_reg(), 0),
|
||||
/* .context = */ nullptr,
|
||||
|
||||
@@ -1766,21 +1766,23 @@ static bool ggml_backend_hexagon_repack_buffer_type_is_host(ggml_backend_buffer_
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_i ggml_backend_hexagon_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_hexagon_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_hexagon_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_hexagon_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_hexagon_buffer_type_is_host,
|
||||
};
|
||||
|
||||
static ggml_backend_buffer_type_i ggml_backend_hexagon_repack_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_hexagon_repack_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_hexagon_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_hexagon_repack_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_hexagon_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_hexagon_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_hexagon_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_hexagon_repack_buffer_type_is_host,
|
||||
};
|
||||
|
||||
struct ggml_hexagon_opbatch {
|
||||
|
||||
@@ -300,12 +300,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_shared(int devi
|
||||
|
||||
ggml_backend_buffer_type buft = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_shared_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_metal_buffer_type_shared_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host,
|
||||
/* .get_name = */ ggml_backend_metal_buffer_type_shared_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_shared_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_shared_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_metal_buffer_type_shared_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_shared_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_metal_buffer_type_shared_is_host,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
||||
/* .context = */ raw_ctx,
|
||||
@@ -375,12 +376,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_private(int dev
|
||||
|
||||
ggml_backend_buffer_type buft = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_metal_buffer_type_private_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_private_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_metal_buffer_type_private_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_metal_buffer_type_private_is_host,
|
||||
/* .get_name = */ ggml_backend_metal_buffer_type_private_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_private_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_private_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_metal_buffer_type_private_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_private_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_metal_buffer_type_private_is_host,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
||||
/* .context = */ raw_ctx,
|
||||
@@ -453,12 +455,13 @@ static ggml_backend_buffer_type_t ggml_backend_metal_buffer_type_mapped(int devi
|
||||
// https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2333177099
|
||||
ggml_backend_buffer_type buft = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_mapped_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_metal_buffer_type_mapped_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host,
|
||||
/* .get_name = */ ggml_backend_metal_buffer_type_mapped_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_mapped_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_mapped_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_metal_buffer_type_mapped_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_metal_buffer_type_mapped_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_metal_buffer_type_mapped_is_host,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_metal_reg(), i),
|
||||
/* .context = */ raw_ctx,
|
||||
|
||||
@@ -8298,12 +8298,13 @@ static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_opencl_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ NULL,
|
||||
/* .is_host = */ NULL,
|
||||
/* .get_name = */ ggml_backend_opencl_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ NULL,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
//
|
||||
|
||||
@@ -475,12 +475,13 @@ static size_t ggml_backend_openvino_buffer_type_get_alloc_size(ggml_backend_buff
|
||||
}
|
||||
|
||||
static const ggml_backend_buffer_type_i ggml_backend_openvino_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_openvino_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ nullptr,
|
||||
/* .get_name = */ ggml_backend_openvino_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ nullptr,
|
||||
};
|
||||
|
||||
// Get buffer type for a specific device
|
||||
@@ -530,12 +531,13 @@ static bool ggml_backend_openvino_host_buffer_type_is_host(ggml_backend_buffer_t
|
||||
}
|
||||
|
||||
static const ggml_backend_buffer_type_i ggml_backend_openvino_host_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_openvino_host_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_openvino_host_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_openvino_host_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_openvino_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_openvino_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_openvino_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_openvino_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_openvino_host_buffer_type_is_host,
|
||||
};
|
||||
|
||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_openvino_host_buffer_type(int device) {
|
||||
|
||||
@@ -629,12 +629,13 @@ static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_ty
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_i ggml_backend_rpc_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_rpc_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_rpc_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_rpc_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_rpc_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_rpc_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
/* .get_name = */ ggml_backend_rpc_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_rpc_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_rpc_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_rpc_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_rpc_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
static const char * ggml_backend_rpc_name(ggml_backend_t backend) {
|
||||
|
||||
@@ -809,12 +809,13 @@ static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backend_buffer_t
|
||||
}
|
||||
|
||||
static const ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_sycl_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
/* .get_name = */ ggml_backend_sycl_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
|
||||
@@ -1244,12 +1245,13 @@ static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host,
|
||||
/* .get_name = */ ggml_backend_sycl_split_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_sycl_split_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host,
|
||||
};
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
|
||||
@@ -1339,12 +1341,13 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
|
||||
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
|
||||
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_sycl_host_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_sycl_reg(), 0),
|
||||
/* .context = */ nullptr,
|
||||
|
||||
@@ -63,19 +63,21 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff
|
||||
}
|
||||
|
||||
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface = {
|
||||
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
/* .get_name = */ ggml_backend_remoting_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ NULL,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
@@ -286,12 +286,13 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type
|
||||
static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft);
|
||||
static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor);
|
||||
static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_vk_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
/* .get_name = */ ggml_backend_vk_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
class vk_memory_logger;
|
||||
@@ -14916,12 +14917,13 @@ static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_
|
||||
ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
||||
static struct ggml_backend_buffer_type ggml_backend_vk_buffer_type_host = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ nullptr,
|
||||
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
},
|
||||
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_vk_reg(), 0),
|
||||
/* .context = */ nullptr,
|
||||
|
||||
@@ -4010,6 +4010,7 @@ static ggml_backend_buffer_type_t ggml_backend_webgpu_device_get_buffer_type(ggm
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_webgpu_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_webgpu_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_webgpu_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_webgpu_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ ggml_backend_webgpu_buffer_type_get_alloc_size,
|
||||
|
||||
@@ -385,6 +385,7 @@ ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void) {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_zdnn_buffer_type_get_name,
|
||||
/* .alloc_buffer = */ ggml_backend_zdnn_buffer_type_alloc_buffer,
|
||||
/* .alloc_buffer_n = */ NULL,
|
||||
/* .get_alignment = */ ggml_backend_zdnn_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ NULL,
|
||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||
|
||||
Reference in New Issue
Block a user