Compare commits

...

12 Commits

Author SHA1 Message Date
ag2s20150909 bec2183f2c fix: Vulkan shader gen binary path when Cross-compiling (#11096)
* fix: Vulkan shader gen binary path when cross compiling
2025-01-08 09:17:29 +01:00
Johannes Gäßler 53ff6b9b9f GGUF: C++ refactor, backend support, misc fixes (#11030)
* GGUF: C++ refactor, backend support, misc fixes

remove ggml_tensor.backend

update CODEOWNERS [no ci]

remove gguf_get_data from API

revise GGUF API data types
2025-01-07 18:01:58 +01:00
Diego Devesa 017cc5f446 ggml-backend : only offload from host buffers (fix) (#11124) 2025-01-07 16:11:57 +01:00
Diego Devesa a3d50bc022 ggml-backend : only offload from host buffers (#11120) 2025-01-07 12:38:05 +01:00
Radoslav Gerganov a4dd490069 rpc : code cleanup (#11107)
Remove duplicated macros, use GGML_LOG_ERROR for errors
2025-01-07 08:37:02 +02:00
Akarshan Biswas c0d6f790d0 SYCL: Use get_multi_ptr instead of deprecated get_pointer in wkv6 (#11087)
* SYCL: Use get_multi_ptr instead of deprecated get_pointer in wkv6

* Revert "SYCL: Use get_multi_ptr instead of deprecated get_pointer in wkv6"

This reverts commit f62dc45f31.

* Reland: Use get_multi_ptr instead of deprecated get_pointer in wkv6
2025-01-07 14:26:07 +08:00
Eric Curtin dc7cef9f37 llama-run : fix context size (#11094)
Set `n_ctx` equal to `n_batch` in `Opt` class. Now context size is
a more reasonable 2048.

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
2025-01-06 23:45:28 +01:00
Georgi Gerganov ecebbd292d llama : remove unused headers (#11109)
ggml-ci
2025-01-06 17:52:35 +02:00
Xuan Son Nguyen 96be8c3264 github : add cmd line field to bug report (#11090)
* github : cmd line to bug report

* codeowners : (@ngxson) only watch dockerfile

* Apply suggestions from code review [no ci]

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* rm cmd in log output [no ci]

* rm 2 [no ci]

* no need backticks [no ci]

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
2025-01-06 16:34:49 +01:00
Georgi Gerganov e6e7c75d94 server : fix extra BOS in infill endpoint (#11106)
* server : fix extra BOS in infill endpoing

ggml-ci

* server : update infill tests
2025-01-06 15:36:08 +02:00
Xuan Son Nguyen 09186fabbe llama : remove check flash_attn with lora (#11104) 2025-01-06 13:41:12 +01:00
Asghar Ghorbani 96a1dc27c3 llama : prevent system info string accumulation across calls (#11101) 2025-01-06 13:21:46 +02:00
32 changed files with 1855 additions and 1681 deletions
+11 -1
View File
@@ -65,12 +65,22 @@ body:
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: command
attributes:
label: Compile command
description: >
Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including the command that you entered and any generated text.
Please copy and paste any relevant log output, including any generated text.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
+11 -1
View File
@@ -52,6 +52,16 @@ body:
- Other (Please specify in the next section)
validations:
required: false
- type: textarea
id: command
attributes:
label: Command line
description: >
Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: false
- type: textarea
id: info
attributes:
@@ -74,7 +84,7 @@ body:
attributes:
label: Relevant log output
description: >
If applicable, please copy and paste any relevant log output, including the command that you entered and any generated text.
If applicable, please copy and paste any relevant log output, including any generated text.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
+7 -1
View File
@@ -1,5 +1,11 @@
# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
/ci/ @ggerganov
/.devops/ @ngxson
/.devops/*.Dockerfile @ngxson
/examples/server/ @ngxson
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
/ggml/src/ggml-opt.cpp @JohannesGaessler
/ggml/src/gguf.cpp @JohannesGaessler
+3
View File
@@ -2,6 +2,9 @@
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
#endif
#include "ggml.h"
#include "gguf.h"
#include "common.h"
#include "log.h"
// Change JSON_ASSERT from assert() to GGML_ASSERT:
@@ -1,4 +1,6 @@
#include "ggml.h"
#include "gguf.h"
#include "llama.h"
#include "common.h"
#include "log.h"
@@ -1,7 +1,9 @@
#include "ggml.h"
#include "gguf.h"
#include "arg.h"
#include "common.h"
#include "llama.h"
#include "ggml.h"
#include "pca.hpp"
#include "mean.hpp"
+4 -2
View File
@@ -1,7 +1,9 @@
#include "arg.h"
#include "common.h"
#include "ggml.h"
#include "ggml-alloc.h"
#include "gguf.h"
#include "arg.h"
#include "common.h"
#include <map>
#include <vector>
+1
View File
@@ -1,4 +1,5 @@
#include "ggml.h"
#include "gguf.h"
#include <cstdlib> /* abort() */
#include <cstddef>
+8 -6
View File
@@ -1,16 +1,18 @@
#include "ggml.h"
#include "gguf.h"
#include "llama.h"
#include "common.h"
#include <algorithm>
#include <cinttypes>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <stdexcept>
#include <cstring>
#include <fstream>
#include <string>
#include <vector>
#include <climits>
#include <cstdio>
#include <cstring>
#include <stdexcept>
#if defined(_WIN32)
#include <windows.h>
@@ -296,7 +298,7 @@ struct split_strategy {
total_size += ggml_nbytes(t);
}
total_size = total_size / 1000 / 1000; // convert to megabytes
printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
i_split++;
}
}
+10 -6
View File
@@ -1,10 +1,9 @@
#include "ggml.h"
#include "gguf.h"
#include <cstdio>
#include <cinttypes>
#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#undef MIN
@@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {
for (int i = 0; i < n_tensors; ++i) {
const char * name = gguf_get_tensor_name (ctx, i);
const size_t size = gguf_get_tensor_size (ctx, i);
const size_t offset = gguf_get_tensor_offset(ctx, i);
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
}
}
@@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
for (int i = 0; i < n_tensors; ++i) {
const char * name = gguf_get_tensor_name (ctx, i);
const size_t size = gguf_get_tensor_size (ctx, i);
const size_t offset = gguf_get_tensor_offset(ctx, i);
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
}
}
@@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
__func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
// print first 10 elements
const float * data = (const float *) cur->data;
@@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
const float * data = (const float *) cur->data;
for (int j = 0; j < ggml_nelements(cur); ++j) {
if (data[j] != 100 + i) {
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
gguf_free(ctx);
return false;
}
@@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
check_data = false;
}
srand(123456);
const std::string fname(argv[1]);
const std::string mode (argv[2]);
+4 -2
View File
@@ -7,6 +7,7 @@
#include "ggml-cpu.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "gguf.h"
//#ifdef GGML_USE_CUDA
//#include "ggml-cuda.h"
@@ -262,7 +263,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
{
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
int arr_n = gguf_get_arr_n(ctx_gguf, i);
const void * data = gguf_get_arr_data(ctx_gguf, i);
const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
std::stringstream ss;
ss << "[";
for (int j = 0; j < arr_n; j++) {
@@ -2734,7 +2735,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
total_size_org += orig_size;
total_size_new += new_size;
gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
fout.write((const char *)new_data, new_size);
size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
for (size_t j = 0; j < pad; ++j) {
+1
View File
@@ -83,6 +83,7 @@ class Opt {
}
ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default;
ctx_params.n_ctx = ctx_params.n_batch;
model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
temperature = temperature >= 0 ? temperature : temperature_default;
+1 -1
View File
@@ -3797,7 +3797,7 @@ int main(int argc, char ** argv) {
data["input_extra"] = input_extra; // default to empty array if it's not exist
std::string prompt = json_value(data, "prompt", std::string());
std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.ctx, prompt, true, true);
std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.ctx, prompt, false, true);
SRV_DBG("creating infill tasks, n_prompts = %d\n", (int) tokenized_prompts.size());
data["prompt"] = format_infill(
ctx_server.ctx,
+1 -1
View File
@@ -18,7 +18,7 @@ def test_infill_without_input_extra():
"input_suffix": "}\n",
})
assert res.status_code == 200
assert match_regex("(Ann|small|shiny)+", res.body["content"])
assert match_regex("(Ann|small|shiny|Daddy)+", res.body["content"])
def test_infill_with_input_extra():
+2 -1
View File
@@ -243,7 +243,8 @@ set(GGML_PUBLIC_HEADERS
include/ggml-metal.h
include/ggml-rpc.h
include/ggml-sycl.h
include/ggml-vulkan.h)
include/ggml-vulkan.h
include/gguf.h)
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)
+1
View File
@@ -7,6 +7,7 @@
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "gguf.h"
#include <memory>
// Smart pointers for ggml types
-140
View File
@@ -241,12 +241,6 @@
#define GGML_ROPE_TYPE_MROPE 8
#define GGML_ROPE_TYPE_VISION 24
#define GGUF_MAGIC "GGUF"
#define GGUF_VERSION 3
#define GGUF_DEFAULT_ALIGNMENT 32
#define GGML_UNUSED(x) (void)(x)
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -403,12 +397,6 @@ extern "C" {
GGML_PREC_F32,
};
enum ggml_backend_type {
GGML_BACKEND_TYPE_CPU = 0,
GGML_BACKEND_TYPE_GPU = 10,
GGML_BACKEND_TYPE_GPU_SPLIT = 20,
};
// model file types
enum ggml_ftype {
GGML_FTYPE_UNKNOWN = -1,
@@ -587,8 +575,6 @@ extern "C" {
struct ggml_tensor {
enum ggml_type type;
GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
struct ggml_backend_buffer * buffer;
int64_t ne[GGML_MAX_DIMS]; // number of elements
@@ -2111,132 +2097,6 @@ extern "C" {
int64_t n_per_row,
const float * imatrix);
//
// gguf
//
enum gguf_type {
GGUF_TYPE_UINT8 = 0,
GGUF_TYPE_INT8 = 1,
GGUF_TYPE_UINT16 = 2,
GGUF_TYPE_INT16 = 3,
GGUF_TYPE_UINT32 = 4,
GGUF_TYPE_INT32 = 5,
GGUF_TYPE_FLOAT32 = 6,
GGUF_TYPE_BOOL = 7,
GGUF_TYPE_STRING = 8,
GGUF_TYPE_ARRAY = 9,
GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_COUNT, // marks the end of the enum
};
struct gguf_context;
struct gguf_init_params {
bool no_alloc;
// if not NULL, create a ggml_context and allocate the tensor data in it
struct ggml_context ** ctx;
};
GGML_API struct gguf_context * gguf_init_empty(void);
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
GGML_API void gguf_free(struct gguf_context * ctx);
GGML_API const char * gguf_type_name(enum gguf_type type);
GGML_API int gguf_get_version (const struct gguf_context * ctx);
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
// will abort if the wrong type is used for the key
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
// removes key if it exists
GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
// overrides existing values or adds a new one
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
// set or add KV pairs from another context
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
// manage tensor info
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
// writing gguf files can be done in 2 ways:
//
// - write the entire gguf_context to a binary file in a single pass:
//
// gguf_write_to_file(ctx, fname);
//
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
//
// FILE * f = fopen(fname, "wb");
// fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
// fwrite(f, ...);
// void * data = gguf_meta_get_meta_data(ctx);
// fseek(f, 0, SEEK_SET);
// fwrite(f, data, gguf_get_meta_size(ctx));
// free(data);
// fclose(f);
//
// write the entire context to a binary file
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
#ifdef __cplusplus
// restrict not standard in C++
# if defined(__GNUC__)
+202
View File
@@ -0,0 +1,202 @@
// This file contains functionality related to "GGUF" files, the binary file format used by ggml.
// GGUF files have the following structure:
//
// 1. File magic "GGUF" (4 bytes).
// 2. File version (uint32_t).
// 3. Number of ggml tensors in file (int64_t).
// 4. Number of key-value-pairs in file (int64_t).
// 5. For each KV pair:
// 1. The key (string).
// 2. The value type (gguf_type).
// 3a. If the value type is GGUF_TYPE_ARRAY:
// 1. The type of the array (gguf_type).
// 2. The number of elements in the array (uint64_t).
// 3. The binary representation of each element in the array.
// 3b. Otherwise:
// 1. The binary representation of the value.
// 6. For each ggml tensor:
// 1. The tensor name (string).
// 2. The number of dimensions of the tensor (uint32_t).
// 3. For each dimension:
// 1. The size of the tensor in the dimension (int64_t).
// 4. The tensor data type (ggml_type).
// 5. The tensor data offset in the tensor data binary blob (uint64_t).
// 7. The tensor data binary blob (optional, aligned).
//
// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
// All enums are stored as int32_t.
// All bool values are stored as int8_t.
// If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
// otherwise GGUF_DEFAULT_ALIGNMENT is used.
//
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
#pragma once
#include "ggml.h"
#include <stdbool.h>
#include <stdint.h>
#define GGUF_MAGIC "GGUF"
#define GGUF_VERSION 3
#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment"
#define GGUF_DEFAULT_ALIGNMENT 32
#ifdef __cplusplus
extern "C" {
#endif
// types that can be stored as GGUF KV data
enum gguf_type {
GGUF_TYPE_UINT8 = 0,
GGUF_TYPE_INT8 = 1,
GGUF_TYPE_UINT16 = 2,
GGUF_TYPE_INT16 = 3,
GGUF_TYPE_UINT32 = 4,
GGUF_TYPE_INT32 = 5,
GGUF_TYPE_FLOAT32 = 6,
GGUF_TYPE_BOOL = 7,
GGUF_TYPE_STRING = 8,
GGUF_TYPE_ARRAY = 9,
GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_COUNT, // marks the end of the enum
};
struct gguf_context;
struct gguf_init_params {
bool no_alloc;
// if not NULL, create a ggml_context and allocate the tensor data in it
struct ggml_context ** ctx;
};
GGML_API struct gguf_context * gguf_init_empty(void);
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
GGML_API void gguf_free(struct gguf_context * ctx);
GGML_API const char * gguf_type_name(enum gguf_type type);
GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx);
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx);
GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id);
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id);
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id);
// will abort if the wrong type is used for the key
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int64_t key_id);
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int64_t key_id);
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id);
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id);
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id);
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id);
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id);
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id);
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id);
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id);
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id);
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id);
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id);
GGML_API size_t gguf_get_arr_n (const struct gguf_context * ctx, int64_t key_id);
// get raw pointer to the first element of the array with the given key_id
// for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id);
// get ith C string from array with given key_id
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
GGML_API int64_t gguf_get_n_tensors (const struct gguf_context * ctx);
GGML_API int64_t gguf_find_tensor (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id);
GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int64_t tensor_id);
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int64_t tensor_id);
GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int64_t tensor_id);
// removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key);
// overrides an existing KV pair or adds a new one, the new KV pair is always at the back
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
// creates a new array with n elements of the given type and copies the corresponding number of bytes from data
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n);
// creates a new array with n strings and copies the corresponding strings from data
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n);
// set or add KV pairs from another context
GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
// add tensor to GGUF context, tensor name must be unique
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
// after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
// in such a way that the tensor data remains as one contiguous block (except for padding)
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
// assumes that at least gguf_get_tensor_size bytes can be read from data
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
// writing gguf files can be done in 3 ways:
//
// - write the entire gguf_context to a binary file in a single pass:
//
// gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
//
// - write only the meta data to a file, then re-open the file and append the tensor data:
//
// gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
// FILE * f = fopen(fname, "ab");
// fwrite(f, ...); // write tensor data
// fclose(f);
//
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
//
// FILE * f = fopen(fname, "wb");
// const size_t size_meta = gguf_get_meta_size(ctx);
// fseek(f, size_meta, SEEK_SET);
// fwrite(f, ...); // write tensor data
// void * data = malloc(size_meta);
// gguf_get_meta_data(ctx, data);
// rewind(f);
// fwrite(data, 1, data, f);
// free(data);
// fclose(f);
//
// write the entire context to a binary file
GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
// writes the meta data to pointer "data"
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
#ifdef __cplusplus
}
#endif
+3 -1
View File
@@ -208,6 +208,7 @@ add_library(ggml-base
../include/ggml-backend.h
../include/ggml-cpp.h
../include/ggml-opt.h
../include/gguf.h
ggml.c
ggml-alloc.c
ggml-backend.cpp
@@ -215,7 +216,8 @@ add_library(ggml-base
ggml-threading.cpp
ggml-threading.h
ggml-quants.c
ggml-quants.h)
ggml-quants.h
gguf.cpp)
target_include_directories(ggml-base PRIVATE .)
+1 -1
View File
@@ -764,7 +764,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
// check if a backend with higher prio wants to offload the op
if (src_backend_id == sched->n_backends - 1) {
if (src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
for (int b = 0; b < src_backend_id; b++) {
if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
SET_CAUSE(tensor, "1.off");
+2
View File
@@ -4169,6 +4169,8 @@ static ggml_backend_buffer_t ggml_backend_cpu_aarch64_buffer_type_alloc_buffer(g
buffer->buft = buft;
buffer->iface.init_tensor = ggml_backend_cpu_aarch64_buffer_init_tensor;
buffer->iface.set_tensor = ggml_backend_cpu_aarch64_buffer_set_tensor;
buffer->iface.get_tensor = nullptr;
buffer->iface.cpy_tensor = nullptr;
return buffer;
}
+11 -16
View File
@@ -3,6 +3,8 @@
// GGML internal header
#include "ggml.h"
#include "gguf.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
@@ -551,22 +553,15 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
struct gguf_buf {
void * data;
size_t size;
size_t offset;
};
GGML_API struct gguf_buf gguf_buf_init(size_t size);
GGML_API void gguf_buf_free(struct gguf_buf buf);
GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);
#ifdef __cplusplus
}
#endif
#ifdef __cplusplus
#include <vector>
// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
#endif // __cplusplus
+20 -29
View File
@@ -27,15 +27,6 @@
#endif
#include <cstring>
#define UNUSED GGML_UNUSED
#define GGML_DEBUG 0
#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif
#ifdef _WIN32
typedef SOCKET sockfd_t;
using ssize_t = __int64;
@@ -411,7 +402,7 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
initialized = true;
}
#else
UNUSED(initialized);
GGML_UNUSED(initialized);
#endif
auto sock = socket_connect(host.c_str(), port);
if (sock == nullptr) {
@@ -640,7 +631,7 @@ static void ggml_backend_rpc_free(ggml_backend_t backend) {
}
static void ggml_backend_rpc_synchronize(ggml_backend_t backend) {
UNUSED(backend);
GGML_UNUSED(backend);
// this is no-op because we don't have any async operations
}
@@ -850,7 +841,7 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", __func__, request.size, response.remote_ptr, response.remote_size);
buffers.insert(buffer);
} else {
GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> failed\n", __func__, request.size);
GGML_LOG_ERROR("[%s] size: %" PRIu64 " -> failed\n", __func__, request.size);
}
}
@@ -872,7 +863,7 @@ bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rp
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
if (buffers.find(buffer) == buffers.end()) {
GGML_PRINT_DEBUG("[%s] buffer not found\n", __func__);
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
return false;
}
void * base = ggml_backend_buffer_get_base(buffer);
@@ -884,7 +875,7 @@ bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
if (buffers.find(buffer) == buffers.end()) {
GGML_PRINT_DEBUG("[%s] buffer not found\n", __func__);
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
return false;
}
ggml_backend_buffer_free(buffer);
@@ -896,7 +887,7 @@ bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) {
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 ", value: %u\n", __func__, request.remote_ptr, request.value);
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
if (buffers.find(buffer) == buffers.end()) {
GGML_PRINT_DEBUG("[%s] buffer not found\n", __func__);
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
return false;
}
ggml_backend_buffer_clear(buffer, request.value);
@@ -952,7 +943,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
struct ggml_context * ctx = ggml_init(params);
ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
if (tensor == nullptr) {
GGML_PRINT_DEBUG("[%s] error deserializing tensor\n", __func__);
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
ggml_free(ctx);
return false;
}
@@ -1017,7 +1008,7 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
struct ggml_context * ctx = ggml_init(params);
ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
if (tensor == nullptr) {
GGML_PRINT_DEBUG("[%s] error deserializing tensor\n", __func__);
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
ggml_free(ctx);
return false;
}
@@ -1051,7 +1042,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
ggml_tensor * src = deserialize_tensor(ctx, &request.src);
ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
if (src == nullptr || dst == nullptr) {
GGML_PRINT_DEBUG("[%s] error deserializing tensors\n", __func__);
GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
ggml_free(ctx);
return false;
}
@@ -1385,14 +1376,14 @@ static void ggml_backend_rpc_device_get_memory(ggml_backend_dev_t dev, size_t *
ggml_backend_rpc_get_device_memory(ctx->endpoint.c_str(), free, total);
UNUSED(dev);
GGML_UNUSED(dev);
}
static enum ggml_backend_dev_type ggml_backend_rpc_device_get_type(ggml_backend_dev_t dev) {
// TODO: obtain value from the server
return GGML_BACKEND_DEVICE_TYPE_GPU;
UNUSED(dev);
GGML_UNUSED(dev);
}
static void ggml_backend_rpc_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
@@ -1413,7 +1404,7 @@ static ggml_backend_t ggml_backend_rpc_device_init(ggml_backend_dev_t dev, const
return ggml_backend_rpc_init(ctx->endpoint.c_str());
UNUSED(params);
GGML_UNUSED(params);
}
static ggml_backend_buffer_type_t ggml_backend_rpc_device_get_buffer_type(ggml_backend_dev_t dev) {
@@ -1421,12 +1412,12 @@ static ggml_backend_buffer_type_t ggml_backend_rpc_device_get_buffer_type(ggml_b
return ggml_backend_rpc_buffer_type(ctx->endpoint.c_str());
UNUSED(dev);
GGML_UNUSED(dev);
}
static bool ggml_backend_rpc_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
UNUSED(dev);
UNUSED(op);
GGML_UNUSED(dev);
GGML_UNUSED(op);
//TODO: call the remote backend and cache the results
return true;
}
@@ -1463,20 +1454,20 @@ static const struct ggml_backend_device_i ggml_backend_rpc_device_i = {
static const char * ggml_backend_rpc_reg_get_name(ggml_backend_reg_t reg) {
return "RPC";
UNUSED(reg);
GGML_UNUSED(reg);
}
static size_t ggml_backend_rpc_reg_get_device_count(ggml_backend_reg_t reg) {
return 0;
UNUSED(reg);
GGML_UNUSED(reg);
}
static ggml_backend_dev_t ggml_backend_rpc_reg_get_device(ggml_backend_reg_t reg, size_t index) {
GGML_ABORT("The RPC backend does not have enumerated devices - use ggml_backend_add_device instead");
UNUSED(reg);
UNUSED(index);
GGML_UNUSED(reg);
GGML_UNUSED(index);
}
static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const char * name) {
@@ -1485,7 +1476,7 @@ static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const ch
}
return NULL;
UNUSED(reg);
GGML_UNUSED(reg);
}
static const struct ggml_backend_reg_i ggml_backend_rpc_reg_i = {
+1 -1
View File
@@ -131,7 +131,7 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* s
[=](sycl::nd_item<3> item_ct1) {
rwkv_wkv_f32_kernel(
B, T, C, H, k_d, v_d, r_d, tf_d, td_d, s_d, dst_d,
item_ct1, shared_mem_acc.get_pointer()
item_ct1, (float*)shared_mem_acc.get_multi_ptr<sycl::access::decorated::no>().get()
);
});
});
+5 -1
View File
@@ -69,11 +69,15 @@ if (Vulkan_FOUND)
file(GLOB _ggml_vk_shader_deps "${_ggml_vk_input_dir}/*.comp")
if (NOT CMAKE_CROSSCOMPILING)
set(_ggml_vk_genshaders_cmd "$<TARGET_FILE_DIR:vulkan-shaders-gen>/${_ggml_vk_genshaders_cmd}")
endif ()
add_custom_command(
OUTPUT ${_ggml_vk_header}
${_ggml_vk_source}
COMMAND "$<TARGET_FILE_DIR:vulkan-shaders-gen>/${_ggml_vk_genshaders_cmd}"
COMMAND ${_ggml_vk_genshaders_cmd}
--glslc ${Vulkan_GLSLC_EXECUTABLE}
--input-dir ${_ggml_vk_input_dir}
--output-dir ${_ggml_vk_output_dir}
-1276
View File
File diff suppressed because it is too large Load Diff
+1325
View File
File diff suppressed because it is too large Load Diff
+2 -1
View File
@@ -1,5 +1,6 @@
#include "llama-impl.h"
#include "gguf.h"
#include "llama.h"
#include <cinttypes>
@@ -138,7 +139,7 @@ std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
{
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
int arr_n = gguf_get_arr_n(ctx_gguf, i);
const void * data = gguf_get_arr_data(ctx_gguf, i);
const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
std::stringstream ss;
ss << "[";
for (int j = 0; j < arr_n; j++) {
+5 -4
View File
@@ -18,7 +18,7 @@ const char * llama_file_version_name(llama_fver version) {
}
namespace GGUFMeta {
template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int)>
template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int64_t)>
struct GKV_Base_Type {
static constexpr gguf_type gt = gt_;
@@ -60,10 +60,11 @@ namespace GGUFMeta {
public:
static constexpr gguf_type gt = GGUF_TYPE_ARRAY;
static ArrayInfo getter(const gguf_context *ctx, const int k) {
const enum gguf_type arr_type = gguf_get_arr_type(ctx, k);
return ArrayInfo {
gguf_get_arr_type(ctx, k),
arr_type,
size_t(gguf_get_arr_n(ctx, k)),
gguf_get_arr_data(ctx, k),
arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx, k),
};
}
};
@@ -553,7 +554,7 @@ llama_model_loader::llama_model_loader(const std::string & fname, bool use_mmap,
const enum gguf_type type = gguf_get_kv_type(meta.get(), i);
const std::string type_name =
type == GGUF_TYPE_ARRAY
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(meta.get(), i)), gguf_get_arr_n(meta.get(), i))
? format("%s[%s,%zu]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(meta.get(), i)), gguf_get_arr_n(meta.get(), i))
: gguf_type_name(type);
std::string value = gguf_kv_to_str(meta.get(), i);
+2 -1
View File
@@ -875,7 +875,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
// update the gguf meta data as we go
gguf_set_tensor_type(ctx_outs[cur_split].get(), name.c_str(), new_type);
gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data, new_size);
GGML_ASSERT(gguf_get_tensor_size(ctx_outs[cur_split].get(), gguf_find_tensor(ctx_outs[cur_split].get(), name.c_str())) == new_size);
gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data);
// write tensor data + padding
fout.write((const char *) new_data, new_size);
+5 -17
View File
@@ -8,7 +8,6 @@
#include "llama-kv-cache.h"
#include "llama-model-loader.h"
#include "llama-model.h"
#include "llama-quant.h"
#include "ggml.h"
#include "ggml-alloc.h"
@@ -18,12 +17,8 @@
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <cfloat>
#include <cinttypes>
#include <climits>
#include <cmath>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
@@ -31,10 +26,7 @@
#include <ctime>
#include <functional>
#include <initializer_list>
#include <locale>
#include <map>
#include <numeric>
#include <type_traits>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -11519,13 +11511,7 @@ int32_t llama_lora_adapter_set(
struct llama_context * ctx,
struct llama_lora_adapter * adapter,
float scale) {
if (ctx->cparams.flash_attn) {
LLAMA_LOG_ERROR("%s: flash_attn is not compatible with LoRA\n", __func__);
return -1;
}
ctx->lora_adapters[adapter] = scale;
return 0;
}
@@ -12440,16 +12426,16 @@ int llama_split_path(char * split_path, size_t maxlen, const char * path_prefix,
return 0;
}
int llama_split_prefix(char * dest, size_t maxlen, const char * split_path, int split_no, int split_count) {
int llama_split_prefix(char * split_prefix, size_t maxlen, const char * split_path, int split_no, int split_count) {
std::string str_split_path(split_path);
char postfix[32];
snprintf(postfix, 32, "-%05d-of-%05d.gguf", split_no + 1, split_count);
std::string str_postfix(postfix);
// check if dest ends with postfix
// check if split_prefix ends with postfix
int size_prefix = str_split_path.size() - str_postfix.size();
if (size_prefix > 0 && str_split_path.find(str_postfix, size_prefix) != std::string::npos) {
snprintf(dest, std::min((size_t) size_prefix + 1, maxlen), "%s", split_path);
snprintf(split_prefix, std::min((size_t) size_prefix + 1, maxlen), "%s", split_path);
return size_prefix;
}
@@ -12458,6 +12444,8 @@ int llama_split_prefix(char * dest, size_t maxlen, const char * split_path, int
const char * llama_print_system_info(void) {
static std::string s;
s.clear(); // Clear the string, since it's static, otherwise it will accumulate data from previous calls.
for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
auto * reg = ggml_backend_reg_get(i);
+201 -170
View File
@@ -15,66 +15,71 @@ constexpr int offset_has_tensors = 2000;
constexpr int offset_has_data = 3000;
enum handcrafted_file_type {
HANDCRAFTED_HEADER_BAD_MAGIC = 10,
HANDCRAFTED_HEADER_BAD_VERSION_1 = 20,
HANDCRAFTED_HEADER_BAD_VERSION_FUTURE = 30,
HANDCRAFTED_HEADER_BAD_N_TENSORS = 40,
HANDCRAFTED_HEADER_BAD_N_KV = 50,
HANDCRAFTED_HEADER_EMPTY = 800,
HANDCRAFTED_HEADER_BAD_MAGIC = 10,
HANDCRAFTED_HEADER_BAD_VERSION_1 = 20,
HANDCRAFTED_HEADER_BAD_VERSION_FUTURE = 30,
HANDCRAFTED_HEADER_BAD_N_TENSORS = 40,
HANDCRAFTED_HEADER_BAD_N_KV = 50,
HANDCRAFTED_HEADER_EMPTY = 800,
HANDCRAFTED_KV_BAD_KEY_SIZE = 10 + offset_has_kv,
HANDCRAFTED_KV_BAD_TYPE = 20 + offset_has_kv,
HANDCRAFTED_KV_BAD_VALUE_SIZE = 30 + offset_has_kv,
HANDCRAFTED_KV_DUPLICATE_KEY = 40 + offset_has_kv,
HANDCRAFTED_KV_SUCCESS = 800 + offset_has_kv,
HANDCRAFTED_KV_BAD_KEY_SIZE = 10 + offset_has_kv,
HANDCRAFTED_KV_BAD_TYPE = 20 + offset_has_kv,
// HANDCRAFTED_KV_BAD_VALUE_SIZE = 30 + offset_has_kv, // removed because it can result in allocations > 1 TB (default sanitizer limit)
HANDCRAFTED_KV_DUPLICATE_KEY = 40 + offset_has_kv,
HANDCRAFTED_KV_BAD_ALIGN = 50 + offset_has_kv,
HANDCRAFTED_KV_SUCCESS = 800 + offset_has_kv,
HANDCRAFTED_TENSORS_BAD_NAME_SIZE = 10 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_N_DIMS = 20 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_SHAPE = 30 + offset_has_tensors,
HANDCRAFTED_TENSORS_NE_TOO_BIG = 40 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_TYPE = 50 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_OFFSET = 60 + offset_has_tensors,
HANDCRAFTED_TENSORS_DUPLICATE_NAME = 70 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_ALIGNMENT = 80 + offset_has_tensors,
HANDCRAFTED_TENSORS_SUCCESS = 800 + offset_has_tensors,
HANDCRAFTED_TENSORS_CUSTOM_ALIGN = 810 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_NAME_SIZE = 10 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_N_DIMS = 20 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_SHAPE = 30 + offset_has_tensors,
HANDCRAFTED_TENSORS_NE_TOO_BIG = 40 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_TYPE = 50 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_OFFSET = 60 + offset_has_tensors,
HANDCRAFTED_TENSORS_DUPLICATE_NAME = 70 + offset_has_tensors,
HANDCRAFTED_TENSORS_BAD_ALIGN = 75 + offset_has_tensors,
HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN = 80 + offset_has_tensors,
HANDCRAFTED_TENSORS_SUCCESS = 800 + offset_has_tensors,
HANDCRAFTED_TENSORS_CUSTOM_ALIGN = 810 + offset_has_tensors,
HANDCRAFTED_DATA_NOT_ENOUGH_DATA = 10 + offset_has_data,
HANDCRAFTED_DATA_BAD_ALIGNMENT = 20 + offset_has_data,
HANDCRAFTED_DATA_SUCCESS = 800 + offset_has_data,
HANDCRAFTED_DATA_CUSTOM_ALIGN = 810 + offset_has_data,
HANDCRAFTED_DATA_NOT_ENOUGH_DATA = 10 + offset_has_data,
HANDCRAFTED_DATA_BAD_ALIGN = 15 + offset_has_data,
HANDCRAFTED_DATA_INCONSISTENT_ALIGN = 20 + offset_has_data,
HANDCRAFTED_DATA_SUCCESS = 800 + offset_has_data,
HANDCRAFTED_DATA_CUSTOM_ALIGN = 810 + offset_has_data,
};
std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) {
switch (hft) {
case HANDCRAFTED_HEADER_BAD_MAGIC: return "HEADER_BAD_MAGIC";
case HANDCRAFTED_HEADER_BAD_VERSION_1: return "HEADER_BAD_VERSION_1";
case HANDCRAFTED_HEADER_BAD_VERSION_FUTURE: return "HEADER_BAD_VERSION_FUTURE";
case HANDCRAFTED_HEADER_BAD_N_KV: return "HEADER_BAD_N_KV";
case HANDCRAFTED_HEADER_BAD_N_TENSORS: return "HEADER_BAD_N_TENSORS";
case HANDCRAFTED_HEADER_EMPTY: return "HEADER_EMPTY";
case HANDCRAFTED_HEADER_BAD_MAGIC: return "HEADER_BAD_MAGIC";
case HANDCRAFTED_HEADER_BAD_VERSION_1: return "HEADER_BAD_VERSION_1";
case HANDCRAFTED_HEADER_BAD_VERSION_FUTURE: return "HEADER_BAD_VERSION_FUTURE";
case HANDCRAFTED_HEADER_BAD_N_KV: return "HEADER_BAD_N_KV";
case HANDCRAFTED_HEADER_BAD_N_TENSORS: return "HEADER_BAD_N_TENSORS";
case HANDCRAFTED_HEADER_EMPTY: return "HEADER_EMPTY";
case HANDCRAFTED_KV_BAD_KEY_SIZE: return "KV_BAD_KEY_SIZE";
case HANDCRAFTED_KV_BAD_TYPE: return "KV_BAD_TYPE";
case HANDCRAFTED_KV_BAD_VALUE_SIZE: return "KV_BAD_VALUE_SIZE";
case HANDCRAFTED_KV_DUPLICATE_KEY: return "KV_DUPLICATE_KEY";
case HANDCRAFTED_KV_SUCCESS: return "KV_RANDOM_KV";
case HANDCRAFTED_KV_BAD_KEY_SIZE: return "KV_BAD_KEY_SIZE";
case HANDCRAFTED_KV_BAD_TYPE: return "KV_BAD_TYPE";
case HANDCRAFTED_KV_DUPLICATE_KEY: return "KV_DUPLICATE_KEY";
case HANDCRAFTED_KV_BAD_ALIGN: return "KV_BAD_ALIGN";
case HANDCRAFTED_KV_SUCCESS: return "KV_RANDOM_KV";
case HANDCRAFTED_TENSORS_BAD_NAME_SIZE: return "TENSORS_BAD_NAME_SIZE";
case HANDCRAFTED_TENSORS_BAD_N_DIMS: return "TENSORS_BAD_N_DIMS";
case HANDCRAFTED_TENSORS_BAD_SHAPE: return "TENSORS_BAD_SHAPE";
case HANDCRAFTED_TENSORS_NE_TOO_BIG: return "TENSORS_NE_TOO_BIG";
case HANDCRAFTED_TENSORS_BAD_TYPE: return "TENSORS_BAD_TYPE";
case HANDCRAFTED_TENSORS_BAD_OFFSET: return "TENSORS_BAD_OFFSET";
case HANDCRAFTED_TENSORS_DUPLICATE_NAME: return "TENSORS_DUPLICATE_NAME";
case HANDCRAFTED_TENSORS_BAD_ALIGNMENT: return "TENSORS_BAD_ALIGNMENT";
case HANDCRAFTED_TENSORS_SUCCESS: return "TENSORS_SUCCESS";
case HANDCRAFTED_TENSORS_CUSTOM_ALIGN: return "TENSORS_CUSTOM_ALIGN";
case HANDCRAFTED_TENSORS_BAD_NAME_SIZE: return "TENSORS_BAD_NAME_SIZE";
case HANDCRAFTED_TENSORS_BAD_N_DIMS: return "TENSORS_BAD_N_DIMS";
case HANDCRAFTED_TENSORS_BAD_SHAPE: return "TENSORS_BAD_SHAPE";
case HANDCRAFTED_TENSORS_NE_TOO_BIG: return "TENSORS_NE_TOO_BIG";
case HANDCRAFTED_TENSORS_BAD_TYPE: return "TENSORS_BAD_TYPE";
case HANDCRAFTED_TENSORS_BAD_OFFSET: return "TENSORS_BAD_OFFSET";
case HANDCRAFTED_TENSORS_DUPLICATE_NAME: return "TENSORS_DUPLICATE_NAME";
case HANDCRAFTED_TENSORS_BAD_ALIGN: return "TENSORS_BAD_ALIGN";
case HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN: return "TENSORS_INCONSISTENT_ALIGN";
case HANDCRAFTED_TENSORS_SUCCESS: return "TENSORS_SUCCESS";
case HANDCRAFTED_TENSORS_CUSTOM_ALIGN: return "TENSORS_CUSTOM_ALIGN";
case HANDCRAFTED_DATA_NOT_ENOUGH_DATA: return "DATA_NOT_ENOUGH_DATA";
case HANDCRAFTED_DATA_BAD_ALIGNMENT: return "DATA_BAD_ALIGNMENT";
case HANDCRAFTED_DATA_SUCCESS: return "DATA_SUCCESS";
case HANDCRAFTED_DATA_CUSTOM_ALIGN: return "DATA_CUSTOM_ALIGN";
case HANDCRAFTED_DATA_NOT_ENOUGH_DATA: return "DATA_NOT_ENOUGH_DATA";
case HANDCRAFTED_DATA_BAD_ALIGN: return "DATA_BAD_ALIGN";
case HANDCRAFTED_DATA_INCONSISTENT_ALIGN: return "DATA_INCONSISTENT_ALIGN";
case HANDCRAFTED_DATA_SUCCESS: return "DATA_SUCCESS";
case HANDCRAFTED_DATA_CUSTOM_ALIGN: return "DATA_CUSTOM_ALIGN";
}
GGML_ABORT("fatal error");
}
@@ -140,31 +145,41 @@ std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::mt19937
return kv_types;
}
static void helper_write(const void * data, const size_t nbytes, FILE * file) {
template <typename T>
static void helper_write(FILE * file, const T & val) {
GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val));
}
static void helper_write(FILE * file, const void * data, const size_t nbytes) {
GGML_ASSERT(fwrite(data, 1, nbytes, file) == nbytes);
}
static FILE * get_handcrafted_file(const unsigned int seed, const enum handcrafted_file_type hft, const int extra_bytes = 0) {
FILE * file = tmpfile();
if (!file) {
return file;
}
std::mt19937 rng(seed);
uint32_t alignment = GGUF_DEFAULT_ALIGNMENT;
if (hft == HANDCRAFTED_HEADER_BAD_MAGIC) {
const char bad_magic[4] = {'F', 'U', 'G', 'G'};
helper_write(bad_magic, sizeof(bad_magic), file);
helper_write(file, bad_magic, sizeof(bad_magic));
} else {
helper_write(GGUF_MAGIC, 4, file);
helper_write(file, GGUF_MAGIC, 4);
}
if (hft == HANDCRAFTED_HEADER_BAD_VERSION_1) {
const uint32_t version = 1;
helper_write(&version, sizeof(version), file);
helper_write(file, version);
} else if (hft == HANDCRAFTED_HEADER_BAD_VERSION_FUTURE) {
const uint32_t version = GGUF_VERSION + 1;
helper_write(&version, sizeof(version), file);
helper_write(file, version);
} else {
const uint32_t version = GGUF_VERSION;
helper_write(&version, sizeof(version), file);
helper_write(file, version);
}
std::vector<tensor_config_t> tensor_configs;
@@ -174,10 +189,10 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
if (hft == HANDCRAFTED_HEADER_BAD_N_TENSORS) {
const uint64_t n_tensors = -1;
helper_write(&n_tensors, sizeof(n_tensors), file);
helper_write(file, n_tensors);
} else {
const uint64_t n_tensors = tensor_configs.size();
helper_write(&n_tensors, sizeof(n_tensors), file);
helper_write(file, n_tensors);
}
std::vector<std::pair<enum gguf_type, enum gguf_type>> kv_types;
@@ -186,41 +201,49 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
}
{
uint64_t n_kv = kv_types.size();
if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
if (hft == HANDCRAFTED_KV_BAD_ALIGN ||
hft == HANDCRAFTED_TENSORS_BAD_ALIGN || hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN ||
hft == HANDCRAFTED_DATA_BAD_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
n_kv += 1;
} else if (hft == HANDCRAFTED_HEADER_BAD_N_KV) {
n_kv = -1;
}
helper_write(&n_kv, sizeof(n_kv), file);
helper_write(file, n_kv);
}
if (hft < offset_has_kv) {
while (ftell(file) % alignment != 0) {
const char pad = 0;
helper_write(file, pad);
}
for (int i = 0; i < extra_bytes; ++i) {
const char tmp = 0;
helper_write(&tmp, sizeof(tmp), file);
helper_write(file, tmp);
}
rewind(file);
return file;
}
for (int i = 0; i < int(kv_types.size()); ++i) {
const enum gguf_type type = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? -1 : kv_types[i].first);
const enum gguf_type type_arr = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? -1 : kv_types[i].second);
const enum gguf_type type = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types[i].first);
const enum gguf_type type_arr = gguf_type(hft == HANDCRAFTED_KV_BAD_TYPE ? GGUF_TYPE_COUNT : kv_types[i].second);
const std::string key = "my_key_" + std::to_string((hft == HANDCRAFTED_KV_DUPLICATE_KEY ? i/2 : i));
if (hft == HANDCRAFTED_KV_BAD_KEY_SIZE) {
const uint64_t n = -1;
helper_write(&n, sizeof(n), file);
helper_write(file, n);
} else {
const uint64_t n = key.length();
helper_write(&n, sizeof(n), file);
helper_write(file, n);
}
helper_write(key.data(), key.length(), file);
helper_write(file, key.data(), key.length());
{
const int32_t type32 = int32_t(type);
helper_write(&type32, sizeof(type32), file);
helper_write(file, type32);
}
uint32_t data[16];
@@ -233,69 +256,67 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
if (type == GGUF_TYPE_STRING) {
const uint64_t n = rng() % sizeof(data);
helper_write(&n, sizeof(n), file);
helper_write(data, n, file);
helper_write(file, n);
helper_write(file, data, n);
continue;
}
if (type == GGUF_TYPE_ARRAY) {
{
const int32_t type32 = int32_t(type_arr);
helper_write(&type32, sizeof(type32), file);
helper_write(file, type32);
}
if (type_arr == GGUF_TYPE_STRING) {
const uint64_t nstr = rng() % (16 + 1);
helper_write(&nstr, sizeof(nstr), file);
helper_write(file, nstr);
for (uint64_t istr = 0; istr < nstr; ++istr) {
const uint64_t n = rng() % (sizeof(uint32_t) + 1);
helper_write(&n, sizeof(n), file);
helper_write(&data[istr], n, file);
helper_write(file, n);
helper_write(file, &data[istr], n);
}
continue;
}
const size_t type_size = gguf_type_size(type_arr);
const uint64_t n = (rng() % sizeof(data)) / type_size;
helper_write(&n, sizeof(n), file);
helper_write(&data, n*type_size, file);
helper_write(file, n);
helper_write(file, &data, n*type_size);
continue;
}
size_t type_size = hft == HANDCRAFTED_KV_BAD_TYPE ? 1 : gguf_type_size(type);
if (hft == HANDCRAFTED_KV_BAD_VALUE_SIZE) {
type_size += rng() % 3;
}
helper_write(data, type_size, file);
helper_write(file, data, hft == HANDCRAFTED_KV_BAD_TYPE ? 1 : gguf_type_size(type));
}
if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
const std::string key = "general.alignment";
{
const uint64_t n = key.length();
helper_write(&n, sizeof(n), file);
}
helper_write(key.data(), key.length(), file);
if (hft == HANDCRAFTED_KV_BAD_ALIGN ||
hft == HANDCRAFTED_TENSORS_BAD_ALIGN || hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN ||
hft == HANDCRAFTED_DATA_BAD_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
const uint64_t n = strlen(GGUF_KEY_GENERAL_ALIGNMENT);
helper_write(file, n);
helper_write(file, GGUF_KEY_GENERAL_ALIGNMENT, n);
const int32_t type = gguf_type(GGUF_TYPE_UINT32);
helper_write(&type, sizeof(type), file);
helper_write(file, type);
const uint32_t alignment = GGUF_DEFAULT_ALIGNMENT + 1;
helper_write(&alignment, sizeof(alignment), file);
alignment = expect_context_not_null(hft) ? 1 : 13;
helper_write(file, alignment);
}
if (hft < offset_has_tensors) {
while (ftell(file) % alignment != 0) {
const char pad = 0;
helper_write(file, pad);
}
for (int i = 0; i < extra_bytes; ++i) {
const char tmp = 0;
helper_write(&tmp, sizeof(tmp), file);
helper_write(file, tmp);
}
rewind(file);
return file;
}
uint32_t alignment = GGUF_DEFAULT_ALIGNMENT;
if (hft == HANDCRAFTED_TENSORS_BAD_ALIGNMENT || hft == HANDCRAFTED_DATA_BAD_ALIGNMENT) {
alignment -= 1;
} else if (hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN) {
alignment += 1;
if (hft == HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN || hft == HANDCRAFTED_DATA_INCONSISTENT_ALIGN) {
alignment = 1;
}
uint64_t offset = 0;
@@ -313,9 +334,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
}
{
const uint64_t n = name.length();
helper_write(&n, sizeof(n), file);
helper_write(file, n);
}
helper_write(name.data(), name.length(), file);
helper_write(file, name.data(), name.length());
uint32_t n_dims = hft == HANDCRAFTED_TENSORS_NE_TOO_BIG ? 2 : 1;
for (int i = GGML_MAX_DIMS-1; i >= 1; --i) {
@@ -326,35 +347,35 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
}
if (hft == HANDCRAFTED_TENSORS_BAD_N_DIMS) {
const uint32_t n_dims_bad = GGML_MAX_DIMS + 1;
helper_write(&n_dims_bad, sizeof(n_dims_bad), file);
helper_write(file, n_dims_bad);
} else {
helper_write(&n_dims, sizeof(n_dims), file);
helper_write(file, n_dims);
}
if (hft == HANDCRAFTED_TENSORS_BAD_SHAPE) {
for (uint32_t j = 0; j < n_dims; ++j) {
const int64_t bad_dim = -1;
helper_write(&bad_dim, sizeof(bad_dim), file);
helper_write(file, bad_dim);
}
} else if (hft == HANDCRAFTED_TENSORS_NE_TOO_BIG){
for (uint32_t j = 0; j < n_dims; ++j) {
const int64_t big_dim = 4*int64_t(INT32_MAX);
helper_write(&big_dim, sizeof(big_dim), file);
helper_write(file, big_dim);
}
} else {
helper_write(shape.data(), n_dims*sizeof(int64_t), file);
helper_write(file, shape.data(), n_dims*sizeof(int64_t));
}
{
const int32_t type32 = hft == HANDCRAFTED_TENSORS_BAD_TYPE ? -1 : int32_t(type);
helper_write(&type32, sizeof(type32), file);
const int32_t type32 = hft == HANDCRAFTED_TENSORS_BAD_TYPE ? GGML_TYPE_COUNT : int32_t(type);
helper_write(file, type32);
}
if (hft == HANDCRAFTED_TENSORS_BAD_OFFSET) {
const uint64_t bad_offset = -1;
helper_write(&bad_offset, sizeof(bad_offset), file);
helper_write(file, bad_offset);
} else {
helper_write(&offset, sizeof(offset), file);
helper_write(file, offset);
}
int64_t ne = shape[0];
@@ -364,12 +385,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
offset += GGML_PAD(ggml_row_size(type, ne), alignment);
}
const uint32_t alignment_overshoot = ftell(file) % alignment;
if (alignment_overshoot != 0) {
for (size_t i = alignment_overshoot; i < alignment; ++i) {
const char pad = 0;
helper_write(&pad, sizeof(pad), file);
}
while (ftell(file) % alignment != 0) {
const char pad = 0;
helper_write(file, pad);
}
if (hft >= offset_has_data) {
@@ -380,13 +398,13 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
}
for (uint64_t i = 0; i < nbytes; ++i) {
const uint8_t random_byte = i % 256;
helper_write(&random_byte, sizeof(random_byte), file);
helper_write(file, random_byte);
}
}
for (int i = 0; i < extra_bytes; ++i) {
const char tmp = 0;
helper_write(&tmp, sizeof(tmp), file);
helper_write(file, tmp);
}
rewind(file);
return file;
@@ -505,6 +523,16 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
}
const char * data_gguf = reinterpret_cast<const char *>(gguf_get_arr_data(gguf_ctx, id));
if (type_arr == GGUF_TYPE_BOOL) {
for (size_t arr_i = 0; arr_i < arr_n; ++arr_i) {
if (bool(data8[arr_i]) != bool(data_gguf[arr_i])) {
ok = false;
}
}
continue;
}
if (!std::equal(data8, data8 + arr_n*type_size, data_gguf)) {
ok = false;
}
@@ -512,12 +540,20 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
}
const char * data_gguf = reinterpret_cast<const char *>(gguf_get_val_data(gguf_ctx, id));
if (type == GGUF_TYPE_BOOL) {
if (bool(*data8) != bool(*data_gguf)) {
ok = false;
}
continue;
}
if (!std::equal(data8, data8 + gguf_type_size(type), data_gguf)) {
ok = false;
}
}
const uint32_t expected_alignment = alignment_defined ? GGUF_DEFAULT_ALIGNMENT + 1 : GGUF_DEFAULT_ALIGNMENT;
const uint32_t expected_alignment = alignment_defined ? 1 : GGUF_DEFAULT_ALIGNMENT;
if (gguf_get_alignment(gguf_ctx) != expected_alignment) {
ok = false;
}
@@ -539,7 +575,7 @@ static bool handcrafted_check_tensors(const gguf_context * gguf_ctx, const unsig
bool ok = true;
const int id_alignment = gguf_find_key(gguf_ctx, "general.alignment");
const int id_alignment = gguf_find_key(gguf_ctx, GGUF_KEY_GENERAL_ALIGNMENT);
const uint32_t alignment = id_alignment >= 0 ? gguf_get_val_u32(gguf_ctx, id_alignment) : GGUF_DEFAULT_ALIGNMENT;
uint64_t expected_offset = 0;
@@ -607,7 +643,7 @@ static bool handcrafted_check_tensor_data(const gguf_context * gguf_ctx, const u
std::vector<uint8_t> data(size);
GGML_ASSERT(fseek(file, gguf_get_data_offset(gguf_ctx) + offset, SEEK_SET) == 0);
GGML_ASSERT(fread(data.data(), 1, size, file) == size);
GGML_ASSERT(fread(data.data(), 1, data.size(), file) == data.size());
for (size_t j = 0; j < size; ++j) {
const uint8_t expected_byte = (j + offset) % 256;
@@ -627,15 +663,15 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
const std::vector<handcrafted_file_type> hfts = {
HANDCRAFTED_HEADER_BAD_MAGIC,
HANDCRAFTED_HEADER_BAD_VERSION_1,
// HANDCRAFTED_FILE_TYPE_BAD_VERSION_FUTURE, // FIXME
HANDCRAFTED_HEADER_BAD_VERSION_FUTURE,
HANDCRAFTED_HEADER_BAD_N_KV,
HANDCRAFTED_HEADER_BAD_N_TENSORS,
HANDCRAFTED_HEADER_EMPTY,
HANDCRAFTED_KV_BAD_KEY_SIZE,
HANDCRAFTED_KV_BAD_TYPE,
// HANDCRAFTED_KV_BAD_VALUE_SIZE, // FIXME sanitizer limit
// HANDCRAFTED_FILE_TYPE_DUPLICATE_KEY, // FIXME
HANDCRAFTED_KV_DUPLICATE_KEY,
HANDCRAFTED_KV_BAD_ALIGN,
HANDCRAFTED_KV_SUCCESS,
HANDCRAFTED_TENSORS_BAD_NAME_SIZE,
@@ -643,14 +679,16 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
HANDCRAFTED_TENSORS_BAD_SHAPE,
HANDCRAFTED_TENSORS_NE_TOO_BIG,
HANDCRAFTED_TENSORS_BAD_TYPE,
// HANDCRAFTED_TENSORS_BAD_OFFSET, // FIXME
HANDCRAFTED_TENSORS_BAD_OFFSET,
HANDCRAFTED_TENSORS_DUPLICATE_NAME,
// HANDCRAFTED_TENSORS_BAD_ALIGNMENT, // FIXME
HANDCRAFTED_TENSORS_BAD_ALIGN,
HANDCRAFTED_TENSORS_INCONSISTENT_ALIGN,
HANDCRAFTED_TENSORS_SUCCESS,
HANDCRAFTED_TENSORS_CUSTOM_ALIGN,
HANDCRAFTED_DATA_NOT_ENOUGH_DATA,
// HANDCRAFTED_DATA_BAD_ALIGNMENT, // FIXME
HANDCRAFTED_DATA_BAD_ALIGN,
HANDCRAFTED_DATA_INCONSISTENT_ALIGN,
HANDCRAFTED_DATA_SUCCESS,
HANDCRAFTED_DATA_CUSTOM_ALIGN,
};
@@ -674,6 +712,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
/*no_alloc =*/ false,
/*ctx =*/ hft >= offset_has_data ? &ctx : nullptr,
};
struct gguf_context * gguf_ctx = gguf_init_from_file_impl(file, gguf_params);
if (expect_context_not_null(hft)) {
@@ -689,7 +728,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
}
ntest++;
if (false && hft >= offset_has_data && !expect_context_not_null(hft)) { // FIXME
if (hft >= offset_has_data && !expect_context_not_null(hft)) {
printf("%s: - no_dangling_ggml_context_pointer: ", __func__);
if (ctx) {
printf("\033[1;31mFAIL\033[0m\n");
@@ -700,23 +739,6 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
ntest++;
}
if (false && expect_context_not_null(hft)) { // FIXME
FILE * file_eb = get_handcrafted_file(seed, hft, /*extra_bytes =*/ 1);
struct gguf_context * gguf_ctx_eb = gguf_init_from_file_impl(file_eb, gguf_params);
printf("%s: - context_null_with_extra_bytes: ", __func__);
if (gguf_ctx_eb) {
printf("\033[1;31mFAIL\033[0m\n");
} else {
printf("\033[1;32mOK\033[0m\n");
npass++;
}
ntest++;
gguf_free(gguf_ctx_eb);
fclose(file_eb);
}
const bool alignment_defined = hft == HANDCRAFTED_TENSORS_CUSTOM_ALIGN || hft == HANDCRAFTED_DATA_CUSTOM_ALIGN;
if (expect_context_not_null(hft)) {
@@ -763,14 +785,15 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
ntest++;
}
fclose(file);
if (gguf_ctx) {
ggml_free(ctx);
gguf_free(gguf_ctx);
}
fclose(file);
printf("\n");
}
return std::make_pair(npass, ntest);
}
@@ -789,10 +812,6 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t
const std::string key = "my_key_" + std::to_string(rng() % 1024);
const enum gguf_type type = gguf_type(rng() % GGUF_TYPE_COUNT);
if (type == GGUF_TYPE_STRING || type == GGUF_TYPE_ARRAY) {
continue; // FIXME memory leak
}
switch (type) {
case GGUF_TYPE_UINT8: gguf_set_val_u8 (gguf_ctx, key.c_str(), rng() % (1 << 7)); break;
case GGUF_TYPE_INT8: gguf_set_val_i8 (gguf_ctx, key.c_str(), rng() % (1 << 7) - (1 << 6)); break;
@@ -826,6 +845,9 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t
std::vector<uint32_t> random_data((nbytes + sizeof(uint32_t) - 1) / sizeof(uint32_t));
for (size_t j = 0; j < random_data.size(); ++j) {
random_data[j] = rng();
if (type_arr == GGUF_TYPE_BOOL) {
random_data[j] &= 0x01010101; // the sanitizer complains if booleans are not 0 or 1
}
}
gguf_set_arr_data(gguf_ctx, key.c_str(), type_arr, random_data.data(), ne);
} break;
@@ -928,6 +950,17 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other
continue;
}
if (type_arr == GGUF_TYPE_BOOL) {
const int8_t * data = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx, id));
const int8_t * data_other = reinterpret_cast<const int8_t *>(gguf_get_arr_data(other, idx_other));
for (int arr_i = 0; arr_i < arr_n; ++arr_i) {
if (bool(data[arr_i]) != bool(data_other[arr_i])) {
ok = false;
}
}
continue;
}
if (type_arr == GGUF_TYPE_STRING) {
for (int arr_i = 0; arr_i < arr_n; ++arr_i) {
const std::string str = gguf_get_arr_str(ctx, id, arr_i);
@@ -939,8 +972,8 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other
continue;
}
const char * data = reinterpret_cast<const char *>(gguf_get_arr_data(ctx, id));
const char * data_other = reinterpret_cast<const char *>(gguf_get_arr_data(other, idx_other));
const int8_t * data = reinterpret_cast<const int8_t *>(gguf_get_arr_data(ctx, id));
const int8_t * data_other = reinterpret_cast<const int8_t *>(gguf_get_arr_data(other, idx_other));
if (!std::equal(data, data + arr_n*gguf_type_size(type_arr), data_other)) {
ok = false;
}
@@ -1028,21 +1061,6 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml
}
static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta) {
FILE * file = tmpfile();
#ifdef _WIN32
if (!file) {
printf("%s: failed to create tmpfile(), needs elevated privileges on Windows");
printf("%s: skipping tests");
return std::make_pair(0, 0);
}
#else
GGML_ASSERT(file);
#endif // _WIN32
if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) {
return std::make_pair(0, 0); // FIXME
}
ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
printf("%s: device=%s, backend=%s, only_meta=%s\n",
__func__, ggml_backend_dev_description(dev), ggml_backend_name(backend), only_meta ? "yes" : "no");
@@ -1060,10 +1078,24 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
bbuf = result.buffer;
}
struct gguf_buf gbuf = gguf_buf_init(16 * 1024);
gguf_write_to_buf(gguf_ctx_0, &gbuf, only_meta);
helper_write(gbuf.data, gbuf.offset, file);
rewind(file);
FILE * file = tmpfile();
#ifdef _WIN32
if (!file) {
printf("%s: failed to create tmpfile(), needs elevated privileges on Windows");
printf("%s: skipping tests");
return std::make_pair(0, 0);
}
#else
GGML_ASSERT(file);
#endif // _WIN32
{
std::vector<int8_t> buf;
gguf_write_to_buf(gguf_ctx_0, buf, only_meta);
GGML_ASSERT(fwrite(buf.data(), 1, buf.size(), file) == buf.size());
rewind(file);
}
struct ggml_context * ctx_1 = nullptr;
struct gguf_init_params gguf_params = {
@@ -1151,9 +1183,8 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
ggml_free(ctx_1);
gguf_free(gguf_ctx_0);
gguf_free(gguf_ctx_1);
gguf_buf_free(gbuf);
ggml_backend_free(backend);
GGML_ASSERT(fclose(file) == 0);
fclose(file);
printf("\n");
return std::make_pair(npass, ntest);