add tool to allow plotting tensor allocation maps within buffers

simple : update name of executable to llama-simple (#8885 )
This commit updates the name of the executable in README.md from `simple` to `llama-simple`.
2026-07-04 19:45:57 +02:00 · 2024-08-06 22:09:51 +02:00 · 2024-08-06 16:44:35 +02:00 · 2024-08-06 15:21:47 +02:00 · 2024-08-06 13:32:03 +02:00 · 2024-08-06 11:48:01 +03:00
10 changed files with 341 additions and 1 deletions
@@ -5,6 +5,7 @@
  - Execute [the full CI locally on your machine](ci/README.md) before publishing
 - Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
  - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
+- Consider allowing write access to your branch for faster review
 - If your PR becomes stale, don't hesitate to ping the maintainers in the comments

 # Pull requests (for collaborators)
@@ -3,7 +3,7 @@
 The purpose of this example is to demonstrate a minimal usage of llama.cpp for generating text with a given prompt.

 ```bash
-./simple -m ./models/llama-7b-v2/ggml-model-f16.gguf -p "Hello my name is"
+./llama-simple -m ./models/llama-7b-v2/ggml-model-f16.gguf -p "Hello my name is"

 ...

@@ -71,6 +71,9 @@ GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_i
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
 GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);

+// Export tensor allocations in a graph to a CSV file that can be plotted
+GGML_API void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph);
+
 #ifdef  __cplusplus
 }
 #endif
@@ -208,6 +208,9 @@ extern "C" {
    // Set a callback to be called for each resulting node during graph compute
    GGML_API void                 ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);

+    // internal
+    GGML_API struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched);
+
    //
    // Utils
    //
@@ -1034,3 +1034,30 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
 ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
    return ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_get_default_buffer_type(backend));
 }
+
+
+static void export_tensor(FILE * f, struct ggml_tensor * t) {
+    // CSV row: tensor_id, tensor_view_src_id, tensor_view_offs, tensor_name, buffer_id, buffer_name, offset, size
+    const uintptr_t base   = (uintptr_t)ggml_backend_buffer_get_base(t->buffer);
+    const size_t    offset = (uintptr_t)t->data - base; // tensor data address relative to the buffer base
+    const size_t    size   = ggml_backend_buft_get_alloc_size(t->buffer->buft, t);
+    fprintf(f, "%p,%p,%zu,\"%s\",%p,\"%s\",%zu,%zu\n",
+        (void *)t, (void *)t->view_src, t->view_offs, t->name,
+        (void *)t->buffer, ggml_backend_buffer_name(t->buffer), offset, size);
+}
+
+void ggml_gallocr_export_allocs(const char * filename, struct ggml_cgraph * graph) {
+    FILE * f = fopen(filename, "wb");
+    if (f == NULL) { // fopen failed: report it and skip the export rather than write through a NULL stream
+        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, filename);
+        return;
+    }
+    fprintf(f, "tensor_id,tensor_view_src_id,tensor_view_offs,tensor_name,buffer_id,buffer_name,offset,size\n");
+    for (int i = 0; i < graph->n_leafs; i++) { // one CSV row per leaf tensor
+        export_tensor(f, graph->leafs[i]);
+    }
+    for (int i = 0; i < graph->n_nodes; i++) { // followed by one CSV row per node tensor
+        export_tensor(f, graph->nodes[i]);
+    }
+    fclose(f);
+}
@@ -2028,6 +2028,10 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
    return sched->backends[backend_index];
 }

+struct ggml_cgraph * ggml_backend_sched_get_graph_copy(ggml_backend_sched_t sched) {
+    return &sched->graph; // address of the scheduler's own graph member; nothing is copied here
+}
+
 // utils

 void ggml_backend_view_init(struct ggml_tensor * tensor) {
@@ -1,5 +1,7 @@
+find_package (Threads REQUIRED)

 set(TARGET vulkan-shaders-gen)
 add_executable(${TARGET} vulkan-shaders-gen.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_link_libraries(vulkan-shaders-gen PUBLIC Threads::Threads)
@@ -22,6 +22,7 @@
 #ifdef _WIN32
    #include <windows.h>
    #include <direct.h> // For _mkdir on Windows
+    #include <algorithm> // For std::replace on w64devkit
 #else
    #include <unistd.h>
    #include <sys/wait.h>
@@ -14642,6 +14642,13 @@ static int llama_decode_internal(

        ggml_backend_sched_alloc_graph(lctx.sched, gf);

+        #if 1
+        static int id = 0;
+        printf("saving allocs %d (%d tokens)\n", id, n_tokens);
+        ggml_gallocr_export_allocs(format("allocs%d.csv", id).c_str(), ggml_backend_sched_get_graph_copy(lctx.sched));
+        id++;
+        #endif
+
        llama_set_inputs(lctx, u_batch);

        llama_graph_compute(lctx, gf, n_threads);
Author	SHA1	Message	Date
slaren	cad8abb49b	add tool to allow plotting tensor allocation maps within buffers	2024-08-06 22:09:51 +02:00
Daniel Bevenius	5f4dcb1e60	simple : update name of executable to llama-simple (#8885 ) This commit updates the name of the executable in README.md from `simple` to `llama-simple`.	2024-08-06 16:44:35 +02:00
Jaeden Amero	db20f50cf4	cmake : Link vulkan-shaders-gen with pthreads (#8835 ) When using CMake to build with Vulkan support, compiling vulkan-shaders-gen fails due to missing a CMakeLists.txt specification to link vulkan-shaders-gen with the threading library, resulting in the following error. [5/172] Linking CXX executable bin/vulkan-shaders-gen FAILED: bin/vulkan-shaders-gen : && /usr/bin/c++ ggml/src/vulkan-shaders/CMakeFiles/vulkan-shaders-gen.dir/vulkan-shaders-gen.cpp.o -o bin/vulkan-shaders-gen && : ld: error: undefined symbol: pthread_create >>> referenced by vulkan-shaders-gen.cpp >>> ggml/src/vulkan-shaders/CMakeFiles/vulkan-shaders-gen.dir/vulkan-shaders-gen.cpp.o:(std::__1::__libcpp_thread_create[abi:se180100](pthread*, >>> void ()(void), void*)) c++: error: linker command failed with exit code 1 (use -v to see invocation) [6/172] Generating build details from Git -- Found Git: /usr/local/bin/git (found version "2.45.2") ninja: build stopped: subcommand failed. Add the CMakeLists.txt specification to link vulkan-shaders-gen with the threading library and fix the above error. Fixes #8834	2024-08-06 15:21:47 +02:00
MaggotHATE	efda90c93a	[Vulkan] Fix compilation of `vulkan-shaders-gen` on w64devkit after `e31a4f6` (#8880 ) * Fix compilation issue in `vulkan-shaders-gen` https://github.com/ggerganov/llama.cpp/commit/e31a4f679779220312c165b0f5994c680a610e38 broke compilation on w64devkit. Including `algorithm` seems to fix that. * Guard it under `#ifdef _WIN32`	2024-08-06 13:32:03 +02:00
Georgi Gerganov	0bf16de07b	contributing : add note about write access	2024-08-06 11:48:01 +03:00