Add tensor name to JSON output

2026-06-09 07:16:44 +02:00 · 2026-06-06 22:33:01 +02:00
parent 2bfe4ff9ca
commit 22634e0eee
4 changed files with 16 additions and 1 deletions
@@ -31,6 +31,7 @@ typedef struct ggml_profile_record {
    const char *                 extra;       // fusion name for fused ops, or NULL

    // Output tensor info
+    char                         tensor_name[GGML_MAX_NAME];           // output tensor name (e.g. "ffn_out-0"), "" if unnamed
    int64_t                      ne[4];                                // output tensor dimensions
    int                          out_type;                             // output tensor type (ggml_type), -1 if N/A

@@ -1577,6 +1577,7 @@ static ggml_profile_record make_copy_record(const char * copy_dir, int backend_i
    rec.end_ns     = end_ns;
    rec.bytes      = bytes;
    rec.extra      = input ? input->name : NULL;
+    snprintf(rec.tensor_name, sizeof(rec.tensor_name), "%s", input ? input->name : "");
    rec.out_type   = -1;
    rec.sub_op     = -1;
    rec.n_src      = 0;
@@ -2792,6 +2793,11 @@ int ggml_backend_sched_write_profiling_json(ggml_backend_sched_t sched, FILE * f
        }

        // Output tensor info
+        if (rec.tensor_name[0] != '\0') {
+            fprintf(fp, ", \"tensor_name\": \"%s\"", rec.tensor_name);
+        } else {
+            fprintf(fp, ", \"tensor_name\": null");
+        }
        fprintf(fp, ", \"ne\": [%lld, %lld, %lld, %lld]", (long long) rec.ne[0], (long long) rec.ne[1],
                (long long) rec.ne[2], (long long) rec.ne[3]);
        fprintf(fp, ", \"out_type\": %d", rec.out_type);
@@ -53,10 +53,14 @@ void ggml_profile_record_from_tensor(ggml_profile_record * rec, const struct ggm
        memcpy(rec->ne, node->ne, sizeof(rec->ne));
        rec->out_type = (int) node->type;
        memcpy(rec->op_params, node->op_params, sizeof(rec->op_params));
+        // Copy the tensor name (rather than aliasing node->name) so the record stays
+        // valid after the graph's meta context is reused on the next build.
+        snprintf(rec->tensor_name, sizeof(rec->tensor_name), "%s", node->name);
    } else {
        memset(rec->ne, 0, sizeof(rec->ne));
        rec->out_type = -1;
        memset(rec->op_params, 0, sizeof(rec->op_params));
+        rec->tensor_name[0] = '\0';
    }

    // Sub-op (UNARY/GLU)
@@ -169,6 +169,7 @@ class ProfileRecord:
    bytes: int
    extra: Optional[str]
    # Output tensor info
+    tensor_name: Optional[str] = None
    ne: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
    out_type: int = -1
    # Source tensors (variable length, up to GGML_MAX_SRC)
@@ -266,6 +267,7 @@ class ProfileRecord:
            "duration_ns": self.duration_ns,
            "bytes": self.bytes,
            "extra": self.extra,
+            "tensor_name": self.tensor_name,
            "ne": self.ne,
            "out_type": self.out_type,
            "n_src": len(self.ne_src),
@@ -416,6 +418,7 @@ class ProfileData:
                duration_ns=r.get("duration_ns", 0),
                bytes=r.get("bytes", 0),
                extra=r.get("extra"),
+                tensor_name=r.get("tensor_name"),
                ne=ne_out,
                out_type=int(r.get("out_type", -1)),
                ne_src=ne_src,
@@ -565,7 +568,8 @@ class ProfileData:
        print(f"\n  --- Top 5 Longest Kernels ---")
        for rec in top_k:
            shape = f" {rec.shape_str}" if rec.shape_str else ""
-            print(f"  {rec.type_name:<5} {rec.name:<28} {rec.duration_us:>10.2f} us{shape}  "
+            tname = f" '{rec.tensor_name}'" if rec.tensor_name else ""
+            print(f"  {rec.type_name:<5} {rec.name:<28}{tname} {rec.duration_us:>10.2f} us{shape}  "
                  f"(split={rec.split_id}, backend={rec.backend_id})")

        print()