mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-09 07:16:44 +02:00
Add tensor name to JSON output
This commit is contained in:
@@ -31,6 +31,7 @@ typedef struct ggml_profile_record {
|
||||
const char * extra; // fusion name for fused ops, input tensor name for copy records, or NULL
|
||||
|
||||
// Output tensor info
|
||||
char tensor_name[GGML_MAX_NAME]; // copied tensor name: output tensor (e.g. "ffn_out-0"), or the input tensor for copy records; "" if unnamed
|
||||
int64_t ne[4]; // output tensor dimensions
|
||||
int out_type; // output tensor type (ggml_type), -1 if N/A
|
||||
|
||||
|
||||
@@ -1577,6 +1577,7 @@ static ggml_profile_record make_copy_record(const char * copy_dir, int backend_i
|
||||
rec.end_ns = end_ns;
|
||||
rec.bytes = bytes;
|
||||
rec.extra = input ? input->name : NULL;
|
||||
snprintf(rec.tensor_name, sizeof(rec.tensor_name), "%s", input ? input->name : "");
|
||||
rec.out_type = -1;
|
||||
rec.sub_op = -1;
|
||||
rec.n_src = 0;
|
||||
@@ -2792,6 +2793,11 @@ int ggml_backend_sched_write_profiling_json(ggml_backend_sched_t sched, FILE * f
|
||||
}
|
||||
|
||||
// Output tensor info
|
||||
if (rec.tensor_name[0] != '\0') {
|
||||
fprintf(fp, ", \"tensor_name\": \"%s\"", rec.tensor_name);
|
||||
} else {
|
||||
fprintf(fp, ", \"tensor_name\": null");
|
||||
}
|
||||
fprintf(fp, ", \"ne\": [%lld, %lld, %lld, %lld]", (long long) rec.ne[0], (long long) rec.ne[1],
|
||||
(long long) rec.ne[2], (long long) rec.ne[3]);
|
||||
fprintf(fp, ", \"out_type\": %d", rec.out_type);
|
||||
|
||||
@@ -53,10 +53,14 @@ void ggml_profile_record_from_tensor(ggml_profile_record * rec, const struct ggm
|
||||
memcpy(rec->ne, node->ne, sizeof(rec->ne));
|
||||
rec->out_type = (int) node->type;
|
||||
memcpy(rec->op_params, node->op_params, sizeof(rec->op_params));
|
||||
// Copy the tensor name (rather than aliasing node->name) so the record stays
|
||||
// valid after the graph's meta context is reused on the next build.
|
||||
snprintf(rec->tensor_name, sizeof(rec->tensor_name), "%s", node->name);
|
||||
} else {
|
||||
memset(rec->ne, 0, sizeof(rec->ne));
|
||||
rec->out_type = -1;
|
||||
memset(rec->op_params, 0, sizeof(rec->op_params));
|
||||
rec->tensor_name[0] = '\0';
|
||||
}
|
||||
|
||||
// Sub-op (UNARY/GLU)
|
||||
|
||||
@@ -169,6 +169,7 @@ class ProfileRecord:
|
||||
bytes: int
|
||||
extra: Optional[str]
|
||||
# Output tensor info
|
||||
tensor_name: Optional[str] = None
|
||||
ne: list[int] = field(default_factory=lambda: [0, 0, 0, 0])
|
||||
out_type: int = -1
|
||||
# Source tensors (variable length, up to GGML_MAX_SRC)
|
||||
@@ -266,6 +267,7 @@ class ProfileRecord:
|
||||
"duration_ns": self.duration_ns,
|
||||
"bytes": self.bytes,
|
||||
"extra": self.extra,
|
||||
"tensor_name": self.tensor_name,
|
||||
"ne": self.ne,
|
||||
"out_type": self.out_type,
|
||||
"n_src": len(self.ne_src),
|
||||
@@ -416,6 +418,7 @@ class ProfileData:
|
||||
duration_ns=r.get("duration_ns", 0),
|
||||
bytes=r.get("bytes", 0),
|
||||
extra=r.get("extra"),
|
||||
tensor_name=r.get("tensor_name"),
|
||||
ne=ne_out,
|
||||
out_type=int(r.get("out_type", -1)),
|
||||
ne_src=ne_src,
|
||||
@@ -565,7 +568,8 @@ class ProfileData:
|
||||
print(f"\n --- Top 5 Longest Kernels ---")
|
||||
for rec in top_k:
|
||||
shape = f" {rec.shape_str}" if rec.shape_str else ""
|
||||
print(f" {rec.type_name:<5} {rec.name:<28} {rec.duration_us:>10.2f} us{shape} "
|
||||
tname = f" '{rec.tensor_name}'" if rec.tensor_name else ""
|
||||
print(f" {rec.type_name:<5} {rec.name:<28}{tname} {rec.duration_us:>10.2f} us{shape} "
|
||||
f"(split={rec.split_id}, backend={rec.backend_id})")
|
||||
|
||||
print()
|
||||
|
||||
Reference in New Issue
Block a user