diff --git a/internal/perf/gpu_parse.go b/internal/perf/gpu_parse.go index 011ef42..9fa3041 100644 --- a/internal/perf/gpu_parse.go +++ b/internal/perf/gpu_parse.go @@ -1,6 +1,10 @@ package perf import ( + "encoding/json" + "fmt" + "math" + "regexp" "strconv" "strings" "time" @@ -43,3 +47,168 @@ func ParseNvidiaSmiLine(line string) *GpuStat { PowerDrawW: powerDraw, } } + +// mactopOutput maps the subset of mactop's headless JSON output that is +// relevant to GpuStat. Note that mactop's memory object is whole-system memory, +// not GPU-attributed; the darwin monitor overlays ioreg's GPU-attributed +// unified memory (see overlayIoregMem) so both backends report consistent +// memory figures. +type mactopOutput struct { + SocMetrics struct { + GPUPower float64 `json:"gpu_power"` + GPUFreq int `json:"gpu_freq_mhz"` + GPUTemp float64 `json:"gpu_temp"` + } `json:"soc_metrics"` + Memory struct { + Total uint64 `json:"total"` + Used uint64 `json:"used"` + } `json:"memory"` + GPUUsage float64 `json:"gpu_usage"` + SystemInfo struct { + Name string `json:"name"` + GPUCoreCount int `json:"gpu_core_count"` + } `json:"system_info"` + Fans []struct { + RPM int `json:"rpm"` + MinRPM int `json:"min_rpm"` + MaxRPM int `json:"max_rpm"` + } `json:"fans"` + Temperatures []struct { + Group string `json:"group"` + Avg float64 `json:"avg_celsius"` + } `json:"temperatures"` +} + +// ioreg output uses ` = ` (with spaces) for top-level device properties and +// `=` (no spaces) for values inside nested dictionaries such as +// PerformanceStatistics. +var ( + reIoregModel = regexp.MustCompile(`"model"\s*=\s*"([^"]+)"`) + reIoregCoreCount = regexp.MustCompile(`"gpu-core-count"\s*=\s*(\d+)`) + reIoregUtil = regexp.MustCompile(`"Device Utilization %"=(\d+)`) + reIoregMemUsed = regexp.MustCompile(`"In use system memory"=(\d+)`) +) + +// ParseIoregOutput parses `ioreg -r -c IOGPU -d 1 -f` output into a GpuStat for +// the Apple Silicon integrated GPU. 
This is a fallback for when mactop is not +// installed: utilization and used memory are available, but power, temperature, +// and fan speed are not exposed by ioreg. memTotalMB is the unified memory size +// supplied by the caller, since Apple Silicon shares memory between CPU and GPU. +// Returns nil if no GPU device is found in the output. +func ParseIoregOutput(out []byte, memTotalMB int) *GpuStat { + utilMatch := reIoregUtil.FindSubmatch(out) + memMatch := reIoregMemUsed.FindSubmatch(out) + if utilMatch == nil && memMatch == nil { + return nil + } + + var gpuUtil float64 + if utilMatch != nil { + gpuUtil, _ = strconv.ParseFloat(string(utilMatch[1]), 64) + } + + const toMB = 1024 * 1024 + var memUsedMB int + if memMatch != nil { + memUsedBytes, _ := strconv.ParseInt(string(memMatch[1]), 10, 64) + memUsedMB = int(memUsedBytes / toMB) + } + + var memUtil float64 + if memTotalMB > 0 { + memUtil = float64(memUsedMB) / float64(memTotalMB) * 100 + } + + name := "Apple GPU" + if m := reIoregModel.FindSubmatch(out); m != nil { + name = string(m[1]) + } + if m := reIoregCoreCount.FindSubmatch(out); m != nil { + if cores, err := strconv.Atoi(string(m[1])); err == nil && cores > 0 { + name = fmt.Sprintf("%s (%d-core GPU)", name, cores) + } + } + + return &GpuStat{ + Timestamp: time.Now(), + ID: 0, + Name: name, + GpuUtilPct: gpuUtil, + MemUtilPct: memUtil, + MemUsedMB: memUsedMB, + MemTotalMB: memTotalMB, + } +} + +// ParseMactopLine parses a single line of mactop headless JSON output into a +// GpuStat for the Apple Silicon integrated GPU. Returns nil if the line cannot +// be parsed. 
+func ParseMactopLine(line string) *GpuStat { + line = strings.TrimSpace(line) + if line == "" { + return nil + } + + var out mactopOutput + if err := json.Unmarshal([]byte(line), &out); err != nil { + return nil + } + + const toMB = 1024 * 1024 + memUsedMB := int(out.Memory.Used / toMB) + memTotalMB := int(out.Memory.Total / toMB) + + var memUtil float64 + if memTotalMB > 0 { + memUtil = float64(memUsedMB) / float64(memTotalMB) * 100 + } + + name := out.SystemInfo.Name + if name == "" { + name = "Apple GPU" + } + if out.SystemInfo.GPUCoreCount > 0 { + name = fmt.Sprintf("%s (%d-core GPU)", name, out.SystemInfo.GPUCoreCount) + } + + // Unified memory has no dedicated VRAM sensor; use the memory temperature + // group when mactop exposes it. + var vramTempC int + for _, t := range out.Temperatures { + if strings.EqualFold(t.Group, "Memory") { + vramTempC = int(math.Round(t.Avg)) + break + } + } + + // Average fan load across all fans as a percentage of their RPM range. + var fanSpeed float64 + var fanCount int + for _, f := range out.Fans { + if f.MaxRPM > f.MinRPM { + pct := float64(f.RPM-f.MinRPM) / float64(f.MaxRPM-f.MinRPM) * 100 + if pct < 0 { + pct = 0 + } + fanSpeed += pct + fanCount++ + } + } + if fanCount > 0 { + fanSpeed /= float64(fanCount) + } + + return &GpuStat{ + Timestamp: time.Now(), + ID: 0, + Name: name, + TempC: int(math.Round(out.SocMetrics.GPUTemp)), + VramTempC: vramTempC, + GpuUtilPct: out.GPUUsage, + MemUtilPct: memUtil, + MemUsedMB: memUsedMB, + MemTotalMB: memTotalMB, + FanSpeedPct: fanSpeed, + PowerDrawW: out.SocMetrics.GPUPower, + } +} diff --git a/internal/perf/monitor_darwin.go b/internal/perf/monitor_darwin.go index ff6dee1..498903a 100644 --- a/internal/perf/monitor_darwin.go +++ b/internal/perf/monitor_darwin.go @@ -1,7 +1,11 @@ package perf import ( + "bufio" "context" + "fmt" + "os/exec" + "strings" "time" "github.com/mostlygeek/llama-swap/internal/logmon" @@ -11,7 +15,156 @@ import ( ) func getGpuStats(ctx context.Context, every 
time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { - return nil, ErrNotImplemented + if ch, err := tryMactop(ctx, every, logger); err == nil { + logger.Info("using mactop for GPU monitoring") + return ch, nil + } else { + logger.Debugf("mactop: %s", err.Error()) + } + + if ch, err := tryIoreg(ctx, every, logger); err == nil { + logger.Info("using ioreg for GPU monitoring") + return ch, nil + } else { + logger.Debugf("ioreg: %s", err.Error()) + } + + return nil, ErrNoGpuTool +} + +// tryIoreg polls `ioreg -r -c IOGPU -d 1 -f` for Apple Silicon GPU stats. It is +// a fallback for when mactop is not installed. ioreg exposes GPU utilization and +// used memory but not power, temperature, or fan speed. +func tryIoreg(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + if _, err := exec.LookPath("ioreg"); err != nil { + return nil, ErrNoGpuTool + } + + // Verify ioreg actually reports a GPU device before committing to it, so we + // can fall through to ErrNoGpuTool otherwise. + if stat := sampleIoreg(ctx); stat == nil { + return nil, fmt.Errorf("ioreg reported no GPU device") + } + + if every < time.Second { + every = time.Second + } + + ch := make(chan []GpuStat, 1) + + go func() { + defer close(ch) + ticker := time.NewTicker(every) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + stat := sampleIoreg(ctx) + if stat == nil { + continue + } + select { + case ch <- []GpuStat{*stat}: + default: + } + } + } + }() + + return ch, nil +} + +// sampleIoreg runs ioreg once and parses a single GpuStat, or returns nil. 
+func sampleIoreg(ctx context.Context) *GpuStat { + out, err := exec.CommandContext(ctx, "ioreg", "-r", "-c", "IOGPU", "-d", "1", "-f").Output() + if err != nil { + return nil + } + + var memTotalMB int + if vmStat, err := mem.VirtualMemory(); err == nil { + memTotalMB = int(vmStat.Total / (1024 * 1024)) + } + + return ParseIoregOutput(out, memTotalMB) +} + +// overlayIoregMem replaces a GpuStat's memory fields with the GPU-attributed +// unified memory reported by ioreg. mactop only exposes whole-system memory, so +// without this the mactop and ioreg backends would report different memory +// semantics. It is a no-op when ioreg is unavailable or reports no GPU memory, +// leaving the mactop-supplied values in place. +func overlayIoregMem(ctx context.Context, stat *GpuStat) { + ioStat := sampleIoreg(ctx) + if ioStat == nil { + return + } + stat.MemUsedMB = ioStat.MemUsedMB + stat.MemTotalMB = ioStat.MemTotalMB + stat.MemUtilPct = ioStat.MemUtilPct +} + +// tryMactop streams Apple Silicon GPU stats from mactop's headless mode. +// See https://github.com/metaspartan/mactop. mactop emits one JSON object per +// sample to stdout, which we parse into GpuStat. +func tryMactop(ctx context.Context, every time.Duration, logger *logmon.Monitor) (chan []GpuStat, error) { + if _, err := exec.LookPath("mactop"); err != nil { + return nil, ErrNoGpuTool + } + + // mactop samples power over the interval, so give it at least a second. 
+ intervalMs := int(every.Milliseconds()) + if intervalMs < 1000 { + intervalMs = 1000 + } + + cmd := exec.CommandContext(ctx, "mactop", + "--headless", + "--format", "json", + "--interval", fmt.Sprintf("%d", intervalMs), + ) + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("mactop stdout pipe failed: %w", err) + } + + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("mactop start failed: %w", err) + } + + ch := make(chan []GpuStat, 1) + + go func() { + defer close(ch) + + scanner := bufio.NewScanner(stdout) + // mactop's JSON objects can be large; allow generous line lengths. + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } + + stat := ParseMactopLine(line) + if stat != nil { + // mactop only reports whole-system memory; overlay ioreg's + // GPU-attributed unified memory so both backends are consistent. + overlayIoregMem(ctx, stat) + select { + case ch <- []GpuStat{*stat}: + default: + } + } + } + cmd.Wait() + }() + + return ch, nil } func readSysStats() (SysStat, error) { diff --git a/internal/perf/monitor_test.go b/internal/perf/monitor_test.go index 5a500a0..70380f0 100644 --- a/internal/perf/monitor_test.go +++ b/internal/perf/monitor_test.go @@ -264,3 +264,50 @@ func TestParseNvidiaSmiLine_ZeroMemoryTotal(t *testing.T) { require.NotNil(t, stat) assert.Equal(t, 0.0, stat.MemUtilPct) } + +const ioregSample = `+-o AGXAcceleratorG13X + { + "model" = "Apple M1 Pro" + "gpu-core-count" = 16 + "PerformanceStatistics" = {"In use system memory (driver)"=0,"Alloc system memory"=14511046656,"Tiler Utilization %"=34,"recoveryCount"=0,"Renderer Utilization %"=34,"Device Utilization %"=34,"In use system memory"=7688503296} + "IOClass" = "AGXAcceleratorG13X" + }` + +func TestParseIoregOutput_ValidOutput(t *testing.T) { + const memTotalMB = 32768 + + stat := ParseIoregOutput([]byte(ioregSample), memTotalMB) + require.NotNil(t, 
stat) + + assert.Equal(t, 0, stat.ID) + assert.Equal(t, "Apple M1 Pro (16-core GPU)", stat.Name) + assert.Equal(t, 34.0, stat.GpuUtilPct) + assert.Equal(t, 7688503296/(1024*1024), stat.MemUsedMB) + assert.Equal(t, memTotalMB, stat.MemTotalMB) + assert.InDelta(t, float64(stat.MemUsedMB)/memTotalMB*100, stat.MemUtilPct, 0.01) + // Not exposed by ioreg. + assert.Equal(t, 0, stat.TempC) + assert.Equal(t, 0.0, stat.PowerDrawW) + assert.Equal(t, 0.0, stat.FanSpeedPct) +} + +func TestParseIoregOutput_NoGpuDevice(t *testing.T) { + stat := ParseIoregOutput([]byte("no gpu here"), 32768) + assert.Nil(t, stat) +} + +func TestParseIoregOutput_ZeroMemTotal(t *testing.T) { + stat := ParseIoregOutput([]byte(ioregSample), 0) + require.NotNil(t, stat) + assert.Equal(t, 0.0, stat.MemUtilPct) +} + +func TestParseIoregOutput_MissingModel(t *testing.T) { + const out = `"Device Utilization %"=50,"In use system memory"=1048576` + + stat := ParseIoregOutput([]byte(out), 1024) + require.NotNil(t, stat) + assert.Equal(t, "Apple GPU", stat.Name) + assert.Equal(t, 50.0, stat.GpuUtilPct) + assert.Equal(t, 1, stat.MemUsedMB) +} diff --git a/ui-svelte/src/routes/Performance.svelte b/ui-svelte/src/routes/Performance.svelte index 6f16f31..f6cc78a 100644 --- a/ui-svelte/src/routes/Performance.svelte +++ b/ui-svelte/src/routes/Performance.svelte @@ -402,7 +402,7 @@

This is an experimental feature. Please use discussion #771 for instructions and to share feedback.