Files
llama-swap/internal/perf/monitor_test.go
T
Benson Wong 29d3d9ba20
UI Tests / run-tests (push) Failing after 11m11s
Linux CI / run-tests (push) Failing after 14m56s
Windows CI / run-tests (push) Has been cancelled
perf: add macOS GPU monitoring via mactop and ioreg (#816)
Implement performance monitoring on OSX for Apple Silicon hardware. 

The implementation uses a combination of mactop and ioreg. If mactop is
installed (`brew install mactop`) it is used in a headless cli mode to
stream usage metrics. mactop hooks into unpublished(?) C based APIs in
OSX. Rather than introduce a cgo dependency into llama-swap's build
chain only for darwin I opted to go the external process route.

ioreg, which comes bundled with OSX is used as the fallback. It does not
provide temperature and power usage data but is able to show accurate
GPU and memory utilization.

Updates #771, #814
2026-06-03 21:51:03 -07:00

314 lines
7.9 KiB
Go

package perf
import (
"io"
"sync"
"testing"
"time"
"github.com/mostlygeek/llama-swap/internal/config"
"github.com/mostlygeek/llama-swap/internal/logmon"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func newTestLogger() *logmon.Monitor {
return logmon.NewWriter(io.Discard)
}
func TestNew_DefaultConfig(t *testing.T) {
logger := newTestLogger()
m, err := New(config.PerformanceConfig{}, logger)
require.NoError(t, err)
require.NotNil(t, m)
assert.Equal(t, 100*time.Millisecond, m.conf.Every)
}
func TestNew_CustomConfig(t *testing.T) {
logger := newTestLogger()
cfg := config.PerformanceConfig{
Every: 500 * time.Millisecond,
}
m, err := New(cfg, logger)
require.NoError(t, err)
assert.Equal(t, 500*time.Millisecond, m.conf.Every)
}
func TestNew_NilLogger(t *testing.T) {
m, err := New(config.PerformanceConfig{}, nil)
assert.Error(t, err)
assert.Nil(t, m)
}
func TestNew_BelowMinimumConfig(t *testing.T) {
logger := newTestLogger()
cfg := config.PerformanceConfig{
Every: 1 * time.Millisecond,
}
m, err := New(cfg, logger)
require.NoError(t, err)
assert.Equal(t, 100*time.Millisecond, m.conf.Every)
}
func TestSubscribe_ReturnsChannels(t *testing.T) {
m, err := New(config.PerformanceConfig{}, newTestLogger())
require.NoError(t, err)
sysCh, gpuCh, unsub := m.Subscribe()
defer unsub()
assert.NotNil(t, sysCh)
assert.NotNil(t, gpuCh)
assert.NotNil(t, unsub)
}
func TestSubscribe_UnsubscribeRemovesListeners(t *testing.T) {
m, err := New(config.PerformanceConfig{}, newTestLogger())
require.NoError(t, err)
_, _, unsub := m.Subscribe()
m.mutex.RLock()
assert.Len(t, m.sysListeners, 1)
assert.Len(t, m.gpuListeners, 1)
m.mutex.RUnlock()
unsub()
m.mutex.RLock()
assert.Len(t, m.sysListeners, 0)
assert.Len(t, m.gpuListeners, 0)
m.mutex.RUnlock()
}
func TestSubscribe_MultipleSubscriptions(t *testing.T) {
m, err := New(config.PerformanceConfig{}, newTestLogger())
require.NoError(t, err)
sysCh1, gpuCh1, unsub1 := m.Subscribe()
sysCh2, gpuCh2, unsub2 := m.Subscribe()
defer unsub1()
defer unsub2()
assert.NotEqual(t, sysCh1, sysCh2)
assert.NotEqual(t, gpuCh1, gpuCh2)
m.mutex.RLock()
assert.Len(t, m.sysListeners, 2)
assert.Len(t, m.gpuListeners, 2)
m.mutex.RUnlock()
}
func TestCurrent_EmptyByDefault(t *testing.T) {
m, err := New(config.PerformanceConfig{}, newTestLogger())
require.NoError(t, err)
sysStats, gpuStats := m.Current()
assert.Empty(t, sysStats)
assert.Empty(t, gpuStats)
}
func TestCurrent_ReturnsCopies(t *testing.T) {
m, err := New(config.PerformanceConfig{}, newTestLogger())
require.NoError(t, err)
now := time.Now()
m.sysRing.Push(SysStat{Timestamp: now, MemTotalMB: 1024})
m.gpuRing.Push([]GpuStat{{Timestamp: now, ID: 0, Name: "gpu0"}})
sysStats, gpuStats := m.Current()
assert.Len(t, sysStats, 1)
assert.Len(t, gpuStats, 1)
assert.Equal(t, 1024, sysStats[0].MemTotalMB)
assert.Equal(t, "gpu0", gpuStats[0].Name)
// modifying the returned slice should not affect the original
sysStats[0].MemTotalMB = 999
original, _ := m.Current()
assert.Equal(t, 1024, original[0].MemTotalMB)
}
func TestStart_CollectsSysStats(t *testing.T) {
if testing.Short() {
t.Skip("skipping slow test")
}
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
require.NoError(t, err)
m.Start()
time.Sleep(350 * time.Millisecond)
m.Stop()
sysStats, _ := m.Current()
assert.NotEmpty(t, sysStats, "expected sys stats to be collected")
}
func TestStart_StopStopsGoroutines(t *testing.T) {
if testing.Short() {
t.Skip("skipping slow test")
}
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
require.NoError(t, err)
m.Start()
if m.stopCancel == nil {
t.Error("stopCancel should not be nil after Start()")
}
m.Stop()
if m.stopCancel != nil {
t.Error("stopCancel should be nil after Stop()")
}
}
func TestStart_SubscriberReceivesStats(t *testing.T) {
if testing.Short() {
t.Skip("skipping slow test")
}
m, err := New(config.PerformanceConfig{Every: 100 * time.Millisecond}, newTestLogger())
require.NoError(t, err)
sysCh, _, unsub := m.Subscribe()
defer unsub()
m.Start()
defer m.Stop()
select {
case s := <-sysCh:
assert.False(t, s.Timestamp.IsZero())
assert.NotEmpty(t, s.CpuUtilPerCore)
case <-time.After(500 * time.Millisecond):
t.Fatal("timed out waiting for sys stats")
}
}
func TestReadSysStats(t *testing.T) {
s, err := ReadSysStats()
require.NoError(t, err)
assert.False(t, s.Timestamp.IsZero())
assert.NotEmpty(t, s.CpuUtilPerCore)
assert.Greater(t, s.MemTotalMB, 0)
}
func TestCurrent_ConcurrentAccess(t *testing.T) {
m, err := New(config.PerformanceConfig{}, newTestLogger())
require.NoError(t, err)
m.sysRing.Push(SysStat{Timestamp: time.Now(), MemTotalMB: 1024})
m.gpuRing.Push([]GpuStat{{Timestamp: time.Now(), ID: 0}})
var wg sync.WaitGroup
for i := 0; i < 10; i++ {
wg.Add(1)
go func() {
defer wg.Done()
sys, gpu := m.Current()
assert.Len(t, sys, 1)
assert.Len(t, gpu, 1)
}()
}
wg.Wait()
}
func TestParseNvidiaSmiLine_ValidLine(t *testing.T) {
line := "0, NVIDIA GeForce RTX 3080, GPU-12345678-1234-1234-1234-123456789abc, 65, 80, 8192, 10240, 75, 250"
stat := ParseNvidiaSmiLine(line)
require.NotNil(t, stat)
assert.Equal(t, 0, stat.ID)
assert.Equal(t, "NVIDIA GeForce RTX 3080", stat.Name)
assert.Equal(t, "GPU-12345678-1234-1234-1234-123456789abc", stat.UUID)
assert.Equal(t, 65, stat.TempC)
assert.Equal(t, 80.0, stat.GpuUtilPct)
assert.Equal(t, 8192, stat.MemUsedMB)
assert.Equal(t, 10240, stat.MemTotalMB)
assert.Equal(t, 75.0, stat.FanSpeedPct)
assert.Equal(t, 250.0, stat.PowerDrawW)
assert.InDelta(t, 80.0, stat.MemUtilPct, 0.01)
}
func TestParseNvidiaSmiLine_ShortLine(t *testing.T) {
line := "0, NVIDIA GPU, GPU-123"
stat := ParseNvidiaSmiLine(line)
assert.Nil(t, stat)
}
func TestParseNvidiaSmiLine_MissingFields(t *testing.T) {
line := "0, NVIDIA GPU, GPU-123, 65, 80, 8192, 10240, 75"
stat := ParseNvidiaSmiLine(line)
assert.Nil(t, stat)
}
func TestParseNvidiaSmiLine_ZeroMemoryTotal(t *testing.T) {
line := "0, NVIDIA GPU, GPU-123, 65, 80, 0, 0, 75, 250"
stat := ParseNvidiaSmiLine(line)
require.NotNil(t, stat)
assert.Equal(t, 0.0, stat.MemUtilPct)
}
const ioregSample = `+-o AGXAcceleratorG13X <class AGXAcceleratorG13X, id 0x1000009a1, registered, matched, active, busy 0 (39191 ms), retain 108>
{
"model" = "Apple M1 Pro"
"gpu-core-count" = 16
"PerformanceStatistics" = {"In use system memory (driver)"=0,"Alloc system memory"=14511046656,"Tiler Utilization %"=34,"recoveryCount"=0,"Renderer Utilization %"=34,"Device Utilization %"=34,"In use system memory"=7688503296}
"IOClass" = "AGXAcceleratorG13X"
}`
func TestParseIoregOutput_ValidOutput(t *testing.T) {
const memTotalMB = 32768
stat := ParseIoregOutput([]byte(ioregSample), memTotalMB)
require.NotNil(t, stat)
assert.Equal(t, 0, stat.ID)
assert.Equal(t, "Apple M1 Pro (16-core GPU)", stat.Name)
assert.Equal(t, 34.0, stat.GpuUtilPct)
assert.Equal(t, 7688503296/(1024*1024), stat.MemUsedMB)
assert.Equal(t, memTotalMB, stat.MemTotalMB)
assert.InDelta(t, float64(stat.MemUsedMB)/memTotalMB*100, stat.MemUtilPct, 0.01)
// Not exposed by ioreg.
assert.Equal(t, 0, stat.TempC)
assert.Equal(t, 0.0, stat.PowerDrawW)
assert.Equal(t, 0.0, stat.FanSpeedPct)
}
func TestParseIoregOutput_NoGpuDevice(t *testing.T) {
stat := ParseIoregOutput([]byte("no gpu here"), 32768)
assert.Nil(t, stat)
}
func TestParseIoregOutput_ZeroMemTotal(t *testing.T) {
stat := ParseIoregOutput([]byte(ioregSample), 0)
require.NotNil(t, stat)
assert.Equal(t, 0.0, stat.MemUtilPct)
}
func TestParseIoregOutput_MissingModel(t *testing.T) {
const out = `"Device Utilization %"=50,"In use system memory"=1048576`
stat := ParseIoregOutput([]byte(out), 1024)
require.NotNil(t, stat)
assert.Equal(t, "Apple GPU", stat.Name)
assert.Equal(t, 50.0, stat.GpuUtilPct)
assert.Equal(t, 1, stat.MemUsedMB)
}