llama-swap/proxy/matrix_test.go

package proxy

import (
	"net/http"
	"net/http/httptest"
	"runtime"
	"testing"
	"time"

	"github.com/mostlygeek/llama-swap/internal/config"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Helper to build expanded sets for solver tests
func makeExpandedSets(sets ...struct {
	name   string
	models []string
}) []config.ExpandedSet {
	var result []config.ExpandedSet
	for _, s := range sets {
		result = append(result, config.ExpandedSet{
			SetName: s.name,
			Models:  s.models,
		})
	}
	return result
}

func es(name string, models ...string) struct {
	name   string
	models []string
} {
	return struct {
		name   string
		models []string
	}{name, models}
}

func TestMatrixSolver_AlreadyRunning(t *testing.T) {
	solver := NewMatrixSolver(
		makeExpandedSets(es("s1", "a", "b")),
		nil,
	)

	result, err := solver.Solve("a", []string{"a"})
	require.NoError(t, err)
	assert.Empty(t, result.Evict)
	assert.Equal(t, []string{"a"}, result.TargetSet)
	assert.Equal(t, "s1", result.SetName)
}

func TestMatrixSolver_NotInAnySet_RunsAlone(t *testing.T) {
	solver := NewMatrixSolver(
		makeExpandedSets(es("s1", "a", "b")),
		nil,
	)

	// Model "c" not in any set
	result, err := solver.Solve("c", []string{"a", "b"})
	require.NoError(t, err)
	assert.ElementsMatch(t, []string{"a", "b"}, result.Evict)
	assert.Equal(t, []string{"c"}, result.TargetSet)
}

func TestMatrixSolver_NotInAnySet_NothingRunning(t *testing.T) {
	solver := NewMatrixSolver(
		makeExpandedSets(es("s1", "a", "b")),
		nil,
	)

	result, err := solver.Solve("c", []string{})
	require.NoError(t, err)
	assert.Empty(t, result.Evict)
	assert.Equal(t, []string{"c"}, result.TargetSet)
}

func TestMatrixSolver_SingleSet_EvictsNonMembers(t *testing.T) {
	// Set: [a, b]. Request a when b and c are running.
	solver := NewMatrixSolver(
		makeExpandedSets(es("s1", "a", "b")),
		nil,
	)

	result, err := solver.Solve("a", []string{"b", "c"})
	require.NoError(t, err)
	// c is not in the set, so it gets evicted. b is in the set, so it stays.
	assert.Equal(t, []string{"c"}, result.Evict)
	assert.Equal(t, []string{"a", "b"}, result.TargetSet)
}

func TestMatrixSolver_PicksLowestCost(t *testing.T) {
	// Two sets containing model "a":
	// s1: [a, v]  — if v is running, cost=0; if L is running, cost=30
	// s2: [a, L]  — if L is running, cost=0; if v is running, cost=50
	solver := NewMatrixSolver(
		makeExpandedSets(
			es("s1", "a", "v"),
			es("s2", "a", "L"),
		),
		map[string]int{"v": 50, "L": 30},
	)

	// v is running. Switching to a:
	// s1 cost: v is in s1, so 0
	// s2 cost: v is NOT in s2, so 50
	// => pick s1
	result, err := solver.Solve("a", []string{"v"})
	require.NoError(t, err)
	assert.Empty(t, result.Evict)
	assert.Equal(t, []string{"a", "v"}, result.TargetSet)

	// L is running. Switching to a:
	// s1 cost: L is NOT in s1, so 30
	// s2 cost: L is in s2, so 0
	// => pick s2
	result, err = solver.Solve("a", []string{"L"})
	require.NoError(t, err)
	assert.Empty(t, result.Evict)
	assert.Equal(t, []string{"a", "L"}, result.TargetSet)
}

func TestMatrixSolver_TieBreakingByDefinitionOrder(t *testing.T) {
	// Two sets with identical cost. Definition order should win.
	solver := NewMatrixSolver(
		makeExpandedSets(
			es("s1", "a", "x"),
			es("s2", "a", "y"),
		),
		nil,
	)

	// Nothing running, both sets cost 0. s1 is first.
	result, err := solver.Solve("a", []string{})
	require.NoError(t, err)
	assert.Empty(t, result.Evict)
	assert.Equal(t, []string{"a", "x"}, result.TargetSet)
}

func TestMatrixSolver_EvictCostPreservesExpensive(t *testing.T) {
	// Model "v" costs 50 to evict, "m" costs 1 (default).
	// Sets: [g,v], [g,m]
	// Running: v, m. Request g.
	// s1=[g,v]: evict m (cost 1), keep v
	// s2=[g,m]: evict v (cost 50), keep m
	// => pick s1
	solver := NewMatrixSolver(
		makeExpandedSets(
			es("s1", "g", "v"),
			es("s2", "g", "m"),
		),
		map[string]int{"v": 50},
	)

	result, err := solver.Solve("g", []string{"v", "m"})
	require.NoError(t, err)
	assert.Equal(t, []string{"m"}, result.Evict)
	assert.Equal(t, []string{"g", "v"}, result.TargetSet)
}

func TestMatrixSolver_NothingRunning(t *testing.T) {
	solver := NewMatrixSolver(
		makeExpandedSets(
			es("s1", "g", "v"),
			es("s2", "q", "v"),
		),
		nil,
	)

	result, err := solver.Solve("g", []string{})
	require.NoError(t, err)
	assert.Empty(t, result.Evict)
	assert.Equal(t, []string{"g", "v"}, result.TargetSet)
}

// TestMatrix_ProxyRequestSwapRaceAgainstFastPath verifies that an eviction
// cannot stop a process while an in-flight ProxyRequest for that process is
// still in the [m.Unlock, Process.inFlightRequests.Add(1)] window. Without
// matrix-level inflight tracking, the eviction's Stop() races with the
// pending request and kills it mid-start.
func TestMatrix_ProxyRequestSwapRaceAgainstFastPath(t *testing.T) {
	cfg := config.Config{
		HealthCheckTimeout: 15,
		Models: map[string]config.ModelConfig{
			"model1": getTestSimpleResponderConfig("model1"),
			"model2": getTestSimpleResponderConfig("model2"),
		},
		ExpandedSets: []config.ExpandedSet{
			{SetName: "s1", Models: []string{"model1"}},
			{SetName: "s2", Models: []string{"model2"}},
		},
		Matrix: &config.MatrixConfig{},
	}

	m := NewMatrix(cfg, testLogger, testLogger)
	defer m.StopProcesses(StopImmediately)

	// Bypass real subprocesses so the test is fast and deterministic.
	m.processes["model1"].testHandler = newTestHandler("model1")
	m.processes["model2"].testHandler = newTestHandler("model2")

	// Prime: run a request through model1 so it reaches StateReady and
	// subsequent requests take the no-eviction path.
	primeReq := httptest.NewRequest("POST", "/v1/chat/completions", nil)
	primeW := httptest.NewRecorder()
	require.NoError(t, m.ProxyRequest("model1", primeW, primeReq))
	require.Equal(t, http.StatusOK, primeW.Code)
	require.Equal(t, StateReady, m.processes["model1"].CurrentState())
	require.Equal(t, StateStopped, m.processes["model2"].CurrentState())

	// Install fast-path hook that signals arrival and waits for release.
	// This parks R2 at the race window — after m.Lock is released but
	// before Process.inFlightRequests.Add(1).
	r2Reached := make(chan struct{})
	r2Release := make(chan struct{})
	m.testDelayFastPath = func() {
		close(r2Reached)
		<-r2Release
	}

	// R2: no-eviction request for model1. Will pause at the hook.
	r2Done := make(chan struct{})
	w2 := httptest.NewRecorder()
	go func() {
		defer close(r2Done)
		req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
		assert.NoError(t, m.ProxyRequest("model1", w2, req))
	}()

	// Deterministically wait for R2 to reach the race window.
	<-r2Reached

	// R3: request for model2 which requires evicting model1. Must wait for
	// R2 to finish before touching model1.
	r3Done := make(chan struct{})
	w3 := httptest.NewRecorder()
	go func() {
		defer close(r3Done)
		req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
		assert.NoError(t, m.ProxyRequest("model2", w3, req))
	}()

	// Spin until R3 has acquired m.Lock and entered the eviction path. In
	// the fixed code, R3 then blocks on m.inflight.Wait() while still
	// holding the lock, so TryLock keeps failing.
	for m.TryLock() {
		m.Unlock()
		runtime.Gosched()
	}

	// Bounded poll: give R3 a chance to demonstrate the bug by mutating
	// state. In the fixed code R3 is blocked and nothing changes; in the
	// buggy code R3 will Stop() model1 and start model2 within microseconds.
	deadline := time.Now().Add(100 * time.Millisecond)
	for time.Now().Before(deadline) {
		if m.processes["model1"].CurrentState() != StateReady ||
			m.processes["model2"].CurrentState() != StateStopped {
			break
		}
		done := false
		select {
		case <-r3Done:
			done = true
		default:
		}
		if done {
			break
		}
		runtime.Gosched()
	}

	// Invariant: R3 must be blocked while R2 is still in flight.
	select {
	case <-r3Done:
		t.Fatal("eviction completed while in-flight request was still pending — race not prevented")
	default:
	}
	assert.Equal(t, StateReady, m.processes["model1"].CurrentState(),
		"model1 must stay Ready while an in-flight request is pending")
	assert.Equal(t, StateStopped, m.processes["model2"].CurrentState(),
		"model2 must not be started until R2 finishes and model1 is evicted")

	// Release R2 and let both requests finish.
	close(r2Release)
	<-r2Done
	<-r3Done

	assert.Equal(t, http.StatusOK, w2.Code)
	assert.Contains(t, w2.Body.String(), "model1")
	assert.Equal(t, http.StatusOK, w3.Code)
	assert.Contains(t, w3.Body.String(), "model2")
}

func TestMatrixSolver_FullScenario(t *testing.T) {
	// Simulates the example config:
	// standard: [g,v], [q,v], [m,v]
	// with_rerank: [g,v,e], [q,v,e]
	// creative: [g,sd], [q,sd]
	// full: [L]
	solver := NewMatrixSolver(
		makeExpandedSets(
			es("standard", "g", "v"),
			es("standard", "q", "v"),
			es("standard", "m", "v"),
			es("with_rerank", "e", "g", "v"),
			es("with_rerank", "e", "q", "v"),
			es("creative", "g", "sd"),
			es("creative", "q", "sd"),
			es("full", "L"),
		),
		map[string]int{"v": 50, "L": 30, "whisper": 10},
	)

	// Running: g, v. Request q.
	// standard[q,v]: evict g (cost 1), keep v. Total: 1.
	// with_rerank[q,v,e]: evict g (cost 1), keep v. Total: 1.
	// => tie, pick first by definition order = standard[q,v]
	result, err := solver.Solve("q", []string{"g", "v"})
	require.NoError(t, err)
	assert.Equal(t, []string{"g"}, result.Evict)
	assert.Equal(t, []string{"q", "v"}, result.TargetSet)

	// Running: g, v. Request L.
	// full[L]: evict g (cost 1) + v (cost 50). Total: 51.
	// Only one set contains L, so pick it.
	result, err = solver.Solve("L", []string{"g", "v"})
	require.NoError(t, err)
	assert.ElementsMatch(t, []string{"g", "v"}, result.Evict)
	assert.Equal(t, []string{"L"}, result.TargetSet)

	// Running: g, v. Request sd.
	// creative[g,sd]: evict v (cost 50). Total: 50.
	// creative[q,sd]: evict g (cost 1) + v (cost 50). Total: 51.
	// => pick creative[g,sd]
	result, err = solver.Solve("sd", []string{"g", "v"})
	require.NoError(t, err)
	assert.Equal(t, []string{"v"}, result.Evict)
	assert.Equal(t, []string{"g", "sd"}, result.TargetSet)

	// Running: q, v, e. Request g.
	// standard[g,v]: evict q (1) + e (1). Total: 2.
	// with_rerank[g,v,e]: evict q (1). Total: 1.
	// creative[g,sd]: evict q (1) + v (50) + e (1). Total: 52.
	// => pick with_rerank[g,v,e]
	result, err = solver.Solve("g", []string{"e", "q", "v"})
	require.NoError(t, err)
	assert.Equal(t, []string{"q"}, result.Evict)
	assert.Equal(t, []string{"e", "g", "v"}, result.TargetSet)
}