mirror of
https://github.com/mostlygeek/llama-swap.git
synced 2026-06-09 06:46:34 +02:00
9be9a87fa0
Add Windows specific shutdown code paths so stopping of child processes is more reliable: - stopping llama-swap won't leave behind any child processes it created - uses Job Objects in Windows so the whole llama-swap tree is closed by the os - add procCtx to baseRouter. It replaces shutdownCtx as a signal for managing lifetime state. - shutdownCtx is only used by the router to stop handling new requests during shutdown - improve debug logging to make it easier to trace source of issues Fixes #804 Updates #807
102 lines
2.9 KiB
Go
102 lines
2.9 KiB
Go
package router
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
|
|
"github.com/mostlygeek/llama-swap/internal/config"
|
|
"github.com/mostlygeek/llama-swap/internal/logmon"
|
|
"github.com/mostlygeek/llama-swap/internal/process"
|
|
)
|
|
|
|
type Matrix struct {
|
|
*baseRouter
|
|
}
|
|
|
|
func NewMatrix(conf config.Config, proxylog, upstreamlog *logmon.Monitor) (*Matrix, error) {
|
|
if conf.Matrix == nil {
|
|
return nil, fmt.Errorf("matrix router requires a matrix configuration")
|
|
}
|
|
|
|
planner := &matrixPlanner{
|
|
solver: newMatrixSolver(conf.ExpandedSets, conf.Matrix.ResolvedEvictCosts()),
|
|
logger: proxylog,
|
|
}
|
|
|
|
// Build a process for every model in the config. Any model can run alone
|
|
// even if it is not part of a set; this mirrors proxy.NewMatrix.
|
|
processes := make(map[string]process.Process, len(conf.Models))
|
|
base := newBaseRouter("matrix", conf, processes, planner, proxylog)
|
|
planner.processes = processes
|
|
|
|
for mid, modelCfg := range conf.Models {
|
|
procLog := logmon.NewWriter(upstreamlog)
|
|
p, err := process.New(base.procCtx, mid, modelCfg, procLog, proxylog)
|
|
if err != nil {
|
|
base.shutdownFn()
|
|
base.procCancel()
|
|
return nil, fmt.Errorf("creating process for %q: %w", mid, err)
|
|
}
|
|
processes[mid] = p
|
|
}
|
|
|
|
r := &Matrix{baseRouter: base}
|
|
go base.run()
|
|
return r, nil
|
|
}
|
|
|
|
// matrixPlanner decides evictions by asking the matrix solver against the
|
|
// current running set.
|
|
type matrixPlanner struct {
|
|
solver *matrixSolver
|
|
processes map[string]process.Process
|
|
logger *logmon.Monitor
|
|
}
|
|
|
|
func (p *matrixPlanner) EvictionFor(target string, alsoRunning []string) []string {
|
|
return p.solver.Solve(target, p.runningSet(alsoRunning)).Evict
|
|
}
|
|
|
|
func (p *matrixPlanner) OnSwapStart(target string) {
|
|
running := p.runningModels()
|
|
result := p.solver.Solve(target, running)
|
|
switch {
|
|
case len(result.Evict) > 0:
|
|
p.logger.Infof("matrix: model=%s set=%s dsl=%q evict=%v target=%v cost=%d",
|
|
target, result.SetName, result.DSL, result.Evict, result.TargetSet, result.TotalCost)
|
|
case len(running) == 0:
|
|
p.logger.Infof("matrix: model=%s starting (no models running)", target)
|
|
default:
|
|
p.logger.Debugf("matrix: model=%s already running in set=%s dsl=%q", target, result.SetName, result.DSL)
|
|
}
|
|
}
|
|
|
|
func (p *matrixPlanner) runningModels() []string {
|
|
return p.runningSet(nil)
|
|
}
|
|
|
|
// runningSet returns the union of live processes (State != Stopped/Shutdown)
|
|
// and any extra IDs the baseRouter has already committed to loading but which
|
|
// the process state machine has not yet reflected.
|
|
func (p *matrixPlanner) runningSet(alsoRunning []string) []string {
|
|
seen := make(map[string]struct{}, len(p.processes))
|
|
var running []string
|
|
for id, proc := range p.processes {
|
|
st := proc.State()
|
|
if st == process.StateStopped || st == process.StateShutdown {
|
|
continue
|
|
}
|
|
seen[id] = struct{}{}
|
|
running = append(running, id)
|
|
}
|
|
for _, id := range alsoRunning {
|
|
if _, dup := seen[id]; dup {
|
|
continue
|
|
}
|
|
seen[id] = struct{}{}
|
|
running = append(running, id)
|
|
}
|
|
sort.Strings(running)
|
|
return running
|
|
}
|