proxy: add configurable HTTP timeouts for models and peers (#619)

Add configurable HTTP timeout settings to both models and peers to support installations that require longer timeouts than the current hardcoded defaults.

Closes #618
This commit is contained in:
Ron M
2026-04-06 04:30:27 -07:00
committed by GitHub
parent 981910d734
commit a37b4866d8
13 changed files with 437 additions and 15 deletions
+1
View File
@@ -21,6 +21,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
- Follow test naming conventions like `TestProxyManager_<test name>`, `TestProcessGroup_<test name>`, etc.
- Use `go test -v -run <name pattern for new tests>` to run any new tests you've written.
- Run `gofmt -l .` before committing to verify formatting. Fix any reported files with `gofmt -w <file>`.
- Use `make test-dev` after running new tests for a quick overall test run. This runs `go test` and `staticcheck`. Fix any static checking errors. Use this only when changes are made to any code under the `proxy/` directory.
- Use `make test-all` before completing work. This includes long running concurrency tests.
+71
View File
@@ -39,6 +39,43 @@
},
"default": {},
"description": "A dictionary of string substitutions. Macros are reusable snippets used in model cmd, cmdStop, proxy, checkEndpoint, filters.stripParams. Macro names must be <64 chars, match ^[a-zA-Z0-9_-]+$, and not be PORT or MODEL_ID. Values can be string, number, or boolean. Macros can reference other macros defined before them."
},
"timeouts": {
"type": "object",
"properties": {
"connect": {
"type": "integer",
"minimum": 0,
"default": 30,
"description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)."
},
"responseHeader": {
"type": "integer",
"minimum": 0,
"default": 60,
"description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)."
},
"tlsHandshake": {
"type": "integer",
"minimum": 0,
"default": 10,
"description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)."
},
"expectContinue": {
"type": "integer",
"minimum": 0,
"default": 1,
"description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)."
},
"idleConn": {
"type": "integer",
"minimum": 0,
"default": 90,
"description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)."
}
},
"additionalProperties": false,
"description": "Timeout settings for proxy connections."
}
},
"properties": {
@@ -241,6 +278,9 @@
"type": "boolean",
"default": false,
"description": "If true the model will not show up in /v1/models responses. It can still be used as normal in API requests."
},
"timeouts": {
"$ref": "#/definitions/timeouts"
}
}
}
@@ -367,6 +407,37 @@
"additionalProperties": false,
"default": {},
"description": "Dictionary of filter settings for peer requests. Supports stripParams and setParams."
},
"timeouts": {
  "type": "object",
  "properties": {
    "connect": {
      "type": "integer",
      "minimum": 0,
      "default": 30,
      "description": "TCP connection timeout in seconds. Set to 0 to disable (not recommended)."
    },
    "responseHeader": {
      "type": "integer",
      "minimum": 0,
      "default": 60,
      "description": "Time to wait for response headers in seconds. Set to 0 to disable (not recommended)."
    },
    "tlsHandshake": {
      "type": "integer",
      "minimum": 0,
      "default": 10,
      "description": "TLS handshake timeout in seconds. Set to 0 to disable (not recommended)."
    },
    "expectContinue": {
      "type": "integer",
      "minimum": 0,
      "default": 1,
      "description": "Expect-Continue timeout in seconds. Set to 0 to disable (not recommended)."
    },
    "idleConn": {
      "type": "integer",
      "minimum": 0,
      "default": 90,
      "description": "Idle connection timeout in seconds. Set to 0 to disable (not recommended)."
    }
  },
  "additionalProperties": false,
  "description": "Timeout settings for proxy connections to this peer."
}
}
},
+25
View File
@@ -284,6 +284,21 @@ models:
# - optional, default: undefined (use global setting)
sendLoadingState: false
# timeouts: configure proxy connection timeouts for this model
# - optional, defaults shown below
# - useful for models running on slower hardware that need longer timeouts
# - connect: TCP connection timeout in seconds
# - responseHeader: time to wait for response headers in seconds
# (increasing this helps avoid 502 errors on slow hardware)
# - tlsHandshake: TLS handshake timeout in seconds
# - idleConn: idle connection timeout in seconds
# - set any value to 0 to disable that timeout (not recommended)
timeouts:
connect: 30
responseHeader: 60
tlsHandshake: 10
idleConn: 90
# Unlisted model example:
"qwen-unlisted":
# unlisted: boolean, true or false
@@ -426,6 +441,16 @@ peers:
- z-ai/glm-4.7
- moonshotai/kimi-k2-0905
- minimax/minimax-m2.1
# timeouts: configure proxy connection timeouts for this peer
# - optional, defaults shown below
# - useful when the peer runs on slower hardware
# - set any value to 0 to disable that timeout (not recommended)
timeouts:
connect: 30
responseHeader: 60
tlsHandshake: 10
idleConn: 90
# filters: a dictionary of filter settings for peer requests
# - optional, default: empty dictionary
# - same capabilities as model filters (stripParams, setParams)
+34
View File
@@ -319,6 +319,29 @@ models:
# - recommended to be omitted and the default used
concurrencyLimit: 0
# timeouts: configure proxy connection timeouts for this model
# - optional, defaults shown below
# - useful for models on slower hardware that need longer timeouts
# - increase responseHeader to avoid "timeout awaiting response headers" errors
# - set any value to 0 to disable that timeout (not recommended)
timeouts:
# connect: TCP connection timeout in seconds
# - default: 30
connect: 30
# responseHeader: time to wait for response headers in seconds
# - default: 60
# - for slow image generation or large models, consider increasing to 300+ seconds
responseHeader: 60
# tlsHandshake: TLS handshake timeout in seconds
# - default: 10
tlsHandshake: 10
# idleConn: idle connection timeout in seconds
# - default: 90
idleConn: 90
# sendLoadingState: overrides the global sendLoadingState setting for this model
# - optional, default: undefined (use global setting)
sendLoadingState: false
@@ -444,6 +467,17 @@ peers:
# - required
# - requested path to llama-swap will be appended to the end of the proxy value
proxy: http://192.168.1.23
# timeouts: configure proxy connection timeouts for this peer
# - optional, defaults shown below
# - useful when the peer runs on slower hardware
# - set any value to 0 to disable that timeout (not recommended)
timeouts:
connect: 30
responseHeader: 60
tlsHandshake: 10
idleConn: 90
# models: a list of models served by the peer
# - required
models:
+28
View File
@@ -187,6 +187,13 @@ groups:
Name: "Model 1",
Description: "This is model 1",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
"model2": {
Cmd: "path/to/server --arg1 one",
@@ -195,6 +202,13 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
"model3": {
Cmd: "path/to/cmd --arg1 one",
@@ -203,6 +217,13 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
"model4": {
Cmd: "path/to/cmd --arg1 one",
@@ -211,6 +232,13 @@ groups:
Aliases: []string{},
Env: []string{},
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
},
HealthCheckTimeout: 15,
+106
View File
@@ -6,6 +6,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestConfig_GroupMemberIsUnique(t *testing.T) {
@@ -1438,3 +1439,108 @@ models:
})
}
// TestConfig_TimeoutsParsing checks that timeout values given for a model are
// read from YAML, and that fields left out of a partial `timeouts` block keep
// the defaults that ModelConfig.UnmarshalYAML seeds before decoding.
func TestConfig_TimeoutsParsing(t *testing.T) {
	// The fixture must keep model1 nested under models; YAML nesting is
	// expressed with spaces, so the raw string is indented explicitly.
	configYaml := `
models:
  model1:
    cmd: test-server --port ${PORT}
    timeouts:
      connect: 45
      responseHeader: 120
`
	config, err := LoadConfigFromReader(strings.NewReader(configYaml))
	require.NoError(t, err)

	modelConfig, found := config.Models["model1"]
	require.True(t, found, "model1 should exist in config")

	// Explicitly configured values override the defaults.
	assert.Equal(t, 45, modelConfig.Timeouts.Connect)
	assert.Equal(t, 120, modelConfig.Timeouts.ResponseHeader)

	// Values not present in the YAML must fall back to the defaults rather
	// than being zeroed, since 0 would silently disable the timeout.
	assert.Equal(t, 10, modelConfig.Timeouts.TLSHandshake)
	assert.Equal(t, 1, modelConfig.Timeouts.ExpectContinue)
	assert.Equal(t, 90, modelConfig.Timeouts.IdleConn)
}
// TestConfig_TimeoutsDefaults checks that a model declared without a
// `timeouts` block receives the default timeout values.
func TestConfig_TimeoutsDefaults(t *testing.T) {
	// The fixture must keep model1 nested under models; YAML nesting is
	// expressed with spaces, so the raw string is indented explicitly.
	configYaml := `
models:
  model1:
    cmd: test-server --port ${PORT}
`
	config, err := LoadConfigFromReader(strings.NewReader(configYaml))
	require.NoError(t, err)

	modelConfig, found := config.Models["model1"]
	require.True(t, found, "model1 should exist in config")

	// Default values should be set during unmarshaling.
	expected := TimeoutsConfig{
		Connect:        30,
		ResponseHeader: 60,
		TLSHandshake:   10,
		ExpectContinue: 1,
		IdleConn:       90,
	}
	assert.Equal(t, expected, modelConfig.Timeouts)
}
// TestConfig_TimeoutsZeroAllowed checks that an explicit 0 in the YAML is kept
// as 0 (timeout disabled) instead of being replaced by the default value.
func TestConfig_TimeoutsZeroAllowed(t *testing.T) {
	// The fixture must keep model1 nested under models; YAML nesting is
	// expressed with spaces, so the raw string is indented explicitly.
	configYaml := `
models:
  model1:
    cmd: test-server --port ${PORT}
    timeouts:
      connect: 0
      responseHeader: 0
`
	config, err := LoadConfigFromReader(strings.NewReader(configYaml))
	require.NoError(t, err)

	modelConfig, found := config.Models["model1"]
	require.True(t, found, "model1 should exist in config")

	// Explicit 0 should be preserved (disables timeout).
	assert.Equal(t, 0, modelConfig.Timeouts.Connect)
	assert.Equal(t, 0, modelConfig.Timeouts.ResponseHeader)
}
// TestConfig_PeerTimeoutsParsing checks that timeout values given for a peer
// are read from YAML, and that fields left out of a partial `timeouts` block
// keep the defaults that PeerConfig.UnmarshalYAML seeds before decoding.
func TestConfig_PeerTimeoutsParsing(t *testing.T) {
	// The fixture must keep peer1 nested under peers; YAML nesting is
	// expressed with spaces, so the raw string is indented explicitly.
	configYaml := `
peers:
  peer1:
    proxy: http://example.com
    models: [model1]
    timeouts:
      connect: 45
      responseHeader: 120
`
	config, err := LoadConfigFromReader(strings.NewReader(configYaml))
	require.NoError(t, err)

	peerConfig, found := config.Peers["peer1"]
	require.True(t, found, "peer1 should exist in config")

	// Explicitly configured values override the defaults.
	assert.Equal(t, 45, peerConfig.Timeouts.Connect)
	assert.Equal(t, 120, peerConfig.Timeouts.ResponseHeader)

	// Values not present in the YAML must fall back to the defaults rather
	// than being zeroed, since 0 would silently disable the timeout.
	assert.Equal(t, 10, peerConfig.Timeouts.TLSHandshake)
	assert.Equal(t, 1, peerConfig.Timeouts.ExpectContinue)
	assert.Equal(t, 90, peerConfig.Timeouts.IdleConn)
}
// TestConfig_PeerTimeoutsDefaults checks that a peer declared without a
// `timeouts` block receives the default timeout values.
func TestConfig_PeerTimeoutsDefaults(t *testing.T) {
	// The fixture must keep peer1 nested under peers; YAML nesting is
	// expressed with spaces, so the raw string is indented explicitly.
	configYaml := `
peers:
  peer1:
    proxy: http://example.com
    models: [model1]
`
	config, err := LoadConfigFromReader(strings.NewReader(configYaml))
	require.NoError(t, err)

	peerConfig, found := config.Peers["peer1"]
	require.True(t, found, "peer1 should exist in config")

	// Default values should be set during unmarshaling.
	expected := TimeoutsConfig{
		Connect:        30,
		ResponseHeader: 60,
		TLSHandshake:   10,
		ExpectContinue: 1,
		IdleConn:       90,
	}
	assert.Equal(t, expected, peerConfig.Timeouts)
}
+28
View File
@@ -173,6 +173,13 @@ groups:
Env: []string{"VAR1=value1", "VAR2=value2"},
CheckEndpoint: "/health",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
"model2": {
Cmd: "path/to/server --arg1 one",
@@ -182,6 +189,13 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
"model3": {
Cmd: "path/to/cmd --arg1 one",
@@ -191,6 +205,13 @@ groups:
Env: []string{},
CheckEndpoint: "/",
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
"model4": {
Cmd: "path/to/cmd --arg1 one",
@@ -200,6 +221,13 @@ groups:
Aliases: []string{},
Env: []string{},
SendLoadingState: &modelLoadingState,
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
},
},
HealthCheckTimeout: 15,
+19
View File
@@ -9,6 +9,15 @@ const (
MODEL_CONFIG_DEFAULT_TTL = -1
)
// TimeoutsConfig groups the timeout settings applied to proxy connections.
//
// Every field is a whole number of seconds. A value of 0 means no timeout for
// that stage, which is not recommended.
type TimeoutsConfig struct {
	// Connect bounds establishing the TCP connection.
	Connect int `yaml:"connect"`

	// ResponseHeader bounds the wait for response headers.
	ResponseHeader int `yaml:"responseHeader"`

	// TLSHandshake bounds the TLS handshake.
	TLSHandshake int `yaml:"tlsHandshake"`

	// ExpectContinue is the Expect-Continue timeout.
	ExpectContinue int `yaml:"expectContinue"`

	// IdleConn is the idle connection timeout.
	IdleConn int `yaml:"idleConn"`
}
type ModelConfig struct {
Cmd string `yaml:"cmd"`
CmdStop string `yaml:"cmdStop"`
@@ -40,6 +49,9 @@ type ModelConfig struct {
// override global setting
SendLoadingState *bool `yaml:"sendLoadingState"`
// Timeout settings for proxy connections
Timeouts TimeoutsConfig `yaml:"timeouts"`
}
func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -57,6 +69,13 @@ func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
ConcurrencyLimit: 0,
Name: "",
Description: "",
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}
// the default cmdStop to taskkill /f /t /pid ${PID}
+10
View File
@@ -12,6 +12,9 @@ type PeerConfig struct {
ApiKey string `yaml:"apiKey"`
Models []string `yaml:"models"`
Filters Filters `yaml:"filters"`
// Timeout settings for proxy connections
Timeouts TimeoutsConfig `yaml:"timeouts"`
}
func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
@@ -21,6 +24,13 @@ func (c *PeerConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
ApiKey: "",
Models: []string{},
Filters: Filters{},
Timeouts: TimeoutsConfig{
Connect: 30,
ResponseHeader: 60,
TLSHandshake: 10,
ExpectContinue: 1,
IdleConn: 90,
},
}
if err := unmarshal(&defaults); err != nil {
+17 -15
View File
@@ -34,23 +34,25 @@ func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*
}
sort.Strings(peerIDs)
// Create a shared transport with reasonable timeouts for peer connections
// these can be tuned with feedback later
peerTransport := &http.Transport{
DialContext: (&net.Dialer{
Timeout: 30 * time.Second, // Connection timeout
KeepAlive: 30 * time.Second,
}).DialContext,
TLSHandshakeTimeout: 10 * time.Second,
ResponseHeaderTimeout: 60 * time.Second, // Time to wait for response headers
ExpectContinueTimeout: 1 * time.Second,
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
}
for _, peerID := range peerIDs {
peer := peers[peerID]
// Create a transport with per-peer timeout configuration
peerTransport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(peer.Timeouts.Connect) * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
TLSHandshakeTimeout: time.Duration(peer.Timeouts.TLSHandshake) * time.Second,
ResponseHeaderTimeout: time.Duration(peer.Timeouts.ResponseHeader) * time.Second,
ExpectContinueTimeout: time.Duration(peer.Timeouts.ExpectContinue) * time.Second,
ForceAttemptHTTP2: true,
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: time.Duration(peer.Timeouts.IdleConn) * time.Second,
}
// Create reverse proxy for this peer
reverseProxy := httputil.NewSingleHostReverseProxy(peer.ProxyURL)
reverseProxy.Transport = peerTransport
+43
View File
@@ -6,6 +6,7 @@ import (
"net/url"
"strings"
"testing"
"time"
"github.com/mostlygeek/llama-swap/proxy/config"
"github.com/stretchr/testify/assert"
@@ -266,3 +267,45 @@ func TestProxyRequest_SSEHeaderModification(t *testing.T) {
// The X-Accel-Buffering header should be set to "no" for SSE
assert.Equal(t, "no", w.Header().Get("X-Accel-Buffering"))
}
func TestNewPeerProxy_CustomTimeouts(t *testing.T) {
proxyURL, _ := url.Parse("http://localhost:8080")
peers := config.PeerDictionaryConfig{
"test-peer": config.PeerConfig{
Proxy: "http://localhost:8080",
ProxyURL: proxyURL,
Models: []string{"model1"},
Timeouts: config.TimeoutsConfig{
Connect: 45,
ResponseHeader: 300,
TLSHandshake: 15,
ExpectContinue: 2,
IdleConn: 120,
},
},
}
peerProxy, err := NewPeerProxy(peers, testLogger)
assert.NoError(t, err)
assert.NotNil(t, peerProxy)
assert.True(t, peerProxy.HasPeerModel("model1"))
// Verify the timeout values are actually applied to the transport
member, found := peerProxy.proxyMap["model1"]
require.True(t, found, "model1 should exist in proxyMap")
assert.NotNil(t, member.reverseProxy)
assert.NotNil(t, member.reverseProxy.Transport)
transport, ok := member.reverseProxy.Transport.(*http.Transport)
require.True(t, ok, "Transport should be *http.Transport")
// Verify all timeout values are correctly applied
assert.Equal(t, 300*time.Second, transport.ResponseHeaderTimeout)
assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
// ForceAttemptHTTP2 should be enabled
assert.True(t, transport.ForceAttemptHTTP2)
}
+18
View File
@@ -96,6 +96,24 @@ func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, pr
var reverseProxy *httputil.ReverseProxy
if proxyURL != nil {
reverseProxy = httputil.NewSingleHostReverseProxy(proxyURL)
// Create custom transport with configured timeouts
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: time.Duration(config.Timeouts.Connect) * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
TLSHandshakeTimeout: time.Duration(config.Timeouts.TLSHandshake) * time.Second,
ResponseHeaderTimeout: time.Duration(config.Timeouts.ResponseHeader) * time.Second,
ExpectContinueTimeout: time.Duration(config.Timeouts.ExpectContinue) * time.Second,
ForceAttemptHTTP2: true,
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: time.Duration(config.Timeouts.IdleConn) * time.Second,
}
reverseProxy.Transport = transport
reverseProxy.ModifyResponse = func(resp *http.Response) error {
// prevent nginx from buffering streaming responses (e.g., SSE)
if strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "text/event-stream") {
+37
View File
@@ -2,6 +2,7 @@ package proxy
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
@@ -569,3 +570,39 @@ func (w *panicOnWriteResponseWriter) Write(b []byte) (int, error) {
}
return w.ResponseRecorder.Write(b)
}
// TestProcess_CustomTimeouts checks that the timeouts configured on a model
// are applied to the http.Transport behind the process's reverse proxy.
func TestProcess_CustomTimeouts(t *testing.T) {
	modelConfig := config.ModelConfig{
		Cmd:           "echo test",
		Proxy:         "http://localhost:8080",
		CheckEndpoint: "/health",
		Timeouts: config.TimeoutsConfig{
			Connect:        45,
			ResponseHeader: 120,
			TLSHandshake:   15,
			ExpectContinue: 2,
			IdleConn:       120,
		},
	}

	debugLogger := NewLogMonitorWriter(io.Discard)
	process := NewProcess("test-model", 30, modelConfig, debugLogger, debugLogger)

	// Preconditions are fatal: the statements below dereference these values,
	// so carrying on after a failure would panic instead of reporting a clean
	// test failure.
	if process == nil {
		t.Fatal("NewProcess returned nil")
	}
	assert.Equal(t, "test-model", process.ID)
	if process.reverseProxy == nil {
		t.Fatal("process.reverseProxy should not be nil")
	}

	// Verify it's using a non-nil *http.Transport (not some other type).
	transport, ok := process.reverseProxy.Transport.(*http.Transport)
	if !ok || transport == nil {
		t.Fatalf("Transport should be a non-nil *http.Transport, got %T", process.reverseProxy.Transport)
	}

	// Verify the timeouts that are visible on the transport. Connect is not
	// checked here: it is set on the net.Dialer, and the transport only holds
	// that dialer's DialContext function.
	assert.Equal(t, 120*time.Second, transport.ResponseHeaderTimeout)
	assert.Equal(t, 15*time.Second, transport.TLSHandshakeTimeout)
	assert.Equal(t, 2*time.Second, transport.ExpectContinueTimeout)
	assert.Equal(t, 120*time.Second, transport.IdleConnTimeout)
	assert.True(t, transport.ForceAttemptHTTP2)
}