mirror of
https://github.com/mostlygeek/llama-swap.git
synced 2026-06-09 06:46:34 +02:00
fix: ?no-history flag and improve /logs monitoring docs (#721)
Linux CI / run-tests (push) Successful in 4m31s
Close inactive issues / close-issues (push) Successful in 7s
Build Unified Docker Image / setup (push) Successful in 3s
Build Containers / build-and-push (cpu) (push) Failing after 12s
Build Containers / build-and-push (cuda) (push) Failing after 11s
Build Containers / build-and-push (cuda13) (push) Failing after 14s
Build Containers / build-and-push (intel) (push) Failing after 12s
Build Containers / build-and-push (musa) (push) Failing after 26s
Build Containers / build-and-push (rocm) (push) Failing after 26s
Build Containers / build-and-push (vulkan) (push) Failing after 11s
Build Containers / delete-untagged-containers (push) Has been skipped
Build Unified Docker Image / build (push) Failing after 12s
Windows CI / run-tests (push) Has been cancelled
Linux CI / run-tests (push) Successful in 4m31s
Close inactive issues / close-issues (push) Successful in 7s
Build Unified Docker Image / setup (push) Successful in 3s
Build Containers / build-and-push (cpu) (push) Failing after 12s
Build Containers / build-and-push (cuda) (push) Failing after 11s
Build Containers / build-and-push (cuda13) (push) Failing after 14s
Build Containers / build-and-push (intel) (push) Failing after 12s
Build Containers / build-and-push (musa) (push) Failing after 26s
Build Containers / build-and-push (rocm) (push) Failing after 26s
Build Containers / build-and-push (vulkan) (push) Failing after 11s
Build Containers / delete-untagged-containers (push) Has been skipped
Build Unified Docker Image / build (push) Failing after 12s
Windows CI / run-tests (push) Has been cancelled
- improve logging documentation - small tweaks for edge case issues in upstream and log requests
This commit is contained in:
@@ -20,6 +20,7 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
||||
- `v1/chat/completions`
|
||||
- `v1/responses`
|
||||
- `v1/embeddings`
|
||||
- `v1/models` - list available models
|
||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||
- `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
|
||||
- `v1/audio/voices`
|
||||
@@ -39,9 +40,17 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and
|
||||
- ✅ llama-swap API
|
||||
- `/ui` - web UI
|
||||
- `/upstream/:model_id` - direct access to upstream server ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
||||
- `/models/unload` - manually unload running models ([#58](https://github.com/mostlygeek/llama-swap/issues/58))
|
||||
- `/running` - list currently running models ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
|
||||
- `/log` - remote log monitoring
|
||||
- `POST /api/models/unload` - manually unload all running models ([#58](https://github.com/mostlygeek/llama-swap/issues/58))
|
||||
- `POST /api/models/unload/:model_id` - unload a specific model
|
||||
- `/logs` - remote log monitoring
|
||||
- `GET /logs` returns buffered plain text logs.
|
||||
- If `Accept: text/html` is sent, `/logs` redirects to `/ui/`.
|
||||
- `GET /logs/stream` keeps the connection open for live log streaming.
|
||||
- Stream endpoints send buffered history first by default; add `?no-history` to stream only new lines.
|
||||
- `GET /logs/stream/proxy` streams proxy logs only.
|
||||
- `GET /logs/stream/upstream` streams upstream process logs only.
|
||||
- `GET /logs/stream/{model_id}` streams logs for one model (including IDs with slashes, like `author/model`).
|
||||
- `/health` - just returns "OK"
|
||||
- ✅ API Key support - define keys to restrict access to API endpoints
|
||||
- ✅ Customizable
|
||||
|
||||
@@ -683,7 +683,7 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
||||
searchModelName, modelID, remainingPath, modelFound := pm.findModelInPath(upstreamPath)
|
||||
|
||||
if !modelFound {
|
||||
pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path")
|
||||
pm.sendErrorResponse(c, http.StatusNotFound, "model not found")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -32,6 +32,13 @@ func (pm *ProxyManager) streamLogsHandler(c *gin.Context) {
|
||||
c.Header("X-Accel-Buffering", "no")
|
||||
|
||||
logMonitorId := strings.TrimPrefix(c.Param("logMonitorID"), "/")
|
||||
|
||||
// Handle case where query string might be included in the parameter
|
||||
// (can happen with catch-all routes on some versions/setups)
|
||||
if idx := strings.Index(logMonitorId, "?"); idx != -1 {
|
||||
logMonitorId = logMonitorId[:idx]
|
||||
}
|
||||
|
||||
logger, err := pm.getLogger(logMonitorId)
|
||||
if err != nil {
|
||||
c.String(http.StatusBadRequest, err.Error())
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestLogMonitorIdQueryParameterStripping(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "upstream without query param",
|
||||
input: "upstream",
|
||||
expected: "upstream",
|
||||
},
|
||||
{
|
||||
name: "upstream with query param",
|
||||
input: "upstream?no-history",
|
||||
expected: "upstream",
|
||||
},
|
||||
{
|
||||
name: "proxy with multiple query params",
|
||||
input: "proxy?no-history&foo=bar",
|
||||
expected: "proxy",
|
||||
},
|
||||
{
|
||||
name: "model with slash and query param",
|
||||
input: "author/model?no-history",
|
||||
expected: "author/model",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Simulate the query parameter stripping logic
|
||||
logMonitorId := tt.input
|
||||
if idx := strings.Index(logMonitorId, "?"); idx != -1 {
|
||||
logMonitorId = logMonitorId[:idx]
|
||||
}
|
||||
|
||||
if logMonitorId != tt.expected {
|
||||
t.Errorf("Query parameter stripping failed: got %q, want %q", logMonitorId, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user