diff --git a/.github/workflows/closeinactive.yml b/.github/workflows/closeinactive.yml index ed74f16..56cf04b 100644 --- a/.github/workflows/closeinactive.yml +++ b/.github/workflows/closeinactive.yml @@ -11,7 +11,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v9 + - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f #v10.2.0 with: days-before-issue-stale: 14 days-before-issue-close: 14 diff --git a/.github/workflows/config-schema.yml b/.github/workflows/config-schema.yml index 6a8b897..342ce18 100644 --- a/.github/workflows/config-schema.yml +++ b/.github/workflows/config-schema.yml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 - name: Validate JSON Schema run: | @@ -45,7 +45,7 @@ jobs: echo "✓ config-schema.json is valid" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0 with: python-version: "3.x" diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index 9d9175c..4c51f17 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -38,7 +38,7 @@ jobs: fail-fast: false steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 - name: Free up disk space if: matrix.platform == 'rocm' @@ -58,13 +58,13 @@ jobs: # no-op for amd64-only builds, so leaving it on for every matrix # entry keeps the workflow simple. - name: Set up QEMU - uses: docker/setup-qemu-action@v4 + uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a #v4.0.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v4 + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd #v4.0.0 - name: Log in to GitHub Container Registry - uses: docker/login-action@v4 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 #v4.1.0 with: registry: ghcr.io username: ${{ github.actor }} diff --git a/.github/workflows/go-ci-windows.yml b/.github/workflows/go-ci-windows.yml index df1abae..3cf003c 100644 --- a/.github/workflows/go-ci-windows.yml +++ b/.github/workflows/go-ci-windows.yml @@ -31,17 +31,17 @@ jobs: run-tests: runs-on: windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c #6.4.0 with: - go-version: '1.23' + go-version-file: go.mod # cache simple-responder to save the build time - name: Restore Simple Responder id: restore-simple-responder - uses: actions/cache/restore@v4 + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5 with: path: ./build key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }} @@ -56,11 +56,11 @@ jobs: # nothing new to save ... skip this step if: steps.restore-simple-responder.outputs.cache-hit != 'true' id: save-simple-responder - uses: actions/cache/save@v4 + uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5 with: path: ./build key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }} - name: Test all shell: bash - run: make test-all \ No newline at end of file + run: make test-all diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml index 6c3976c..28db9ed 100644 --- a/.github/workflows/go-ci.yml +++ b/.github/workflows/go-ci.yml @@ -30,24 +30,24 @@ jobs: run-tests: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c #6.4.0 with: go-version-file: go.mod # Only run in this linux based runner - name: Check Formatting run: | - if [ "$(gofmt -l . | grep -v 'event/.*_test.go' | wc -l)" -gt 0 ]; then - gofmt -l . | grep -v 'event/.*_test.go' + if [ "$(gofmt -l . | wc -l)" -gt 0 ]; then + gofmt -l . exit 1 fi # cache simple-responder to save the build time - name: Restore Simple Responder id: restore-simple-responder - uses: actions/cache/restore@v4 + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5 with: path: ./build key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }} @@ -60,7 +60,7 @@ jobs: # nothing new to save ... skip this step if: steps.restore-simple-responder.outputs.cache-hit != 'true' id: save-simple-responder - uses: actions/cache/save@v4 + uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae #v5.0.5 with: path: ./build key: ${{ runner.os }}-simple-responder-${{ hashFiles('cmd/simple-responder/simple-responder.go') }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1c4c119..0cc4ec2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,14 +20,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 with: fetch-depth: 0 ref: ${{ github.event.inputs.tag || github.ref }} - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c #6.4.0 - name: Set up Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # 6.4.0 with: node-version: "24" - name: Install dependencies and build UI @@ -37,7 +37,7 @@ jobs: npm run build - name: Run GoReleaser - uses: goreleaser/goreleaser-action@v6 + uses: goreleaser/goreleaser-action@1a80836c5c9d9e5755a25cb59ec6f45a3b5f41a8 #7.2.1 with: # either 'goreleaser' (default) or 'goreleaser-pro' distribution: goreleaser @@ -61,7 +61,7 @@ jobs: fi - name: "Trigger tap repository update" - uses: peter-evans/repository-dispatch@v2 + uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 #4.0.1 with: token: ${{ secrets.TAP_REPO_PAT }} repository: mostlygeek/homebrew-llama-swap diff --git a/.github/workflows/ui-tests.yml b/.github/workflows/ui-tests.yml index b0aded1..21c1152 100644 --- a/.github/workflows/ui-tests.yml +++ b/.github/workflows/ui-tests.yml @@ -20,10 +20,10 @@ jobs: run-tests: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 - name: Set up Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # 6.4.0 with: node-version: '24' cache: 'npm' diff --git a/.github/workflows/unified-docker.yml b/.github/workflows/unified-docker.yml index 43b6af4..93f1026 100644 --- a/.github/workflows/unified-docker.yml +++ b/.github/workflows/unified-docker.yml @@ -75,7 +75,7 @@ jobs: backend: ${{ fromJSON(needs.setup.outputs.matrix) }} steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2 - name: Free up disk space run: | @@ -94,11 +94,11 @@ jobs: # llama-swap-builder (which has ccache warm) to avoid exhausting disk. - name: Set up Docker Buildx if: ${{ !env.ACT }} - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd #v4.0.0 - name: Log in to GitHub Container Registry if: ${{ !env.ACT }} - uses: docker/login-action@v3 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 #v4.1.0 with: registry: ghcr.io username: ${{ github.actor }} diff --git a/README.md b/README.md index a68d339..254f3de 100644 --- a/README.md +++ b/README.md @@ -57,8 +57,9 @@ Built in Go for performance and simplicity, llama-swap has zero dependencies and - ✅ Customizable - Run concurrent models with a custom DSL swap matrix ([#643](https://github.com/mostlygeek/llama-swap/issues/643)) - Automatic unloading of models after timeout by setting a `ttl` - - Reliable Docker and Podman support using `cmd` and `cmdStop` together + - Docker and Podman support using `cmd` and `cmdStop` together - Preload models on startup with `hooks` ([#235](https://github.com/mostlygeek/llama-swap/pull/235)) + - Apply filters to requests to control inference with `stripParams`, `setParams` and `setParamsByID` ### Web UI @@ -94,8 +95,24 @@ llama-swap can be installed in multiple ways ### Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap)) -Nightly container images with llama-swap and llama-server are built for multiple platforms (cuda, vulkan, intel, etc.) including [non-root variants with improved security](docs/container-security.md). -The stable-diffusion.cpp server is also included for the musa and vulkan platforms. +Two types of container images are built nightly for llama-swap: + +1. A unified container with llama-server, ik-llama-server, stable-diffusion.cpp, whisper.cpp and llama-swap built from source. This is only available for cuda and vulkan but has more capabilities. This one is recommended for use. +2. A legacy image that is based on llama.cpp's images and llama-swap copied into the container. Use this one if you prefer to stay close to llama.cpp's container images. + +#### Unified container (Recommended) + +```shell +$ docker pull ghcr.io/mostlygeek/llama-swap:unified-cuda + +# run with a custom configuration and models directory +$ docker run -it --rm --runtime nvidia -p 9292:8080 \ + -v /path/to/models:/models \ + -v /path/to/custom/config.yaml:/etc/llama-swap/config/config.yaml \ + ghcr.io/mostlygeek/llama-swap:unified-cuda +``` + +#### Legacy container ```shell $ docker pull ghcr.io/mostlygeek/llama-swap:cuda @@ -105,14 +122,6 @@ $ docker run -it --rm --runtime nvidia -p 9292:8080 \ -v /path/to/models:/models \ -v /path/to/custom/config.yaml:/app/config.yaml \ ghcr.io/mostlygeek/llama-swap:cuda - -# configuration hot reload supported with a -# directory volume mount -$ docker run -it --rm --runtime nvidia -p 9292:8080 \ - -v /path/to/models:/models \ - -v /path/to/custom/config.yaml:/app/config.yaml \ - -v /path/to/config:/config \ - ghcr.io/mostlygeek/llama-swap:cuda -config /config/config.yaml -watch-config ```
@@ -268,6 +277,6 @@ For Python based inference servers like vllm or tabbyAPI it is recommended to ru ## Star History > [!NOTE] -> ⭐️ Star this project to help others discover it! +> Thank you to everyone who has given this project a ⭐️! [![Star History Chart](https://api.star-history.com/svg?repos=mostlygeek/llama-swap&type=Date)](https://www.star-history.com/#mostlygeek/llama-swap&Date) diff --git a/cmd/monitor-test/main.go b/cmd/monitor-test/main.go index 6a5b6a2..6965d72 100644 --- a/cmd/monitor-test/main.go +++ b/cmd/monitor-test/main.go @@ -75,7 +75,7 @@ func main() { } if *stream { - m, _ := perf.New(config.PerformanceConfig{Enable: true, Every: every}, l) + m, _ := perf.New(config.PerformanceConfig{Every: every}, l) m.Start() defer m.Stop() sysCh, gpuCh, unsub := m.Subscribe() diff --git a/config-schema.json b/config-schema.json index 7039ace..8fa0d04 100644 --- a/config-schema.json +++ b/config-schema.json @@ -145,33 +145,21 @@ "performance": { "type": "object", "properties": { - "enable": { + "disabled": { "type": "boolean", - "default": true, - "description": "Enable or disable system performance monitoring." + "default": false, + "description": "Disable system performance monitoring." }, "every": { "type": "string", "pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$", "default": "15s", "description": "Delay between polling for new performance statistics. Minimum duration is 1s. Lower values use more RAM as stats are kept in memory." - }, - "maxAge": { - "type": "string", - "pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$", - "default": "1h", - "description": "Maximum age of performance statistics before they are eligible for garbage collection." - }, - "gc": { - "type": "string", - "pattern": "^[-+]?(\\d+(\\.\\d+)?(ns|us|ms|s|m|h))+$", - "default": "5m", - "description": "Garbage collection frequency for clearing old performance statistics." } }, "additionalProperties": false, "default": {}, - "description": "Configuration for system monitoring statistics. Timing values are duration strings like 1s, 1h30m, 90m, 2h10s." + "description": "Configuration for CPU, RAM and GPU monitoring statistics." }, "startPort": { "type": "integer", @@ -548,4 +536,4 @@ } } ] -} \ No newline at end of file +} diff --git a/config.example.yaml b/config.example.yaml index 060f8d2..9554326 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -58,25 +58,15 @@ captureBuffer: 15 # performance: configuration for system monitoring statistics # - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc. performance: - # enabled: boolean - # - default: true - enable: true + # disabled: boolean + # - default: false + disabled: false # every: delay between polling for new performance statistics - # - default: 15s - # - minimum duration 1s - # - note: setting this very low will use up more RAM as stats are kept in memory. + # - default: 5s + # - minimum duration 5s every: 15s - # maxAge: maximum age of a performance statistics before it is eligible for garbage collection - # - default: 1h - maxAge: 12h - - # gc: garbage collection frequency in seconds - # - how many seconds the garbage collector runs to clear old stats - # - default 5m - gc: 5m - # startPort: sets the starting port number for the automatic ${PORT} macro. # - optional, default: 5800 # - the ${PORT} macro can be used in model.cmd and model.proxy settings @@ -118,8 +108,7 @@ globalTTL: 0 macros: # Example of a multi-line macro "latest-llama": > - /path/to/llama-server/llama-server-ec9e0301 - --port ${PORT} + /path/to/llama-server/llama-server-ec9e0301 --port ${PORT} "default_ctx": 4096 @@ -279,7 +268,8 @@ models: # the ${temp} macro will remain a float temperature: ${temp} - note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, context=${default_ctx}" + note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, + context=${default_ctx}" a_list: - 1 @@ -291,7 +281,7 @@ models: b: 2 # objects can contain complex types with macro substitution # becomes: c: [0.7, false, "model: llama"] - c: ["${temp}", false, "model: ${MODEL_ID}"] + c: [ "${temp}", false, "model: ${MODEL_ID}" ] # concurrencyLimit: overrides the allowed number of active parallel requests to a model # - optional, default: 0 diff --git a/docs/configuration.md b/docs/configuration.md index 738077c..3b9967f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -146,6 +146,18 @@ metricsMaxInMemory: 1000 # - set to 0 to disable captureBuffer: 15 +# performance: configuration for system monitoring statistics +# - timing values are duration strings like 1s, 1h30m, 90m, 2h10s, etc. +performance: + # disabled: boolean + # - default: false + enable: true + + # every: delay between polling for new performance statistics + # - default: 5s + # - minimum duration 5s + every: 5s + # startPort: sets the starting port number for the automatic ${PORT} macro. # - optional, default: 5800 # - the ${PORT} macro can be used in model.cmd and model.proxy settings @@ -187,8 +199,7 @@ globalTTL: 0 macros: # Example of a multi-line macro "latest-llama": > - /path/to/llama-server/llama-server-ec9e0301 - --port ${PORT} + /path/to/llama-server/llama-server-ec9e0301 --port ${PORT} "default_ctx": 4096 @@ -348,7 +359,8 @@ models: # the ${temp} macro will remain a float temperature: ${temp} - note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, context=${default_ctx}" + note: "The ${MODEL_ID} is running on port ${PORT} temp=${temp}, + context=${default_ctx}" a_list: - 1 diff --git a/event/default_test.go b/event/default_test.go index ded5a2e..2d845ee 100644 --- a/event/default_test.go +++ b/event/default_test.go @@ -1,54 +1,54 @@ -// Copyright (c) Roman Atachiants and contributore. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for detaile. - -package event - -import ( - "sync" - "sync/atomic" - "testing" - - "github.com/stretchr/testify/assert" -) - -/* -cpu: 13th Gen Intel(R) Core(TM) i7-13700K -BenchmarkSubcribeConcurrent-24 1826686 606.3 ns/op 1648 B/op 5 allocs/op -*/ -func BenchmarkSubscribeConcurrent(b *testing.B) { - d := NewDispatcher() - b.ReportAllocs() - b.ResetTimer() - - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - unsub := Subscribe(d, func(ev MyEvent1) {}) - unsub() - } - }) -} - -func TestDefaultPublish(t *testing.T) { - var wg sync.WaitGroup - - // Subscribe - var count int64 - defer On(func(ev MyEvent1) { - atomic.AddInt64(&count, 1) - wg.Done() - })() - - defer OnType(TypeEvent1, func(ev MyEvent1) { - atomic.AddInt64(&count, 1) - wg.Done() - })() - - // Publish - wg.Add(4) - Emit(MyEvent1{}) - Emit(MyEvent1{}) - - // Wait and check - wg.Wait() - assert.Equal(t, int64(4), count) -} +// Copyright (c) Roman Atachiants and contributore. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for detaile. + +package event + +import ( + "sync" + "sync/atomic" + "testing" + + "github.com/stretchr/testify/assert" +) + +/* +cpu: 13th Gen Intel(R) Core(TM) i7-13700K +BenchmarkSubcribeConcurrent-24 1826686 606.3 ns/op 1648 B/op 5 allocs/op +*/ +func BenchmarkSubscribeConcurrent(b *testing.B) { + d := NewDispatcher() + b.ReportAllocs() + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + unsub := Subscribe(d, func(ev MyEvent1) {}) + unsub() + } + }) +} + +func TestDefaultPublish(t *testing.T) { + var wg sync.WaitGroup + + // Subscribe + var count int64 + defer On(func(ev MyEvent1) { + atomic.AddInt64(&count, 1) + wg.Done() + })() + + defer OnType(TypeEvent1, func(ev MyEvent1) { + atomic.AddInt64(&count, 1) + wg.Done() + })() + + // Publish + wg.Add(4) + Emit(MyEvent1{}) + Emit(MyEvent1{}) + + // Wait and check + wg.Wait() + assert.Equal(t, int64(4), count) +} diff --git a/event/event_test.go b/event/event_test.go index 1868ef0..e10cd54 100644 --- a/event/event_test.go +++ b/event/event_test.go @@ -1,324 +1,324 @@ -// Copyright (c) Roman Atachiants and contributore. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for detaile. - -package event - -import ( - "fmt" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/stretchr/testify/assert" -) - -func TestPublish(t *testing.T) { - d := NewDispatcher() - var wg sync.WaitGroup - - // Subscribe, must be received in order - var count int64 - defer Subscribe(d, func(ev MyEvent1) { - assert.Equal(t, int(atomic.AddInt64(&count, 1)), ev.Number) - wg.Done() - })() - - // Publish - wg.Add(3) - Publish(d, MyEvent1{Number: 1}) - Publish(d, MyEvent1{Number: 2}) - Publish(d, MyEvent1{Number: 3}) - - // Wait and check - wg.Wait() - assert.Equal(t, int64(3), count) -} - -func TestUnsubscribe(t *testing.T) { - d := NewDispatcher() - assert.Equal(t, 0, d.count(TypeEvent1)) - unsubscribe := Subscribe(d, func(ev MyEvent1) { - // Nothing - }) - - assert.Equal(t, 1, d.count(TypeEvent1)) - unsubscribe() - assert.Equal(t, 0, d.count(TypeEvent1)) -} - -func TestConcurrent(t *testing.T) { - const max = 1000000 - var count int64 - var wg sync.WaitGroup - wg.Add(1) - - d := NewDispatcher() - defer Subscribe(d, func(ev MyEvent1) { - if current := atomic.AddInt64(&count, 1); current == max { - wg.Done() - } - })() - - // Asynchronously publish - go func() { - for i := 0; i < max; i++ { - Publish(d, MyEvent1{}) - } - }() - - defer Subscribe(d, func(ev MyEvent1) { - // Subscriber that does nothing - })() - - wg.Wait() - assert.Equal(t, max, int(count)) -} - -func TestSubscribeDifferentType(t *testing.T) { - d := NewDispatcher() - assert.Panics(t, func() { - SubscribeTo(d, TypeEvent1, func(ev MyEvent1) {}) - SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {}) - }) -} - -func TestPublishDifferentType(t *testing.T) { - d := NewDispatcher() - assert.Panics(t, func() { - SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {}) - Publish(d, MyEvent1{}) - }) -} - -func TestCloseDispatcher(t *testing.T) { - d := NewDispatcher() - defer SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})() - - assert.NoError(t, d.Close()) - assert.Panics(t, func() { - SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {}) - }) -} - -func TestMatrix(t *testing.T) { - const amount = 1000 - for _, subs := range []int{1, 10, 100} { - for _, topics := range []int{1, 10} { - expected := subs * topics * amount - t.Run(fmt.Sprintf("%dx%d", topics, subs), func(t *testing.T) { - var count atomic.Int64 - var wg sync.WaitGroup - wg.Add(expected) - - d := NewDispatcher() - for i := 0; i < subs; i++ { - for id := 0; id < topics; id++ { - defer SubscribeTo(d, uint32(id), func(ev MyEvent3) { - count.Add(1) - wg.Done() - })() - } - } - - for n := 0; n < amount; n++ { - for id := 0; id < topics; id++ { - go Publish(d, MyEvent3{ID: id}) - } - } - - wg.Wait() - assert.Equal(t, expected, int(count.Load())) - }) - } - } -} - -func TestConcurrentSubscriptionRace(t *testing.T) { - // This test specifically targets the race condition that occurs when multiple - // goroutines try to subscribe to different event types simultaneously. - // Without the CAS loop, subscriptions could be lost due to registry corruption. - - const numGoroutines = 100 - const numEventTypes = 50 - - d := NewDispatcher() - defer d.Close() - - var wg sync.WaitGroup - var receivedCount int64 - var subscribedTypes sync.Map // Thread-safe map - - wg.Add(numGoroutines) - - // Start multiple goroutines that subscribe to different event types concurrently - for i := 0; i < numGoroutines; i++ { - go func(goroutineID int) { - defer wg.Done() - - // Each goroutine subscribes to a unique event type - eventType := uint32(goroutineID%numEventTypes + 1000) // Offset to avoid collision with other tests - - // Subscribe to the event type - SubscribeTo(d, eventType, func(ev MyEvent3) { - atomic.AddInt64(&receivedCount, 1) - }) - - // Record that this type was subscribed - subscribedTypes.Store(eventType, true) - }(i) - } - - // Wait for all subscriptions to complete - wg.Wait() - - // Count the number of unique event types subscribed - expectedTypes := 0 - subscribedTypes.Range(func(key, value interface{}) bool { - expectedTypes++ - return true - }) - - // Small delay to ensure all subscriptions are fully processed - time.Sleep(10 * time.Millisecond) - - // Publish events to each subscribed type - subscribedTypes.Range(func(key, value interface{}) bool { - eventType := key.(uint32) - Publish(d, MyEvent3{ID: int(eventType)}) - return true - }) - - // Wait for all events to be processed - time.Sleep(50 * time.Millisecond) - - // Verify that we received at least the expected number of events - // (there might be more if multiple goroutines subscribed to the same event type) - received := atomic.LoadInt64(&receivedCount) - assert.GreaterOrEqual(t, int(received), expectedTypes, - "Should have received at least %d events, got %d", expectedTypes, received) - - // Verify that we have the expected number of unique event types - assert.Equal(t, numEventTypes, expectedTypes, - "Should have exactly %d unique event types", numEventTypes) -} - -func TestConcurrentHandlerRegistration(t *testing.T) { - const numGoroutines = 100 - - // Test concurrent subscriptions to the same event type - t.Run("SameEventType", func(t *testing.T) { - d := NewDispatcher() - var handlerCount int64 - var wg sync.WaitGroup - - // Start multiple goroutines subscribing to the same event type (0x1) - for i := 0; i < numGoroutines; i++ { - wg.Add(1) - go func() { - defer wg.Done() - SubscribeTo(d, uint32(0x1), func(ev MyEvent1) { - atomic.AddInt64(&handlerCount, 1) - }) - }() - } - - wg.Wait() - - // Verify all handlers were registered by publishing an event - atomic.StoreInt64(&handlerCount, 0) - Publish(d, MyEvent1{}) - - // Small delay to ensure all handlers have executed - time.Sleep(10 * time.Millisecond) - - assert.Equal(t, int64(numGoroutines), atomic.LoadInt64(&handlerCount), - "Not all handlers were registered due to race condition") - }) - - // Test concurrent subscriptions to different event types - t.Run("DifferentEventTypes", func(t *testing.T) { - d := NewDispatcher() - var wg sync.WaitGroup - receivedEvents := make(map[uint32]*int64) - - // Create multiple event types and subscribe concurrently - for i := 0; i < numGoroutines; i++ { - eventType := uint32(100 + i) - counter := new(int64) - receivedEvents[eventType] = counter - - wg.Add(1) - go func(et uint32, cnt *int64) { - defer wg.Done() - SubscribeTo(d, et, func(ev MyEvent3) { - atomic.AddInt64(cnt, 1) - }) - }(eventType, counter) - } - - wg.Wait() - - // Publish events to all types - for eventType := uint32(100); eventType < uint32(100+numGoroutines); eventType++ { - Publish(d, MyEvent3{ID: int(eventType)}) - } - - // Small delay to ensure all handlers have executed - time.Sleep(10 * time.Millisecond) - - // Verify all event types received their events - for eventType, counter := range receivedEvents { - assert.Equal(t, int64(1), atomic.LoadInt64(counter), - "Event type %d did not receive its event", eventType) - } - }) -} - -func TestBackpressure(t *testing.T) { - d := NewDispatcher() - d.maxQueue = 10 - - var processedCount int64 - unsub := SubscribeTo(d, uint32(0x200), func(ev MyEvent3) { - atomic.AddInt64(&processedCount, 1) - }) - defer unsub() - - const eventsToPublish = 1000 - for i := 0; i < eventsToPublish; i++ { - Publish(d, MyEvent3{ID: 0x200}) - } - - time.Sleep(100 * time.Millisecond) - - // Verify all events were eventually processed - finalProcessed := atomic.LoadInt64(&processedCount) - assert.Equal(t, int64(eventsToPublish), finalProcessed) - t.Logf("Events processed: %d/%d", finalProcessed, eventsToPublish) -} - -// ------------------------------------- Test Events ------------------------------------- - -const ( - TypeEvent1 = 0x1 - TypeEvent2 = 0x2 -) - -type MyEvent1 struct { - Number int -} - -func (t MyEvent1) Type() uint32 { return TypeEvent1 } - -type MyEvent2 struct { - Text string -} - -func (t MyEvent2) Type() uint32 { return TypeEvent2 } - -type MyEvent3 struct { - ID int -} - -func (t MyEvent3) Type() uint32 { return uint32(t.ID) } +// Copyright (c) Roman Atachiants and contributore. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for detaile. + +package event + +import ( + "fmt" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestPublish(t *testing.T) { + d := NewDispatcher() + var wg sync.WaitGroup + + // Subscribe, must be received in order + var count int64 + defer Subscribe(d, func(ev MyEvent1) { + assert.Equal(t, int(atomic.AddInt64(&count, 1)), ev.Number) + wg.Done() + })() + + // Publish + wg.Add(3) + Publish(d, MyEvent1{Number: 1}) + Publish(d, MyEvent1{Number: 2}) + Publish(d, MyEvent1{Number: 3}) + + // Wait and check + wg.Wait() + assert.Equal(t, int64(3), count) +} + +func TestUnsubscribe(t *testing.T) { + d := NewDispatcher() + assert.Equal(t, 0, d.count(TypeEvent1)) + unsubscribe := Subscribe(d, func(ev MyEvent1) { + // Nothing + }) + + assert.Equal(t, 1, d.count(TypeEvent1)) + unsubscribe() + assert.Equal(t, 0, d.count(TypeEvent1)) +} + +func TestConcurrent(t *testing.T) { + const max = 1000000 + var count int64 + var wg sync.WaitGroup + wg.Add(1) + + d := NewDispatcher() + defer Subscribe(d, func(ev MyEvent1) { + if current := atomic.AddInt64(&count, 1); current == max { + wg.Done() + } + })() + + // Asynchronously publish + go func() { + for i := 0; i < max; i++ { + Publish(d, MyEvent1{}) + } + }() + + defer Subscribe(d, func(ev MyEvent1) { + // Subscriber that does nothing + })() + + wg.Wait() + assert.Equal(t, max, int(count)) +} + +func TestSubscribeDifferentType(t *testing.T) { + d := NewDispatcher() + assert.Panics(t, func() { + SubscribeTo(d, TypeEvent1, func(ev MyEvent1) {}) + SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {}) + }) +} + +func TestPublishDifferentType(t *testing.T) { + d := NewDispatcher() + assert.Panics(t, func() { + SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {}) + Publish(d, MyEvent1{}) + }) +} + +func TestCloseDispatcher(t *testing.T) { + d := NewDispatcher() + defer SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {})() + + assert.NoError(t, d.Close()) + assert.Panics(t, func() { + SubscribeTo(d, TypeEvent1, func(ev MyEvent2) {}) + }) +} + +func TestMatrix(t *testing.T) { + const amount = 1000 + for _, subs := range []int{1, 10, 100} { + for _, topics := range []int{1, 10} { + expected := subs * topics * amount + t.Run(fmt.Sprintf("%dx%d", topics, subs), func(t *testing.T) { + var count atomic.Int64 + var wg sync.WaitGroup + wg.Add(expected) + + d := NewDispatcher() + for i := 0; i < subs; i++ { + for id := 0; id < topics; id++ { + defer SubscribeTo(d, uint32(id), func(ev MyEvent3) { + count.Add(1) + wg.Done() + })() + } + } + + for n := 0; n < amount; n++ { + for id := 0; id < topics; id++ { + go Publish(d, MyEvent3{ID: id}) + } + } + + wg.Wait() + assert.Equal(t, expected, int(count.Load())) + }) + } + } +} + +func TestConcurrentSubscriptionRace(t *testing.T) { + // This test specifically targets the race condition that occurs when multiple + // goroutines try to subscribe to different event types simultaneously. + // Without the CAS loop, subscriptions could be lost due to registry corruption. + + const numGoroutines = 100 + const numEventTypes = 50 + + d := NewDispatcher() + defer d.Close() + + var wg sync.WaitGroup + var receivedCount int64 + var subscribedTypes sync.Map // Thread-safe map + + wg.Add(numGoroutines) + + // Start multiple goroutines that subscribe to different event types concurrently + for i := 0; i < numGoroutines; i++ { + go func(goroutineID int) { + defer wg.Done() + + // Each goroutine subscribes to a unique event type + eventType := uint32(goroutineID%numEventTypes + 1000) // Offset to avoid collision with other tests + + // Subscribe to the event type + SubscribeTo(d, eventType, func(ev MyEvent3) { + atomic.AddInt64(&receivedCount, 1) + }) + + // Record that this type was subscribed + subscribedTypes.Store(eventType, true) + }(i) + } + + // Wait for all subscriptions to complete + wg.Wait() + + // Count the number of unique event types subscribed + expectedTypes := 0 + subscribedTypes.Range(func(key, value interface{}) bool { + expectedTypes++ + return true + }) + + // Small delay to ensure all subscriptions are fully processed + time.Sleep(10 * time.Millisecond) + + // Publish events to each subscribed type + subscribedTypes.Range(func(key, value interface{}) bool { + eventType := key.(uint32) + Publish(d, MyEvent3{ID: int(eventType)}) + return true + }) + + // Wait for all events to be processed + time.Sleep(50 * time.Millisecond) + + // Verify that we received at least the expected number of events + // (there might be more if multiple goroutines subscribed to the same event type) + received := atomic.LoadInt64(&receivedCount) + assert.GreaterOrEqual(t, int(received), expectedTypes, + "Should have received at least %d events, got %d", expectedTypes, received) + + // Verify that we have the expected number of unique event types + assert.Equal(t, numEventTypes, expectedTypes, + "Should have exactly %d unique event types", numEventTypes) +} + +func TestConcurrentHandlerRegistration(t *testing.T) { + const numGoroutines = 100 + + // Test concurrent subscriptions to the same event type + t.Run("SameEventType", func(t *testing.T) { + d := NewDispatcher() + var handlerCount int64 + var wg sync.WaitGroup + + // Start multiple goroutines subscribing to the same event type (0x1) + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func() { + defer wg.Done() + SubscribeTo(d, uint32(0x1), func(ev MyEvent1) { + atomic.AddInt64(&handlerCount, 1) + }) + }() + } + + wg.Wait() + + // Verify all handlers were registered by publishing an event + atomic.StoreInt64(&handlerCount, 0) + Publish(d, MyEvent1{}) + + // Small delay to ensure all handlers have executed + time.Sleep(10 * time.Millisecond) + + assert.Equal(t, int64(numGoroutines), atomic.LoadInt64(&handlerCount), + "Not all handlers were registered due to race condition") + }) + + // Test concurrent subscriptions to different event types + t.Run("DifferentEventTypes", func(t *testing.T) { + d := NewDispatcher() + var wg sync.WaitGroup + receivedEvents := make(map[uint32]*int64) + + // Create multiple event types and subscribe concurrently + for i := 0; i < numGoroutines; i++ { + eventType := uint32(100 + i) + counter := new(int64) + receivedEvents[eventType] = counter + + wg.Add(1) + go func(et uint32, cnt *int64) { + defer wg.Done() + SubscribeTo(d, et, func(ev MyEvent3) { + atomic.AddInt64(cnt, 1) + }) + }(eventType, counter) + } + + wg.Wait() + + // Publish events to all types + for eventType := uint32(100); eventType < uint32(100+numGoroutines); eventType++ { + Publish(d, MyEvent3{ID: int(eventType)}) + } + + // Small delay to ensure all handlers have executed + time.Sleep(10 * time.Millisecond) + + // Verify all event types received their events + for eventType, counter := range receivedEvents { + assert.Equal(t, int64(1), atomic.LoadInt64(counter), + "Event type %d did not receive its event", eventType) + } + }) +} + +func TestBackpressure(t *testing.T) { + d := NewDispatcher() + d.maxQueue = 10 + + var processedCount int64 + unsub := SubscribeTo(d, uint32(0x200), func(ev MyEvent3) { + atomic.AddInt64(&processedCount, 1) + }) + defer unsub() + + const eventsToPublish = 1000 + for i := 0; i < eventsToPublish; i++ { + Publish(d, MyEvent3{ID: 0x200}) + } + + time.Sleep(100 * time.Millisecond) + + // Verify all events were eventually processed + finalProcessed := atomic.LoadInt64(&processedCount) + assert.Equal(t, int64(eventsToPublish), finalProcessed) + t.Logf("Events processed: %d/%d", finalProcessed, eventsToPublish) +} + +// ------------------------------------- Test Events ------------------------------------- + +const ( + TypeEvent1 = 0x1 + TypeEvent2 = 0x2 +) + +type MyEvent1 struct { + Number int +} + +func (t MyEvent1) Type() uint32 { return TypeEvent1 } + +type MyEvent2 struct { + Text string +} + +func (t MyEvent2) Type() uint32 { return TypeEvent2 } + +type MyEvent3 struct { + ID int +} + +func (t MyEvent3) Type() uint32 { return uint32(t.ID) } diff --git a/internal/perf/monitor.go b/internal/perf/monitor.go index 1cdc040..6b7cdfe 100644 --- a/internal/perf/monitor.go +++ b/internal/perf/monitor.go @@ -31,7 +31,7 @@ type Monitor struct { } func ringCapacity(c config.PerformanceConfig) int { - n := int(c.MaxAge / c.Every) + n := int(time.Hour / c.Every) if n < 1 { n = 1 } @@ -43,12 +43,6 @@ func New(c config.PerformanceConfig, logger *logmon.Monitor) (*Monitor, error) { if c.Every < 100*time.Millisecond { c.Every = 100 * time.Millisecond } - if c.GC < 1*time.Second { - c.GC = 1 * time.Second - } - if c.MaxAge < 1*time.Minute { - c.MaxAge = 1 * time.Minute - } if logger == nil { return nil, errors.New("logger is required") @@ -92,7 +86,9 @@ func (m *Monitor) UpdateConfig(newConf config.PerformanceConfig) { m.sysRing = ring.NewBuffer[SysStat](capacity) m.gpuRing = ring.NewBuffer[[]GpuStat](capacity) m.mutex.Unlock() - m.Start() + if !newConf.Disabled { + m.Start() + } } // Subscribe returns channels to listen to system and GPU stats. diff --git a/internal/perf/monitor_test.go b/internal/perf/monitor_test.go index a6665ee..e8aa267 100644 --- a/internal/perf/monitor_test.go +++ b/internal/perf/monitor_test.go @@ -24,26 +24,19 @@ func TestNew_DefaultConfig(t *testing.T) { require.NotNil(t, m) assert.Equal(t, 100*time.Millisecond, m.conf.Every) - assert.Equal(t, 1*time.Second, m.conf.GC) - assert.Equal(t, 1*time.Minute, m.conf.MaxAge) } func TestNew_CustomConfig(t *testing.T) { logger := newTestLogger() cfg := config.PerformanceConfig{ - Enable: true, - Every: 500 * time.Millisecond, - GC: 5 * time.Second, - MaxAge: 10 * time.Minute, + Every: 500 * time.Millisecond, } m, err := New(cfg, logger) require.NoError(t, err) assert.Equal(t, 500*time.Millisecond, m.conf.Every) - assert.Equal(t, 5*time.Second, m.conf.GC) - assert.Equal(t, 10*time.Minute, m.conf.MaxAge) } func TestNew_NilLogger(t *testing.T) { @@ -56,18 +49,13 @@ func TestNew_BelowMinimumConfig(t *testing.T) { logger := newTestLogger() cfg := config.PerformanceConfig{ - Enable: true, - Every: 1 * time.Millisecond, - GC: 100 * time.Millisecond, - MaxAge: 1 * time.Second, + Every: 1 * time.Millisecond, } m, err := New(cfg, logger) require.NoError(t, err) assert.Equal(t, 100*time.Millisecond, m.conf.Every) - assert.Equal(t, 1*time.Second, m.conf.GC) - assert.Equal(t, 1*time.Minute, m.conf.MaxAge) } func TestSubscribe_ReturnsChannels(t *testing.T) { diff --git a/llama-swap.go b/llama-swap.go index 3c3055f..643db5e 100644 --- a/llama-swap.go +++ b/llama-swap.go @@ -93,12 +93,17 @@ func main() { listenStr = &defaultPort } - mon, err := perf.New(conf.Performance, mainLogger) - if err != nil { - mainLogger.Errorf("failed to create monitor: %s", err.Error()) - os.Exit(1) + var mon *perf.Monitor + if !conf.Performance.Disabled { + mon, err = perf.New(conf.Performance, mainLogger) + if err != nil { + mainLogger.Errorf("failed to create monitor: %s", err.Error()) + os.Exit(1) + } + mon.Start() + } else { + mainLogger.Info("performance monitoring is disabled") } - mon.Start() // Setup channels for server management exitChan := make(chan struct{}) @@ -108,7 +113,7 @@ func main() { // Context that bounds the lifetime of background watcher goroutines. watcherCtx, watcherCancel := context.WithCancel(context.Background()) - // Create server with initial handler + // Create server with initial handlergit srv := &http.Server{ Addr: *listenStr, } @@ -140,7 +145,9 @@ func main() { mainLogger.Debug("Configuration Changed") currentPM.Shutdown() - mon.UpdateConfig(conf.Performance) + if mon != nil { + mon.UpdateConfig(conf.Performance) + } newPM := proxy.New(conf) newPM.SetVersion(date, commit, version) newPM.SetPerfMonitor(mon) @@ -197,7 +204,9 @@ func main() { reloadProxyManager() case syscall.SIGINT, syscall.SIGTERM: mainLogger.Debugf("Received signal %v, shutting down...", sig) - mon.Stop() + if mon != nil { + mon.Stop() + } watcherCancel() ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() diff --git a/proxy/config/config.go b/proxy/config/config.go index 4549bf1..e6ff5f1 100644 --- a/proxy/config/config.go +++ b/proxy/config/config.go @@ -223,11 +223,8 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { } // Apply defaults for performance config when section is missing - if !config.Performance.Enable && config.Performance.Every == 0 && config.Performance.MaxAge == 0 && config.Performance.GC == 0 { - config.Performance.Enable = true - config.Performance.Every = 15 * time.Second - config.Performance.MaxAge = 1 * time.Hour - config.Performance.GC = 5 * time.Minute + if config.Performance.Every == 0 { + config.Performance.Every = 5 * time.Second } if err = config.Performance.Validate(); err != nil { return Config{}, fmt.Errorf("performance: %w", err) diff --git a/proxy/config/config_posix_test.go b/proxy/config/config_posix_test.go index 5c35c31..f5d0070 100644 --- a/proxy/config/config_posix_test.go +++ b/proxy/config/config_posix_test.go @@ -231,10 +231,7 @@ groups: MetricsMaxInMemory: 1000, CaptureBuffer: 5, Performance: PerformanceConfig{ - Enable: true, - Every: 15 * time.Second, - MaxAge: 1 * time.Hour, - GC: 5 * time.Minute, + Every: 5 * time.Second, }, Profiles: map[string][]string{ "test": {"model1", "model2"}, diff --git a/proxy/config/config_windows_test.go b/proxy/config/config_windows_test.go index f4372be..1777bdf 100644 --- a/proxy/config/config_windows_test.go +++ b/proxy/config/config_windows_test.go @@ -220,10 +220,7 @@ groups: MetricsMaxInMemory: 1000, CaptureBuffer: 5, Performance: PerformanceConfig{ - Enable: true, - Every: 15 * time.Second, - MaxAge: 1 * time.Hour, - GC: 5 * time.Minute, + Every: 5 * time.Second, }, Profiles: map[string][]string{ "test": {"model1", "model2"}, diff --git a/proxy/config/performance.go b/proxy/config/performance.go index 8144aca..4d4155e 100644 --- a/proxy/config/performance.go +++ b/proxy/config/performance.go @@ -7,19 +7,14 @@ import ( // PerformanceConfig holds configuration for system performance monitoring type PerformanceConfig struct { - Enable bool `yaml:"enable"` - Every time.Duration `yaml:"every"` - MaxAge time.Duration `yaml:"maxAge"` - GC time.Duration `yaml:"gc"` + Disabled bool `yaml:"disabled"` + Every time.Duration `yaml:"every"` } func (p *PerformanceConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type rawPerformanceConfig PerformanceConfig defaults := rawPerformanceConfig{ - Enable: true, - Every: 15 * time.Second, - MaxAge: 1 * time.Hour, - GC: 5 * time.Minute, + Every: 5 * time.Second, } if err := unmarshal(&defaults); err != nil { @@ -32,14 +27,8 @@ func (p *PerformanceConfig) UnmarshalYAML(unmarshal func(interface{}) error) err // Validate checks the PerformanceConfig values and returns an error if invalid func (p *PerformanceConfig) Validate() error { - if p.Every < time.Second { - return fmt.Errorf("every must be at least 1s, got %v", p.Every) - } - if p.MaxAge <= 0 { - return fmt.Errorf("maxAge must be greater than 0, got %v", p.MaxAge) - } - if p.GC <= 0 { - return fmt.Errorf("gc must be greater than 0, got %v", p.GC) + if p.Every < 5*time.Second { + return fmt.Errorf("every must be at least 5s, got %v", p.Every) } return nil } diff --git a/proxy/config/performance_test.go b/proxy/config/performance_test.go index 1284a85..b54cf1e 100644 --- a/proxy/config/performance_test.go +++ b/proxy/config/performance_test.go @@ -18,10 +18,8 @@ models: assert.NoError(t, err) // When performance section is missing, defaults should be applied - assert.True(t, config.Performance.Enable) - assert.Equal(t, 15*time.Second, config.Performance.Every) - assert.Equal(t, 1*time.Hour, config.Performance.MaxAge) - assert.Equal(t, 5*time.Minute, config.Performance.GC) + assert.False(t, config.Performance.Disabled) + assert.Equal(t, 5*time.Second, config.Performance.Every) } func TestPerformanceConfig_CustomValues(t *testing.T) { @@ -29,8 +27,6 @@ func TestPerformanceConfig_CustomValues(t *testing.T) { performance: enable: true every: 30s - maxAge: 12h - gc: 10m models: model1: cmd: path/to/cmd --port ${PORT} @@ -38,16 +34,14 @@ models: config, err := LoadConfigFromReader(strings.NewReader(content)) assert.NoError(t, err) - assert.True(t, config.Performance.Enable) + assert.False(t, config.Performance.Disabled) assert.Equal(t, 30*time.Second, config.Performance.Every) - assert.Equal(t, 12*time.Hour, config.Performance.MaxAge) - assert.Equal(t, 10*time.Minute, config.Performance.GC) } func TestPerformanceConfig_Disabled(t *testing.T) { content := ` performance: - enable: false + disabled: true models: model1: cmd: path/to/cmd --port ${PORT} @@ -55,18 +49,15 @@ models: config, err := LoadConfigFromReader(strings.NewReader(content)) assert.NoError(t, err) - assert.False(t, config.Performance.Enable) + assert.True(t, config.Performance.Disabled) // Duration defaults should still apply - assert.Equal(t, 15*time.Second, config.Performance.Every) - assert.Equal(t, 1*time.Hour, config.Performance.MaxAge) - assert.Equal(t, 5*time.Minute, config.Performance.GC) + assert.Equal(t, 5*time.Second, config.Performance.Every) } func TestPerformanceConfig_PartialValues(t *testing.T) { content := ` performance: every: 10s - maxAge: 6h models: model1: cmd: path/to/cmd --port ${PORT} @@ -75,58 +66,27 @@ models: assert.NoError(t, err) // enable should default to true - assert.True(t, config.Performance.Enable) + assert.False(t, config.Performance.Disabled) assert.Equal(t, 10*time.Second, config.Performance.Every) - assert.Equal(t, 6*time.Hour, config.Performance.MaxAge) - // gc should use default - assert.Equal(t, 5*time.Minute, config.Performance.GC) } func TestPerformanceConfig_InvalidEvery(t *testing.T) { content := ` performance: - every: 500ms + every: 4s models: model1: cmd: path/to/cmd --port ${PORT} ` _, err := LoadConfigFromReader(strings.NewReader(content)) assert.Error(t, err) - assert.Contains(t, err.Error(), "every must be at least 1s") -} - -func TestPerformanceConfig_InvalidMaxAge(t *testing.T) { - content := ` -performance: - maxAge: 0s -models: - model1: - cmd: path/to/cmd --port ${PORT} -` - _, err := LoadConfigFromReader(strings.NewReader(content)) - assert.Error(t, err) - assert.Contains(t, err.Error(), "maxAge must be greater than 0") -} - -func TestPerformanceConfig_InvalidGC(t *testing.T) { - content := ` -performance: - gc: 0s -models: - model1: - cmd: path/to/cmd --port ${PORT} -` - _, err := LoadConfigFromReader(strings.NewReader(content)) - assert.Error(t, err) - assert.Contains(t, err.Error(), "gc must be greater than 0") + assert.Contains(t, err.Error(), "every must be at least 5s") } func TestPerformanceConfig_ComplexDurations(t *testing.T) { content := ` performance: every: 1m30s - maxAge: 2h10m - gc: 1m models: model1: cmd: path/to/cmd --port ${PORT} @@ -135,6 +95,4 @@ models: assert.NoError(t, err) assert.Equal(t, 90*time.Second, config.Performance.Every) - assert.Equal(t, (2*time.Hour)+(10*time.Minute), config.Performance.MaxAge) - assert.Equal(t, 1*time.Minute, config.Performance.GC) }