* proxy: refactor metrics recording - remove metrics_middleware.go as this wrapper is no longer needed. This also eliminates double body parsing for the modelID - move metrics parsing to be part of MetricsMonitor - refactor how metrics are recorded in ProxyManager - add MetricsMonitor tests - improve mem efficiency of processStreamingResponse - add benchmarks for MetricsMonitor.addMetrics - proxy: refactor MetricsMonitor to handle errors more safely
694 lines
18 KiB
Go
694 lines
18 KiB
Go
package proxy
|
|
|
|
import (
|
|
"encoding/json"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
"github.com/mostlygeek/llama-swap/event"
|
|
"github.com/stretchr/testify/assert"
|
|
)
|
|
|
|
func TestMetricsMonitor_AddMetrics(t *testing.T) {
|
|
t.Run("adds metrics and assigns ID", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
metric := TokenMetrics{
|
|
Model: "test-model",
|
|
InputTokens: 100,
|
|
OutputTokens: 50,
|
|
}
|
|
|
|
mm.addMetrics(metric)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 1, len(metrics))
|
|
assert.Equal(t, 0, metrics[0].ID)
|
|
assert.Equal(t, "test-model", metrics[0].Model)
|
|
assert.Equal(t, 100, metrics[0].InputTokens)
|
|
assert.Equal(t, 50, metrics[0].OutputTokens)
|
|
})
|
|
|
|
t.Run("increments ID for each metric", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
for i := 0; i < 5; i++ {
|
|
mm.addMetrics(TokenMetrics{Model: "model"})
|
|
}
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 5, len(metrics))
|
|
for i := 0; i < 5; i++ {
|
|
assert.Equal(t, i, metrics[i].ID)
|
|
}
|
|
})
|
|
|
|
t.Run("respects max metrics limit", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 3)
|
|
|
|
// Add 5 metrics
|
|
for i := 0; i < 5; i++ {
|
|
mm.addMetrics(TokenMetrics{
|
|
Model: "model",
|
|
InputTokens: i,
|
|
})
|
|
}
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 3, len(metrics))
|
|
|
|
// Should keep the last 3 metrics (IDs 2, 3, 4)
|
|
assert.Equal(t, 2, metrics[0].ID)
|
|
assert.Equal(t, 3, metrics[1].ID)
|
|
assert.Equal(t, 4, metrics[2].ID)
|
|
})
|
|
|
|
t.Run("emits TokenMetricsEvent", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
receivedEvent := make(chan TokenMetricsEvent, 1)
|
|
cancel := event.On(func(e TokenMetricsEvent) {
|
|
receivedEvent <- e
|
|
})
|
|
defer cancel()
|
|
|
|
metric := TokenMetrics{
|
|
Model: "test-model",
|
|
InputTokens: 100,
|
|
OutputTokens: 50,
|
|
}
|
|
|
|
mm.addMetrics(metric)
|
|
|
|
select {
|
|
case evt := <-receivedEvent:
|
|
assert.Equal(t, 0, evt.Metrics.ID)
|
|
assert.Equal(t, "test-model", evt.Metrics.Model)
|
|
assert.Equal(t, 100, evt.Metrics.InputTokens)
|
|
assert.Equal(t, 50, evt.Metrics.OutputTokens)
|
|
case <-time.After(1 * time.Second):
|
|
t.Fatal("timeout waiting for event")
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestMetricsMonitor_GetMetrics(t *testing.T) {
|
|
t.Run("returns empty slice when no metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
metrics := mm.getMetrics()
|
|
assert.NotNil(t, metrics)
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
|
|
t.Run("returns copy of metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
mm.addMetrics(TokenMetrics{Model: "model1"})
|
|
mm.addMetrics(TokenMetrics{Model: "model2"})
|
|
|
|
metrics1 := mm.getMetrics()
|
|
metrics2 := mm.getMetrics()
|
|
|
|
// Verify we got copies
|
|
assert.Equal(t, 2, len(metrics1))
|
|
assert.Equal(t, 2, len(metrics2))
|
|
|
|
// Modify the returned slice shouldn't affect the original
|
|
metrics1[0].Model = "modified"
|
|
metrics3 := mm.getMetrics()
|
|
assert.Equal(t, "model1", metrics3[0].Model)
|
|
})
|
|
}
|
|
|
|
func TestMetricsMonitor_GetMetricsJSON(t *testing.T) {
|
|
t.Run("returns valid JSON for empty metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
jsonData, err := mm.getMetricsJSON()
|
|
assert.NoError(t, err)
|
|
assert.NotNil(t, jsonData)
|
|
|
|
var metrics []TokenMetrics
|
|
err = json.Unmarshal(jsonData, &metrics)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
|
|
t.Run("returns valid JSON with metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
mm.addMetrics(TokenMetrics{
|
|
Model: "model1",
|
|
InputTokens: 100,
|
|
OutputTokens: 50,
|
|
TokensPerSecond: 25.5,
|
|
})
|
|
mm.addMetrics(TokenMetrics{
|
|
Model: "model2",
|
|
InputTokens: 200,
|
|
OutputTokens: 100,
|
|
TokensPerSecond: 30.0,
|
|
})
|
|
|
|
jsonData, err := mm.getMetricsJSON()
|
|
assert.NoError(t, err)
|
|
|
|
var metrics []TokenMetrics
|
|
err = json.Unmarshal(jsonData, &metrics)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, 2, len(metrics))
|
|
assert.Equal(t, "model1", metrics[0].Model)
|
|
assert.Equal(t, "model2", metrics[1].Model)
|
|
})
|
|
}
|
|
|
|
func TestMetricsMonitor_WrapHandler(t *testing.T) {
|
|
t.Run("successful non-streaming request with usage data", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
responseBody := `{
|
|
"usage": {
|
|
"prompt_tokens": 100,
|
|
"completion_tokens": 50
|
|
}
|
|
}`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 1, len(metrics))
|
|
assert.Equal(t, "test-model", metrics[0].Model)
|
|
assert.Equal(t, 100, metrics[0].InputTokens)
|
|
assert.Equal(t, 50, metrics[0].OutputTokens)
|
|
})
|
|
|
|
t.Run("successful request with timings data", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
responseBody := `{
|
|
"timings": {
|
|
"prompt_n": 100,
|
|
"predicted_n": 50,
|
|
"prompt_per_second": 150.5,
|
|
"predicted_per_second": 25.5,
|
|
"prompt_ms": 500.0,
|
|
"predicted_ms": 1500.0,
|
|
"cache_n": 20
|
|
}
|
|
}`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 1, len(metrics))
|
|
assert.Equal(t, "test-model", metrics[0].Model)
|
|
assert.Equal(t, 100, metrics[0].InputTokens)
|
|
assert.Equal(t, 50, metrics[0].OutputTokens)
|
|
assert.Equal(t, 20, metrics[0].CachedTokens)
|
|
assert.Equal(t, 150.5, metrics[0].PromptPerSecond)
|
|
assert.Equal(t, 25.5, metrics[0].TokensPerSecond)
|
|
assert.Equal(t, 2000, metrics[0].DurationMs) // 500 + 1500
|
|
})
|
|
|
|
t.Run("streaming request with SSE format", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
// Note: SSE format requires proper line breaks - each data line followed by blank line
|
|
responseBody := `data: {"choices":[{"text":"Hello"}]}
|
|
|
|
data: {"choices":[{"text":" World"}]}
|
|
|
|
data: {"usage":{"prompt_tokens":10,"completion_tokens":20},"timings":{"prompt_n":10,"predicted_n":20,"prompt_per_second":100.0,"predicted_per_second":50.0,"prompt_ms":100.0,"predicted_ms":400.0}}
|
|
|
|
data: [DONE]
|
|
|
|
`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "text/event-stream")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 1, len(metrics))
|
|
assert.Equal(t, "test-model", metrics[0].Model)
|
|
// When timings data is present, it takes precedence
|
|
assert.Equal(t, 10, metrics[0].InputTokens)
|
|
assert.Equal(t, 20, metrics[0].OutputTokens)
|
|
})
|
|
|
|
t.Run("non-OK status code does not record metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.WriteHeader(http.StatusBadRequest)
|
|
w.Write([]byte("error"))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
|
|
t.Run("empty response body does not record metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.WriteHeader(http.StatusOK)
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
|
|
t.Run("invalid JSON does not record metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte("not valid json"))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err) // Errors after response is sent are logged, not returned
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
|
|
t.Run("next handler error is propagated", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
expectedErr := assert.AnError
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
return expectedErr
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.Equal(t, expectedErr, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
|
|
t.Run("response without usage or timings does not record metrics", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
responseBody := `{"result": "ok"}`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err) // Errors after response is sent are logged, not returned
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
}
|
|
|
|
func TestMetricsMonitor_ResponseBodyCopier(t *testing.T) {
|
|
t.Run("captures response body", func(t *testing.T) {
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
copier := newBodyCopier(ginCtx.Writer)
|
|
|
|
testData := []byte("test response body")
|
|
n, err := copier.Write(testData)
|
|
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, len(testData), n)
|
|
assert.Equal(t, testData, copier.body.Bytes())
|
|
assert.Equal(t, string(testData), rec.Body.String())
|
|
})
|
|
|
|
t.Run("sets start time on first write", func(t *testing.T) {
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
copier := newBodyCopier(ginCtx.Writer)
|
|
|
|
assert.True(t, copier.StartTime().IsZero())
|
|
|
|
copier.Write([]byte("test"))
|
|
|
|
assert.False(t, copier.StartTime().IsZero())
|
|
})
|
|
|
|
t.Run("preserves headers", func(t *testing.T) {
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
copier := newBodyCopier(ginCtx.Writer)
|
|
|
|
copier.Header().Set("X-Test", "value")
|
|
|
|
assert.Equal(t, "value", rec.Header().Get("X-Test"))
|
|
})
|
|
|
|
t.Run("preserves status code", func(t *testing.T) {
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
copier := newBodyCopier(ginCtx.Writer)
|
|
|
|
copier.WriteHeader(http.StatusCreated)
|
|
|
|
// Gin's ResponseWriter tracks status internally
|
|
assert.Equal(t, http.StatusCreated, copier.Status())
|
|
})
|
|
}
|
|
|
|
func TestMetricsMonitor_Concurrent(t *testing.T) {
|
|
t.Run("concurrent addMetrics is safe", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 1000)
|
|
|
|
var wg sync.WaitGroup
|
|
numGoroutines := 10
|
|
metricsPerGoroutine := 100
|
|
|
|
for i := 0; i < numGoroutines; i++ {
|
|
wg.Add(1)
|
|
go func(id int) {
|
|
defer wg.Done()
|
|
for j := 0; j < metricsPerGoroutine; j++ {
|
|
mm.addMetrics(TokenMetrics{
|
|
Model: "test-model",
|
|
InputTokens: id*1000 + j,
|
|
OutputTokens: j,
|
|
})
|
|
}
|
|
}(i)
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, numGoroutines*metricsPerGoroutine, len(metrics))
|
|
})
|
|
|
|
t.Run("concurrent reads and writes are safe", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 100)
|
|
|
|
done := make(chan bool)
|
|
|
|
// Writer goroutine
|
|
go func() {
|
|
for i := 0; i < 50; i++ {
|
|
mm.addMetrics(TokenMetrics{Model: "test-model"})
|
|
time.Sleep(1 * time.Millisecond)
|
|
}
|
|
done <- true
|
|
}()
|
|
|
|
// Multiple reader goroutines
|
|
var wg sync.WaitGroup
|
|
for i := 0; i < 5; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
for j := 0; j < 20; j++ {
|
|
_ = mm.getMetrics()
|
|
_, _ = mm.getMetricsJSON()
|
|
time.Sleep(2 * time.Millisecond)
|
|
}
|
|
}()
|
|
}
|
|
|
|
<-done
|
|
wg.Wait()
|
|
|
|
// Final check
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 50, len(metrics))
|
|
})
|
|
}
|
|
|
|
func TestMetricsMonitor_ParseMetrics(t *testing.T) {
|
|
t.Run("prefers timings over usage data", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
// Timings should take precedence over usage
|
|
responseBody := `{
|
|
"usage": {
|
|
"prompt_tokens": 50,
|
|
"completion_tokens": 25
|
|
},
|
|
"timings": {
|
|
"prompt_n": 100,
|
|
"predicted_n": 50,
|
|
"prompt_per_second": 150.5,
|
|
"predicted_per_second": 25.5,
|
|
"prompt_ms": 500.0,
|
|
"predicted_ms": 1500.0
|
|
}
|
|
}`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 1, len(metrics))
|
|
// Should use timings values, not usage values
|
|
assert.Equal(t, 100, metrics[0].InputTokens)
|
|
assert.Equal(t, 50, metrics[0].OutputTokens)
|
|
})
|
|
|
|
t.Run("handles missing cache_n in timings", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
responseBody := `{
|
|
"timings": {
|
|
"prompt_n": 100,
|
|
"predicted_n": 50,
|
|
"prompt_per_second": 150.5,
|
|
"predicted_per_second": 25.5,
|
|
"prompt_ms": 500.0,
|
|
"predicted_ms": 1500.0
|
|
}
|
|
}`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 1, len(metrics))
|
|
assert.Equal(t, -1, metrics[0].CachedTokens) // Default value when not present
|
|
})
|
|
}
|
|
|
|
func TestMetricsMonitor_StreamingResponse(t *testing.T) {
|
|
t.Run("finds metrics in last valid SSE data", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
// Metrics should be found in the last data line before [DONE]
|
|
responseBody := `data: {"choices":[{"text":"First"}]}
|
|
|
|
data: {"choices":[{"text":"Second"}]}
|
|
|
|
data: {"usage":{"prompt_tokens":100,"completion_tokens":50}}
|
|
|
|
data: [DONE]
|
|
|
|
`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "text/event-stream")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 1, len(metrics))
|
|
assert.Equal(t, 100, metrics[0].InputTokens)
|
|
assert.Equal(t, 50, metrics[0].OutputTokens)
|
|
})
|
|
|
|
t.Run("handles streaming with no valid JSON", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
responseBody := `data: not json
|
|
|
|
data: [DONE]
|
|
|
|
`
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "text/event-stream")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
assert.NoError(t, err) // Errors after response is sent are logged, not returned
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
|
|
t.Run("handles empty streaming response", func(t *testing.T) {
|
|
mm := newMetricsMonitor(testLogger, 10)
|
|
|
|
responseBody := ``
|
|
|
|
nextHandler := func(modelID string, w http.ResponseWriter, r *http.Request) error {
|
|
w.Header().Set("Content-Type", "text/event-stream")
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte(responseBody))
|
|
return nil
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/test", nil)
|
|
rec := httptest.NewRecorder()
|
|
ginCtx, _ := gin.CreateTestContext(rec)
|
|
|
|
err := mm.wrapHandler("test-model", ginCtx.Writer, req, nextHandler)
|
|
// Empty body should not trigger WrapHandler processing
|
|
assert.NoError(t, err)
|
|
|
|
metrics := mm.getMetrics()
|
|
assert.Equal(t, 0, len(metrics))
|
|
})
|
|
}
|
|
|
|
// Benchmark tests
|
|
func BenchmarkMetricsMonitor_AddMetrics(b *testing.B) {
|
|
mm := newMetricsMonitor(testLogger, 1000)
|
|
|
|
metric := TokenMetrics{
|
|
Model: "test-model",
|
|
CachedTokens: 100,
|
|
InputTokens: 500,
|
|
OutputTokens: 250,
|
|
PromptPerSecond: 1200.5,
|
|
TokensPerSecond: 45.8,
|
|
DurationMs: 5000,
|
|
Timestamp: time.Now(),
|
|
}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
mm.addMetrics(metric)
|
|
}
|
|
}
|
|
|
|
func BenchmarkMetricsMonitor_AddMetrics_SmallBuffer(b *testing.B) {
|
|
// Test performance with a smaller buffer where wrapping occurs more frequently
|
|
mm := newMetricsMonitor(testLogger, 100)
|
|
|
|
metric := TokenMetrics{
|
|
Model: "test-model",
|
|
CachedTokens: 100,
|
|
InputTokens: 500,
|
|
OutputTokens: 250,
|
|
PromptPerSecond: 1200.5,
|
|
TokensPerSecond: 45.8,
|
|
DurationMs: 5000,
|
|
Timestamp: time.Now(),
|
|
}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
mm.addMetrics(metric)
|
|
}
|
|
}
|