Include metrics from upstream chat requests (#361)

* proxy: refactor metrics recording - remove metrics_middleware.go as this wrapper is no longer needed. This also eliminates double body parsing for the modelID - move metrics parsing to be part of MetricsMonitor - refactor how metrics are recorded in ProxyManager - add MetricsMonitor tests - improve mem efficiency of processStreamingResponse - add benchmarks for MetricsMonitor.addMetrics - proxy: refactor MetricsMonitor to handle errors more safely
2025-10-25 17:38:18 -07:00
parent d18dc26d01
commit e250e71e59
6 changed files with 939 additions and 289 deletions
--- a/proxy/proxymanager_test.go
+++ b/proxy/proxymanager_test.go
@@ -911,76 +911,6 @@ func TestProxyManager_FiltersStripParams(t *testing.T) {
 	// t.Logf("%v", response)
 }

-func TestProxyManager_MiddlewareWritesMetrics_NonStreaming(t *testing.T) {
-	config := config.AddDefaultGroupToConfig(config.Config{
-		HealthCheckTimeout: 15,
-		Models: map[string]config.ModelConfig{
-			"model1": getTestSimpleResponderConfig("model1"),
-		},
-		LogLevel: "error",
-	})
-
-	proxy := New(config)
-	defer proxy.StopProcesses(StopWaitForInflightRequest)
-
-	// Make a non-streaming request
-	reqBody := `{"model":"model1", "stream": false}`
-	req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
-	w := CreateTestResponseRecorder()
-
-	proxy.ServeHTTP(w, req)
-	assert.Equal(t, http.StatusOK, w.Code)
-
-	// Check that metrics were recorded
-	metrics := proxy.metricsMonitor.GetMetrics()
-	if !assert.NotEmpty(t, metrics, "metrics should be recorded for non-streaming request") {
-		return
-	}
-
-	// Verify the last metric has the correct model
-	lastMetric := metrics[len(metrics)-1]
-	assert.Equal(t, "model1", lastMetric.Model)
-	assert.Equal(t, 25, lastMetric.InputTokens, "input tokens should be 25")
-	assert.Equal(t, 10, lastMetric.OutputTokens, "output tokens should be 10")
-	assert.Greater(t, lastMetric.TokensPerSecond, 0.0, "tokens per second should be greater than 0")
-	assert.Greater(t, lastMetric.DurationMs, 0, "duration should be greater than 0")
-}
-
-func TestProxyManager_MiddlewareWritesMetrics_Streaming(t *testing.T) {
-	config := config.AddDefaultGroupToConfig(config.Config{
-		HealthCheckTimeout: 15,
-		Models: map[string]config.ModelConfig{
-			"model1": getTestSimpleResponderConfig("model1"),
-		},
-		LogLevel: "error",
-	})
-
-	proxy := New(config)
-	defer proxy.StopProcesses(StopWaitForInflightRequest)
-
-	// Make a streaming request
-	reqBody := `{"model":"model1", "stream": true}`
-	req := httptest.NewRequest("POST", "/v1/chat/completions?stream=true", bytes.NewBufferString(reqBody))
-	w := CreateTestResponseRecorder()
-
-	proxy.ServeHTTP(w, req)
-	assert.Equal(t, http.StatusOK, w.Code)
-
-	// Check that metrics were recorded
-	metrics := proxy.metricsMonitor.GetMetrics()
-	if !assert.NotEmpty(t, metrics, "metrics should be recorded for streaming request") {
-		return
-	}
-
-	// Verify the last metric has the correct model
-	lastMetric := metrics[len(metrics)-1]
-	assert.Equal(t, "model1", lastMetric.Model)
-	assert.Equal(t, 25, lastMetric.InputTokens, "input tokens should be 25")
-	assert.Equal(t, 10, lastMetric.OutputTokens, "output tokens should be 10")
-	assert.Greater(t, lastMetric.TokensPerSecond, 0.0, "tokens per second should be greater than 0")
-	assert.Greater(t, lastMetric.DurationMs, 0, "duration should be greater than 0")
-}
-
 func TestProxyManager_HealthEndpoint(t *testing.T) {
 	config := config.AddDefaultGroupToConfig(config.Config{
 		HealthCheckTimeout: 15,