Fix token metrics parsing (#199)

Fix #198:
- use llama-server's `timings` info if it is available in the response body
- send "-1" for tokens/sec when performance cannot be calculated accurately
- optimize the streaming body search for metrics information
2025-07-22 23:10:14 -07:00
parent accd65294b
commit 01d4838fb3
4 changed files with 79 additions and 45 deletions
--- a/proxy/proxymanager_test.go
+++ b/proxy/proxymanager_test.go
@@ -708,7 +708,9 @@ func TestProxyManager_MiddlewareWritesMetrics_NonStreaming(t *testing.T) {

 	// Check that metrics were recorded
 	metrics := proxy.metricsMonitor.GetMetrics()
-	assert.NotEmpty(t, metrics, "metrics should be recorded for non-streaming request")
+	if !assert.NotEmpty(t, metrics, "metrics should be recorded for non-streaming request") {
+		return
+	}

 	// Verify the last metric has the correct model
 	lastMetric := metrics[len(metrics)-1]
@@ -741,7 +743,9 @@ func TestProxyManager_MiddlewareWritesMetrics_Streaming(t *testing.T) {

 	// Check that metrics were recorded
 	metrics := proxy.metricsMonitor.GetMetrics()
-	assert.NotEmpty(t, metrics, "metrics should be recorded for streaming request")
+	if !assert.NotEmpty(t, metrics, "metrics should be recorded for streaming request") {
+		return
+	}

 	// Verify the last metric has the correct model
 	lastMetric := metrics[len(metrics)-1]