Fix token metrics parsing (#199)
Fix #198: use llama-server's `timings` info from the response body when available; send "-1" for tokens/sec when performance cannot be accurately calculated; optimize the streaming-body search for metrics information.
This commit is contained in:
@@ -708,7 +708,9 @@ func TestProxyManager_MiddlewareWritesMetrics_NonStreaming(t *testing.T) {
|
||||
|
||||
// Check that metrics were recorded
|
||||
metrics := proxy.metricsMonitor.GetMetrics()
|
||||
assert.NotEmpty(t, metrics, "metrics should be recorded for non-streaming request")
|
||||
if !assert.NotEmpty(t, metrics, "metrics should be recorded for non-streaming request") {
|
||||
return
|
||||
}
|
||||
|
||||
// Verify the last metric has the correct model
|
||||
lastMetric := metrics[len(metrics)-1]
|
||||
@@ -741,7 +743,9 @@ func TestProxyManager_MiddlewareWritesMetrics_Streaming(t *testing.T) {
|
||||
|
||||
// Check that metrics were recorded
|
||||
metrics := proxy.metricsMonitor.GetMetrics()
|
||||
assert.NotEmpty(t, metrics, "metrics should be recorded for streaming request")
|
||||
if !assert.NotEmpty(t, metrics, "metrics should be recorded for streaming request") {
|
||||
return
|
||||
}
|
||||
|
||||
// Verify the last metric has the correct model
|
||||
lastMetric := metrics[len(metrics)-1]
|
||||
|
||||
Reference in New Issue
Block a user