Support llama.cpp's cache_n in timings info (#287)

Capture prompt cache metrics and surface them on the Activities page in the UI
Benson Wong
2025-09-06 13:58:02 -07:00
committed by GitHub
parent 954e2dee73
commit f58c8c8ec5
4 changed files with 72 additions and 11 deletions


```diff
@@ -13,6 +13,7 @@ type TokenMetrics struct {
 	ID              int       `json:"id"`
 	Timestamp       time.Time `json:"timestamp"`
 	Model           string    `json:"model"`
+	CachedTokens    int       `json:"cache_tokens"`
 	InputTokens     int       `json:"input_tokens"`
 	OutputTokens    int       `json:"output_tokens"`
 	PromptPerSecond float64   `json:"prompt_per_second"`
```
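
For context, llama.cpp's completion responses carry a `timings` object, and its `cache_n` field reports how many prompt tokens were served from the prompt cache rather than re-evaluated. Below is a minimal sketch of mapping that field onto the new `CachedTokens` column; the `llamaTimings` field set, the trimmed `TokenMetrics` copy, and `"example-model"` are illustrative assumptions, not llama-swap's actual parsing code:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Trimmed-down copy of the TokenMetrics struct from the diff above.
type TokenMetrics struct {
	Timestamp       time.Time `json:"timestamp"`
	Model           string    `json:"model"`
	CachedTokens    int       `json:"cache_tokens"`
	InputTokens     int       `json:"input_tokens"`
	OutputTokens    int       `json:"output_tokens"`
	PromptPerSecond float64   `json:"prompt_per_second"`
}

// llamaTimings mirrors a subset of llama.cpp's "timings" object; the
// exact field set here is an assumption for illustration.
type llamaTimings struct {
	CacheN          int     `json:"cache_n"`     // prompt tokens reused from the cache
	PromptN         int     `json:"prompt_n"`    // prompt tokens actually evaluated
	PredictedN      int     `json:"predicted_n"` // generated tokens
	PromptPerSecond float64 `json:"prompt_per_second"`
}

func main() {
	raw := []byte(`{"cache_n":128,"prompt_n":32,"predicted_n":64,"prompt_per_second":450.0}`)

	var t llamaTimings
	if err := json.Unmarshal(raw, &t); err != nil {
		panic(err)
	}

	// Copy the upstream timings into the metrics record the UI consumes.
	m := TokenMetrics{
		Timestamp:       time.Now(),
		Model:           "example-model",
		CachedTokens:    t.CacheN,
		InputTokens:     t.PromptN,
		OutputTokens:    t.PredictedN,
		PromptPerSecond: t.PromptPerSecond,
	}
	fmt.Printf("%+v\n", m)
}
```
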
```diff
@@ -61,7 +62,6 @@ func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) {
 	if len(mp.metrics) > mp.maxMetrics {
 		mp.metrics = mp.metrics[len(mp.metrics)-mp.maxMetrics:]
 	}
 	event.Emit(TokenMetricsEvent{Metrics: metric})
 }
```
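
The re-slice in `addMetrics` keeps the in-memory history bounded: once the slice exceeds `maxMetrics`, the oldest entries are sliced away and only the newest `maxMetrics` remain. A self-contained sketch of the same idiom with plain ints:

```go
package main

import "fmt"

func main() {
	// Same bounding idiom as addMetrics above: drop the oldest entries
	// so the slice never holds more than maxMetrics items.
	metrics := []int{1, 2, 3, 4, 5}
	maxMetrics := 3
	if len(metrics) > maxMetrics {
		metrics = metrics[len(metrics)-maxMetrics:]
	}
	fmt.Println(metrics) // [3 4 5]
}
```

One caveat of re-slicing in Go is that the original backing array stays alive; for a small, fixed cap like this that is fine, but copying into a fresh slice would let the dropped entries be garbage-collected.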