Add prompt processing metrics (#250)
- capture prompt processing metrics - display prompt processing metrics on UI Activity page
This commit is contained in:
@@ -79,10 +79,12 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
|||||||
outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
|
outputTokens := int(jsonData.Get("usage.completion_tokens").Int())
|
||||||
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
|
inputTokens := int(jsonData.Get("usage.prompt_tokens").Int())
|
||||||
tokensPerSecond := -1.0
|
tokensPerSecond := -1.0
|
||||||
|
promptPerSecond := -1.0
|
||||||
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
durationMs := int(time.Since(rec.startTime).Milliseconds())
|
||||||
|
|
||||||
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
// use llama-server's timing data for tok/sec and duration as it is more accurate
|
||||||
if timings := jsonData.Get("timings"); timings.Exists() {
|
if timings := jsonData.Get("timings"); timings.Exists() {
|
||||||
|
promptPerSecond = jsonData.Get("timings.prompt_per_second").Float()
|
||||||
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
tokensPerSecond = jsonData.Get("timings.predicted_per_second").Float()
|
||||||
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
|
||||||
}
|
}
|
||||||
@@ -92,6 +94,7 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
|
|||||||
Model: rec.realModelName,
|
Model: rec.realModelName,
|
||||||
InputTokens: inputTokens,
|
InputTokens: inputTokens,
|
||||||
OutputTokens: outputTokens,
|
OutputTokens: outputTokens,
|
||||||
|
PromptPerSecond: promptPerSecond,
|
||||||
TokensPerSecond: tokensPerSecond,
|
TokensPerSecond: tokensPerSecond,
|
||||||
DurationMs: durationMs,
|
DurationMs: durationMs,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ type TokenMetrics struct {
|
|||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
InputTokens int `json:"input_tokens"`
|
InputTokens int `json:"input_tokens"`
|
||||||
OutputTokens int `json:"output_tokens"`
|
OutputTokens int `json:"output_tokens"`
|
||||||
|
PromptPerSecond float64 `json:"prompt_per_second"`
|
||||||
TokensPerSecond float64 `json:"tokens_per_second"`
|
TokensPerSecond float64 `json:"tokens_per_second"`
|
||||||
DurationMs int `json:"duration_ms"`
|
DurationMs int `json:"duration_ms"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ interface Metrics {
|
|||||||
model: string;
|
model: string;
|
||||||
input_tokens: number;
|
input_tokens: number;
|
||||||
output_tokens: number;
|
output_tokens: number;
|
||||||
|
prompt_per_second: number;
|
||||||
tokens_per_second: number;
|
tokens_per_second: number;
|
||||||
duration_ms: number;
|
duration_ms: number;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,6 +51,7 @@ const ActivityPage = () => {
|
|||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Model</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Input Tokens</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Input Tokens</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th>
|
||||||
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Prompt Processing</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
|
||||||
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
|
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -62,6 +63,7 @@ const ActivityPage = () => {
|
|||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
|
||||||
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.prompt_per_second)}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.tokens_per_second)}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.tokens_per_second)}</td>
|
||||||
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatDuration(metric.duration_ms)}</td>
|
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatDuration(metric.duration_ms)}</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|||||||
Reference in New Issue
Block a user