diff --git a/ui/package-lock.json b/ui/package-lock.json index 428137d..878cec7 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -3975,9 +3975,9 @@ } }, "node_modules/vite": { - "version": "6.3.5", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz", - "integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==", + "version": "6.4.1", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz", + "integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==", "dev": true, "license": "MIT", "dependencies": { diff --git a/ui/src/pages/Models.tsx b/ui/src/pages/Models.tsx index 7a14390..c9793bd 100644 --- a/ui/src/pages/Models.tsx +++ b/ui/src/pages/Models.tsx @@ -191,42 +191,300 @@ function ModelsPanel() { ); } +interface HistogramData { + bins: number[]; + min: number; + max: number; + binSize: number; + p99: number; + p95: number; + p50: number; +} + +function TokenHistogram({ data }: { data: HistogramData }) { + const { bins, min, max, p50, p95, p99 } = data; + const maxCount = Math.max(...bins); + + const height = 120; + const padding = { top: 10, right: 15, bottom: 25, left: 45 }; + + // Use viewBox for responsive sizing + const viewBoxWidth = 600; + const chartWidth = viewBoxWidth - padding.left - padding.right; + const chartHeight = height - padding.top - padding.bottom; + + const barWidth = chartWidth / bins.length; + const range = max - min; + + // Calculate x position for a given value + const getXPosition = (value: number) => { + return padding.left + ((value - min) / range) * chartWidth; + }; + + return ( +
+ + {/* Y-axis */} + + + {/* X-axis */} + + + {/* Histogram bars */} + {bins.map((count, i) => { + const barHeight = maxCount > 0 ? (count / maxCount) * chartHeight : 0; + const x = padding.left + i * barWidth; + const y = height - padding.bottom - barHeight; + const binStart = min + i * data.binSize; + const binEnd = binStart + data.binSize; + + return ( + + + {`${binStart.toFixed(1)} - ${binEnd.toFixed(1)} tokens/sec\nCount: ${count}`} + + ); + })} + + {/* Percentile lines */} + + + + + + + {/* X-axis labels */} + + {min.toFixed(1)} + + + + {max.toFixed(1)} + + + {/* X-axis label */} + + Tokens/Second Distribution + + +
+ ); +} + function StatsPanel() { const { metrics } = useAPI(); - const [totalRequests, totalInputTokens, totalOutputTokens, avgTokensPerSecond] = useMemo(() => { + const [totalRequests, totalInputTokens, totalOutputTokens, tokenStats, histogramData] = useMemo(() => { const totalRequests = metrics.length; if (totalRequests === 0) { - return [0, 0, 0]; + return [0, 0, 0, { p99: 0, p95: 0, p50: 0 }, null]; } const totalInputTokens = metrics.reduce((sum, m) => sum + m.input_tokens, 0); const totalOutputTokens = metrics.reduce((sum, m) => sum + m.output_tokens, 0); - const avgTokensPerSecond = (metrics.reduce((sum, m) => sum + m.tokens_per_second, 0) / totalRequests).toFixed(2); - return [totalRequests, totalInputTokens, totalOutputTokens, avgTokensPerSecond]; + + // Calculate token statistics using output_tokens and duration_ms + // Filter out metrics with invalid duration or output tokens + const validMetrics = metrics.filter((m) => m.duration_ms > 0 && m.output_tokens > 0); + if (validMetrics.length === 0) { + return [totalRequests, totalInputTokens, totalOutputTokens, { p99: 0, p95: 0, p50: 0 }, null]; + } + + // Calculate tokens/second for each valid metric + const tokensPerSecond = validMetrics.map((m) => m.output_tokens / (m.duration_ms / 1000)); + + // Sort for percentile calculation + const sortedTokensPerSecond = [...tokensPerSecond].sort((a, b) => a - b); + + // Calculate percentiles - showing speed thresholds where X% of requests are SLOWER (below) + // P99: 99% of requests are slower than this speed (99th percentile - fast requests) + // P95: 95% of requests are slower than this speed (95th percentile) + // P50: 50% of requests are slower than this speed (median) + const p99 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.99)]; + const p95 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.95)]; + const p50 = sortedTokensPerSecond[Math.floor(sortedTokensPerSecond.length * 0.5)]; + + // Create histogram data + const min = Math.min(...tokensPerSecond); + const max = Math.max(...tokensPerSecond); + const binCount = Math.min(30, Math.max(10, Math.floor(tokensPerSecond.length / 5))); // Adaptive bin count + const binSize = (max - min) / binCount; + + const bins = Array(binCount).fill(0); + tokensPerSecond.forEach((value) => { + const binIndex = Math.min(Math.floor((value - min) / binSize), binCount - 1); + bins[binIndex]++; + }); + + const histogramData = { + bins, + min, + max, + binSize, + p99, + p95, + p50, + }; + + return [ + totalRequests, + totalInputTokens, + totalOutputTokens, + { + p99: p99.toFixed(2), + p95: p95.toFixed(2), + p50: p50.toFixed(2), + }, + histogramData, + ]; }, [metrics]); + const nf = new Intl.NumberFormat(); + return (
-
- - - - - - - +
+
RequestsProcessedGeneratedTokens/Sec
+ + + + + + - - - - + + + + - + + -
+ Requests + + Processed + + Generated + + Token Stats (tokens/sec) +
{totalRequests} - {new Intl.NumberFormat().format(totalInputTokens)} + +
{totalRequests} +
+ {nf.format(totalInputTokens)} + tokens +
- {new Intl.NumberFormat().format(totalOutputTokens)} + + +
+ {nf.format(totalOutputTokens)} + tokens +
+
+
+
+
+
P50
+
+ {tokenStats.p50} +
+
+ +
+
P95
+
+ {tokenStats.p95} +
+
+ +
+
P99
+
+ {tokenStats.p99} +
+
+
+ {histogramData && } +
{avgTokensPerSecond}