From 8c693e7fcf8dade4d5555b733571c232addf7259 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABl=20James?=
Date: Thu, 24 Jul 2025 17:32:47 +0200
Subject: [PATCH] Add endpoint aliases for reranking models (#201)

* Add endpoint aliases for reranking models
* Add MetricsMiddleware to the previous reranking endpoint
* Fix the embeddings endpoint not having model set
---
 README.md             | 2 +-
 proxy/proxymanager.go | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 9d0bec3..088fda8 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
   - `v1/completions`
   - `v1/chat/completions`
   - `v1/embeddings`
-  - `v1/rerank`
+  - `v1/rerank`, `v1/reranking`, `rerank`
   - `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
   - `v1/audio/transcriptions` ([docs](https://github.com/mostlygeek/llama-swap/issues/41#issuecomment-2722637867))
 - ✅ llama-swap custom API endpoints
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
index ff44930..d0f7713 100644
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -160,8 +160,10 @@ func (pm *ProxyManager) setupGinEngine() {
 	pm.ginEngine.POST("/v1/completions", mm, pm.proxyOAIHandler)
 
 	// Support embeddings
-	pm.ginEngine.POST("/v1/embeddings", pm.proxyOAIHandler)
-	pm.ginEngine.POST("/v1/rerank", pm.proxyOAIHandler)
+	pm.ginEngine.POST("/v1/embeddings", mm, pm.proxyOAIHandler)
+	pm.ginEngine.POST("/v1/rerank", mm, pm.proxyOAIHandler)
+	pm.ginEngine.POST("/v1/reranking", mm, pm.proxyOAIHandler)
+	pm.ginEngine.POST("/rerank", mm, pm.proxyOAIHandler)
 
 	// Support audio/speech endpoint
 	pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)