From 6fe37c3abf35577ad43a9b4ddc3efab4bec06086 Mon Sep 17 00:00:00 2001
From: Benson Wong
Date: Tue, 17 Dec 2024 17:23:26 -0800
Subject: [PATCH] support /v1/embeddings (#4)

---
 config.example.yaml   | 12 ++++++++++++
 proxy/proxymanager.go |  3 +++
 2 files changed, 15 insertions(+)

diff --git a/config.example.yaml b/config.example.yaml
index 093ff9e..b8cd5e8 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -26,6 +26,18 @@ models:
     aliases:
       - gpt-3.5-turbo
 
+  "nomic":
+    proxy: http://127.0.0.1:9005
+    cmd: >
+      models/llama-server-osx --port 9005
+      -m models/nomic-embed-text-v1.5.Q8_0.gguf
+      --ctx-size 8192
+      --batch-size 8192
+      --rope-scaling yarn
+      --rope-freq-scale 0.75
+      -ngl 99
+      --embeddings
+
   "simple":
     # example of setting environment variables
     env:
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
index b418243..e7f8c00 100644
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -49,6 +49,9 @@ func New(config *Config) *ProxyManager {
 	// Set up routes using the Gin engine
 	pm.ginEngine.POST("/v1/chat/completions", pm.proxyChatRequestHandler)
 
+	// Support embeddings
+	pm.ginEngine.POST("/v1/embeddings", pm.proxyChatRequestHandler)
+
 	// Support legacy /v1/completions api, see issue #12
 	pm.ginEngine.POST("/v1/completions", pm.proxyChatRequestHandler)
 