support /v1/embeddings (#4)

This commit is contained in:
Benson Wong
2024-12-17 17:23:26 -08:00
parent 7f45493a37
commit 6fe37c3abf
2 changed files with 15 additions and 0 deletions

View File

@@ -26,6 +26,18 @@ models:
aliases:
- gpt-3.5-turbo
"nomic":
proxy: http://127.0.0.1:9005
cmd: >
models/llama-server-osx --port 9005
-m models/nomic-embed-text-v1.5.Q8_0.gguf
--ctx-size 8192
--batch-size 8192
--rope-scaling yarn
--rope-freq-scale 0.75
-ngl 99
--embeddings
"simple":
# example of setting environment variables
env:

View File

@@ -49,6 +49,9 @@ func New(config *Config) *ProxyManager {
// Set up routes using the Gin engine
pm.ginEngine.POST("/v1/chat/completions", pm.proxyChatRequestHandler)
// Support embeddings
pm.ginEngine.POST("/v1/embeddings", pm.proxyChatRequestHandler)
// Support legacy /v1/completions api, see issue #12
pm.ginEngine.POST("/v1/completions", pm.proxyChatRequestHandler)