support /v1/embeddings (#4)

2024-12-17 17:23:26 -08:00
parent 7f45493a37
commit 6fe37c3abf
2 changed files with 15 additions and 0 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -26,6 +26,18 @@ models:
    aliases:
    - gpt-3.5-turbo
  "nomic":
    proxy: http://127.0.0.1:9005
    cmd: >
      models/llama-server-osx --port 9005
      -m models/nomic-embed-text-v1.5.Q8_0.gguf
      --ctx-size 8192
      --batch-size 8192
      --rope-scaling yarn
      --rope-freq-scale 0.75
      -ngl 99
      --embeddings
  "simple":
    # example of setting environment variables
    env:
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -49,6 +49,9 @@ func New(config *Config) *ProxyManager {
 	// Set up routes using the Gin engine
 	pm.ginEngine.POST("/v1/chat/completions", pm.proxyChatRequestHandler)
 	// Support the /v1/embeddings API; requests go through the same handler as chat completions
 	pm.ginEngine.POST("/v1/embeddings", pm.proxyChatRequestHandler)
 	// Support legacy /v1/completions api, see issue #12
 	pm.ginEngine.POST("/v1/completions", pm.proxyChatRequestHandler)