Add support for sending a custom model name to upstream (#69) (#71)

* add test for splitRequestedModel()
* Add `useModelName` parameter to model configuration
* add docs to README
2025-03-14 21:07:52 -07:00
parent 671c1a5a7b
commit 5c97299e7b
4 changed files with 146 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -117,6 +117,13 @@ models:
      ghcr.io/ggerganov/llama.cpp:server
      --model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'

+  # `useModelName` sends a specific model name to the upstream server,
+  # overriding whatever model name was set in the request
+  "qwq":
+    proxy: http://127.0.0.1:11434
+    cmd: my-server
+    useModelName: "qwen:qwq"
+
 # profiles make it easy to manage multi-model (and GPU) configurations.
 #
 # Tips: