tidy up config examples in README

2025-03-15 10:36:45 -07:00
parent 5c97299e7b
commit a3f82c140b
1 changed files with 13 additions and 20 deletions
--- a/README.md
+++ b/README.md
@@ -70,7 +70,14 @@ logRequests: true
 # define the valid model names and how to start each upstream server
 models:
  "llama":
-    cmd: llama-server --port 8999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf
+    # multiline for readability
    cmd: >
      llama-server --port 8999
      --model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
    # environment variables to pass to the command
    env:
      - "CUDA_VISIBLE_DEVICES=0"
    # where to reach the server started by cmd; make sure the ports match
    proxy: http://127.0.0.1:8999
@@ -91,16 +98,9 @@ models:
    # default: 0 = never unload model
    ttl: 60
-  "qwen":
+    # `useModelName` overrides the model name in the request
-    # environment variables to pass to the command
+    # and sends a specific name to the upstream server
-    env:
+    useModelName: "qwen:qwq"
      - "CUDA_VISIBLE_DEVICES=0"
    # multiline for readability
    cmd: >
      llama-server --port 8999
      --model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
    proxy: http://127.0.0.1:8999
  # unlisted models do not show up in /v1/models or /upstream lists
  # but they can still be requested as normal
@@ -117,14 +117,7 @@ models:
      ghcr.io/ggerganov/llama.cpp:server
      --model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
-  # `useModelName` will send a specific model name to the upstream server
+# profiles eliminate swapping by running multiple models at the same time
  # overriding whatever was set in the request
  "qwq":
    proxy: http://127.0.0.1:11434
    cmd: my-server
    useModelName: "qwen:qwq"
 # profiles make it easy to manage multi-model (and GPU) configurations.
 #
 # Tips:
 #  - each model must be listening on a unique address and port
@@ -132,8 +125,8 @@ models:
 #  - the profile will load and unload all models in the profile at the same time
 profiles:
  coding:
    - "qwen"
    - "llama"
    - "qwen-unlisted"
 ```
 ### Use Case Examples