Support multiline cmds in YAML configuration

Add support for multiline `cmd` configurations, allowing for nicer-looking YAML configuration files.
Benson Wong
2024-10-19 20:06:59 -07:00
parent 6cf0962807
commit be82d1a6a0
6 changed files with 185 additions and 18 deletions
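
For reference, YAML's folded block scalar (`>`) joins the indented lines that follow it with single spaces, so the multiline form is just a more readable spelling of the same one-line command string. A minimal illustration of the parsing semantics (not code from this commit):

```yaml
# Folded block scalar: inner newlines become spaces when parsed.
cmd: >
  models/llama-server-osx
  --port 8999
  -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
# Parses to the same value as the quoted single-line form, plus one
# trailing newline from the default "clip" chomping:
# "models/llama-server-osx --port 8999 -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf\n"
```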


@@ -4,36 +4,38 @@ healthCheckTimeout: 60
 models:
   "llama":
-    cmd: "models/llama-server-osx --port 8999 -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf"
-    proxy: "http://127.0.0.1:8999"
+    cmd: >
+      models/llama-server-osx
+      --port 8999
+      -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
+    proxy: http://127.0.0.1:8999
     # list of model name aliases this llama.cpp instance can serve
     aliases:
-    - "gpt-4o-mini"
+    - gpt-4o-mini
     # check this path for a HTTP 200 response for the server to be ready
-    checkEndpoint: "/health"
+    checkEndpoint: /health
   "qwen":
-    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
-    proxy: "http://127.0.0.1:8999"
+    cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
+    proxy: http://127.0.0.1:8999
     aliases:
-    - "gpt-3.5-turbo"
+    - gpt-3.5-turbo
   "simple":
     # example of setting environment variables
     env:
-    - "CUDA_VISIBLE_DEVICES=0,1"
-    - "env1=hello"
-    cmd: "build/simple-responder --port 8999"
-    proxy: "http://127.0.0.1:8999"
+    - CUDA_VISIBLE_DEVICES=0,1
+    - env1=hello
+    cmd: build/simple-responder --port 8999
+    proxy: http://127.0.0.1:8999
     # use "none" to skip check. Caution this may cause some requests to fail
     # until the upstream server is ready for traffic
-    checkEndpoint: "none"
+    checkEndpoint: none
   # don't use this, just for testing if things are broken
   "broken":
-    cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
-    proxy: "http://127.0.0.1:8999"
+    cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
+    proxy: http://127.0.0.1:8999
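
A related detail: with the plain `>` indicator, YAML's default "clip" chomping keeps a single trailing newline on the folded value, while `>-` strips it. Both fold inner newlines to spaces, so either form yields the same command tokens as long as the application trims or whitespace-splits `cmd` before spawning the process (how this commit tokenizes `cmd` is not visible in this hunk). A small sketch:

```yaml
# ">-" folds newlines to spaces and also strips the trailing newline.
cmd: >-
  models/llama-server-osx
  --port 8999
  -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
```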