Support multiline cmds in YAML configuration
Add support for multiline `cmd` configurations allowing for nicer looking configuration YAML files.
This commit is contained in:
@@ -4,36 +4,38 @@ healthCheckTimeout: 60
|
||||
|
||||
models:
|
||||
"llama":
|
||||
cmd: "models/llama-server-osx --port 8999 -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf"
|
||||
proxy: "http://127.0.0.1:8999"
|
||||
cmd: >
|
||||
models/llama-server-osx
|
||||
--port 8999
|
||||
-m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
|
||||
# list of model name aliases this llama.cpp instance can serve
|
||||
aliases:
|
||||
- "gpt-4o-mini"
|
||||
- gpt-4o-mini
|
||||
|
||||
# check this path for a HTTP 200 response for the server to be ready
|
||||
checkEndpoint: "/health"
|
||||
checkEndpoint: /health
|
||||
|
||||
"qwen":
|
||||
cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
|
||||
proxy: "http://127.0.0.1:8999"
|
||||
cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
aliases:
|
||||
- "gpt-3.5-turbo"
|
||||
- gpt-3.5-turbo
|
||||
|
||||
"simple":
|
||||
# example of setting environment variables
|
||||
env:
|
||||
- "CUDA_VISIBLE_DEVICES=0,1"
|
||||
- "env1=hello"
|
||||
cmd: "build/simple-responder --port 8999"
|
||||
proxy: "http://127.0.0.1:8999"
|
||||
- CUDA_VISIBLE_DEVICES=0,1
|
||||
- env1=hello
|
||||
cmd: build/simple-responder --port 8999
|
||||
proxy: http://127.0.0.1:8999
|
||||
|
||||
# use "none" to skip check. Caution this may cause some requests to fail
|
||||
# until the upstream server is ready for traffic
|
||||
checkEndpoint: "none"
|
||||
checkEndpoint: none
|
||||
|
||||
# don't use this, just for testing if things are broken
|
||||
"broken":
|
||||
cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
|
||||
proxy: "http://127.0.0.1:8999"
|
||||
|
||||
cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
Reference in New Issue
Block a user