Add custom check endpoint

Replace the previously hardcoded `/health` path used to check when the
server becomes ready to serve traffic. With this change, any server that
provides an OpenAI compatible inference endpoint can be supported.
Benson Wong
2024-10-11 21:59:21 -07:00
parent 5a57688aa8
commit 8eb5b7b6c4
5 changed files with 40 additions and 11 deletions

@@ -10,6 +10,10 @@ models:
    # list of model name aliases this llama.cpp instance can serve
    aliases:
    - "gpt-4o-mini"

    # check this path for a HTTP 200 response for the server to be ready
    checkEndpoint: "/health"

  "qwen":
    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
    proxy: "http://127.0.0.1:8999"
@@ -24,6 +28,10 @@ models:
cmd: "build/simple-responder --port 8999"
proxy: "http://127.0.0.1:8999"
# use "none" to skip check. Caution this may cause some requests to fail
# until the upstream server is ready for traffic
checkEndpoint: "none"
# don't use this, just for testing if things are broken
"broken":
cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"