Add custom check endpoint

Replace the previously hardcoded /health path with a configurable endpoint for checking when the server became ready to serve traffic. With this, llama-swap can support any upstream server that provides an OpenAI compatible inference endpoint.
Benson Wong
2024-10-11 22:02:14 -07:00
committed by GitHub
5 changed files with 40 additions and 11 deletions

View File

@@ -30,6 +30,13 @@ models:
- "gpt-4o-mini"
- "gpt-3.5-turbo"
# wait for this path to return an HTTP 200 before serving requests
# defaults to /health to match llama.cpp
#
# use "none" to skip endpoint checking. This may cause requests to fail
# until the server is ready
checkEndpoint: "/custom-endpoint"
"qwen":
# environment variables to pass to the command
env:
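
The three possible values above (unset, a custom path, and "none") map to straightforward logic. Below is a minimal Go sketch of how they are interpreted, mirroring the proxy change later in this commit; the helper name resolveCheckURL is hypothetical, and url.JoinPath requires Go 1.19+:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

// resolveCheckURL returns the URL to poll, or ok=false when checking
// is disabled via "none". An empty value falls back to "/health".
func resolveCheckURL(proxyTo, checkEndpoint string) (u string, ok bool, err error) {
	endpoint := strings.TrimSpace(checkEndpoint)
	if endpoint == "none" {
		return "", false, nil
	}
	if endpoint == "" {
		endpoint = "/health"
	}
	u, err = url.JoinPath(proxyTo, endpoint)
	return u, err == nil, err
}

func main() {
	for _, ep := range []string{"", "/custom-endpoint", "none"} {
		u, ok, _ := resolveCheckURL("http://127.0.0.1:8999", ep)
		fmt.Printf("%q -> check=%v url=%q\n", ep, ok, u)
	}
}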

View File

@@ -10,6 +10,10 @@ models:
# list of model name aliases this llama.cpp instance can serve
aliases:
- "gpt-4o-mini"
# check this path for an HTTP 200 response before the server is considered ready
checkEndpoint: "/health"
"qwen":
cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
proxy: "http://127.0.0.1:8999"
@@ -24,6 +28,10 @@ models:
cmd: "build/simple-responder --port 8999"
proxy: "http://127.0.0.1:8999"
# use "none" to skip check. Caution this may cause some requests to fail
# until the upstream server is ready for traffic
checkEndpoint: "none"
# don't use this; it exists only to test handling of a broken model
"broken":
cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"

View File

@@ -25,7 +25,7 @@ func main() {
proxyManager := proxy.New(config)
http.HandleFunc("/", proxyManager.HandleFunc)
fmt.Println("llamagate listening on " + *listenStr)
fmt.Println("llama-swap listening on " + *listenStr)
if err := http.ListenAndServe(*listenStr, nil); err != nil {
fmt.Printf("Error starting server: %v\n", err)
os.Exit(1)

View File

@@ -11,6 +11,7 @@ type ModelConfig struct {
Proxy string `yaml:"proxy"`
Aliases []string `yaml:"aliases"`
Env []string `yaml:"env"`
CheckEndpoint string `yaml:"checkEndpoint"`
}
type Config struct {
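
The new CheckEndpoint field is picked up by the YAML decoder through its struct tag, so no extra parsing code is needed. A minimal sketch, assuming gopkg.in/yaml.v3 (the decoder choice here is an assumption; any tag-aware YAML library behaves the same, and fields above the hunk are omitted):

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// ModelConfig mirrors only the fields visible in the hunk above.
type ModelConfig struct {
	Proxy         string   `yaml:"proxy"`
	Aliases       []string `yaml:"aliases"`
	Env           []string `yaml:"env"`
	CheckEndpoint string   `yaml:"checkEndpoint"`
}

func main() {
	raw := []byte(`
proxy: "http://127.0.0.1:8999"
checkEndpoint: "none"
`)
	var mc ModelConfig
	if err := yaml.Unmarshal(raw, &mc); err != nil {
		panic(err)
	}
	fmt.Printf("checkEndpoint=%q\n", mc.CheckEndpoint) // "none" disables the readiness check
}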

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"os"
"os/exec"
"strings"
@@ -89,11 +90,23 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
return fmt.Errorf("no upstream available to check /health")
}
checkEndpoint := strings.TrimSpace(pm.currentConfig.CheckEndpoint)
if checkEndpoint == "none" {
return nil
}
// keep default behaviour
if checkEndpoint == "" {
checkEndpoint = "/health"
}
proxyTo := pm.currentConfig.Proxy
maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout)
healthURL := proxyTo + "/health"
healthURL, err := url.JoinPath(proxyTo, checkEndpoint)
if err != nil {
return fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint)
}
client := &http.Client{}
startTime := time.Now()
@@ -112,12 +125,12 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
// if the TCP dial can't connect and no HTTP response arrives within
// 5 seconds, exit quickly.
if time.Since(startTime) > 5*time.Second {
return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond")
return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
}
}
if time.Since(startTime) >= maxDuration {
return fmt.Errorf("failed to check /healthy from: %s", healthURL)
return fmt.Errorf("failed to check health from: %s", healthURL)
}
time.Sleep(time.Second)
continue
@@ -127,7 +140,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
return nil
}
if time.Since(startTime) >= maxDuration {
return fmt.Errorf("failed to check /healthy from: %s", healthURL)
return fmt.Errorf("failed to check health from: %s", healthURL)
}
time.Sleep(time.Second)
}
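
Put together, the changed wait logic reduces to roughly the standalone sketch below. It is a simplified reading of checkHealthEndpoint, not the exact code: the request call sits in an elided part of the diff, so client.Get stands in for it, and healthURL/maxDuration stand in for the values derived from the config:

package main

import (
	"fmt"
	"net/http"
	"time"
)

// waitForReady polls healthURL once per second until it returns HTTP 200.
// Connection-level failures abort after 5 seconds (the upstream likely
// never bound its port); otherwise the loop keeps retrying until
// maxDuration elapses.
func waitForReady(healthURL string, maxDuration time.Duration) error {
	client := &http.Client{}
	startTime := time.Now()
	for {
		resp, err := client.Get(healthURL)
		if err != nil {
			// no HTTP response at all: fail fast rather than waiting
			// out the full timeout
			if time.Since(startTime) > 5*time.Second {
				return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
			}
		} else {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		if time.Since(startTime) >= maxDuration {
			return fmt.Errorf("failed to check health from: %s", healthURL)
		}
		time.Sleep(time.Second)
	}
}

func main() {
	if err := waitForReady("http://127.0.0.1:8999/health", 60*time.Second); err != nil {
		fmt.Println(err)
	}
}

The two time.Since checks trade off failing fast when the upstream never comes up at all against tolerating slow model loads, bounded by the configured healthCheckTimeout.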