From 8eb5b7b6c4255e0c9c627f7039d4102a1226bba9 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Fri, 11 Oct 2024 21:59:21 -0700 Subject: [PATCH] Add custom check endpoint Replace previously hardcoded value for `/health` to check when the server became ready to serve traffic. With this the server can support any server that provides an OpenAI compatible inference endpoint. --- README.md | 7 +++++++ config.example.yaml | 8 ++++++++ llama-swap.go | 2 +- proxy/config.go | 9 +++++---- proxy/manager.go | 25 +++++++++++++++++++------ 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d8690e4..1890219 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,13 @@ models: - "gpt-4o-mini" - "gpt-3.5-turbo" + # wait for this path to return an HTTP 200 before serving requests + # defaults to /health to match llama.cpp + # + # use "none" to skip endpoint checking. This may cause requests to fail + # until the server is ready + checkEndpoint: "/custom-endpoint" + "qwen": # environment variables to pass to the command env: diff --git a/config.example.yaml b/config.example.yaml index 8dc9086..9fedd0b 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -10,6 +10,10 @@ models: # list of model name aliases this llama.cpp instance can serve aliases: - "gpt-4o-mini" + + # check this path for an HTTP 200 response for the server to be ready + checkEndpoint: "/health" + "qwen": cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf" proxy: "http://127.0.0.1:8999" @@ -24,6 +28,10 @@ models: cmd: "build/simple-responder --port 8999" proxy: "http://127.0.0.1:8999" + # use "none" to skip check. 
Caution this may cause some requests to fail + # until the upstream server is ready for traffic + checkEndpoint: "none" + # don't use this, just for testing if things are broken "broken": cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf" diff --git a/llama-swap.go b/llama-swap.go index 750e133..a3820fd 100644 --- a/llama-swap.go +++ b/llama-swap.go @@ -25,7 +25,7 @@ func main() { proxyManager := proxy.New(config) http.HandleFunc("/", proxyManager.HandleFunc) - fmt.Println("llamagate listening on " + *listenStr) + fmt.Println("llama-swap listening on " + *listenStr) if err := http.ListenAndServe(*listenStr, nil); err != nil { fmt.Printf("Error starting server: %v\n", err) os.Exit(1) diff --git a/proxy/config.go b/proxy/config.go index 106b992..e3cc213 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -7,10 +7,11 @@ import ( ) type ModelConfig struct { - Cmd string `yaml:"cmd"` - Proxy string `yaml:"proxy"` - Aliases []string `yaml:"aliases"` - Env []string `yaml:"env"` + Cmd string `yaml:"cmd"` + Proxy string `yaml:"proxy"` + Aliases []string `yaml:"aliases"` + Env []string `yaml:"env"` + CheckEndpoint string `yaml:"checkEndpoint"` } type Config struct { diff --git a/proxy/manager.go b/proxy/manager.go index 21adc30..ad028d6 100644 --- a/proxy/manager.go +++ b/proxy/manager.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "os/exec" "strings" @@ -89,11 +90,23 @@ func (pm *ProxyManager) checkHealthEndpoint() error { return fmt.Errorf("no upstream available to check /health") } + checkEndpoint := strings.TrimSpace(pm.currentConfig.CheckEndpoint) + + if checkEndpoint == "none" { + return nil + } + + // keep default behaviour + if checkEndpoint == "" { + checkEndpoint = "/health" + } + proxyTo := pm.currentConfig.Proxy - maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout) - - healthURL := proxyTo + "/health" + healthURL, err := url.JoinPath(proxyTo, checkEndpoint) + if err != nil { + return 
fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint) + } client := &http.Client{} startTime := time.Now() @@ -112,12 +125,12 @@ func (pm *ProxyManager) checkHealthEndpoint() error { // if TCP dial can't connect any HTTP response after 5 seconds // exit quickly. if time.Since(startTime) > 5*time.Second { - return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond") + return fmt.Errorf("health check endpoint took more than 5 seconds to respond") } } if time.Since(startTime) >= maxDuration { - return fmt.Errorf("failed to check /healthy from: %s", healthURL) + return fmt.Errorf("failed to check health from: %s", healthURL) } time.Sleep(time.Second) continue @@ -127,7 +140,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error { return nil } if time.Since(startTime) >= maxDuration { - return fmt.Errorf("failed to check /healthy from: %s", healthURL) + return fmt.Errorf("failed to check health from: %s", healthURL) } time.Sleep(time.Second) }