Add custom check endpoint

Replace the previously hardcoded /health path, which was used to check when the upstream server became ready to serve traffic, with a configurable check endpoint. With this, the proxy can support any upstream server that provides an OpenAI-compatible inference endpoint.
This commit is contained in:
Benson Wong
2024-10-11 22:02:14 -07:00
committed by GitHub
5 changed files with 40 additions and 11 deletions

View File

@@ -30,6 +30,13 @@ models:
- "gpt-4o-mini" - "gpt-4o-mini"
- "gpt-3.5-turbo" - "gpt-3.5-turbo"
# wait for this path to return an HTTP 200 before serving requests
# defaults to /health to match llama.cpp
#
# use "none" to skip endpoint checking. This may cause requests to fail
# until the server is ready
checkEndpoint: "/custom-endpoint"
"qwen": "qwen":
# environment variables to pass to the command # environment variables to pass to the command
env: env:

View File

@@ -10,6 +10,10 @@ models:
# list of model name aliases this llama.cpp instance can serve # list of model name aliases this llama.cpp instance can serve
aliases: aliases:
- "gpt-4o-mini" - "gpt-4o-mini"
# check this path for an HTTP 200 response for the server to be ready
checkEndpoint: "/health"
"qwen": "qwen":
cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf" cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
proxy: "http://127.0.0.1:8999" proxy: "http://127.0.0.1:8999"
@@ -24,6 +28,10 @@ models:
cmd: "build/simple-responder --port 8999" cmd: "build/simple-responder --port 8999"
proxy: "http://127.0.0.1:8999" proxy: "http://127.0.0.1:8999"
# use "none" to skip the check. Caution: this may cause some requests to fail
# until the upstream server is ready for traffic
checkEndpoint: "none"
# don't use this, just for testing if things are broken # don't use this, just for testing if things are broken
"broken": "broken":
cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf" cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"

View File

@@ -25,7 +25,7 @@ func main() {
proxyManager := proxy.New(config) proxyManager := proxy.New(config)
http.HandleFunc("/", proxyManager.HandleFunc) http.HandleFunc("/", proxyManager.HandleFunc)
fmt.Println("llamagate listening on " + *listenStr) fmt.Println("llama-swap listening on " + *listenStr)
if err := http.ListenAndServe(*listenStr, nil); err != nil { if err := http.ListenAndServe(*listenStr, nil); err != nil {
fmt.Printf("Error starting server: %v\n", err) fmt.Printf("Error starting server: %v\n", err)
os.Exit(1) os.Exit(1)

View File

@@ -7,10 +7,11 @@ import (
) )
type ModelConfig struct { type ModelConfig struct {
Cmd string `yaml:"cmd"` Cmd string `yaml:"cmd"`
Proxy string `yaml:"proxy"` Proxy string `yaml:"proxy"`
Aliases []string `yaml:"aliases"` Aliases []string `yaml:"aliases"`
Env []string `yaml:"env"` Env []string `yaml:"env"`
CheckEndpoint string `yaml:"checkEndpoint"`
} }
type Config struct { type Config struct {

View File

@@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"net/url"
"os" "os"
"os/exec" "os/exec"
"strings" "strings"
@@ -89,11 +90,23 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
return fmt.Errorf("no upstream available to check /health") return fmt.Errorf("no upstream available to check /health")
} }
checkEndpoint := strings.TrimSpace(pm.currentConfig.CheckEndpoint)
if checkEndpoint == "none" {
return nil
}
// keep default behaviour
if checkEndpoint == "" {
checkEndpoint = "/health"
}
proxyTo := pm.currentConfig.Proxy proxyTo := pm.currentConfig.Proxy
maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout) maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout)
healthURL, err := url.JoinPath(proxyTo, checkEndpoint)
healthURL := proxyTo + "/health" if err != nil {
return fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint)
}
client := &http.Client{} client := &http.Client{}
startTime := time.Now() startTime := time.Now()
@@ -112,12 +125,12 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
// if TCP dial can't connect any HTTP response after 5 seconds // if TCP dial can't connect any HTTP response after 5 seconds
// exit quickly. // exit quickly.
if time.Since(startTime) > 5*time.Second { if time.Since(startTime) > 5*time.Second {
return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond") return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
} }
} }
if time.Since(startTime) >= maxDuration { if time.Since(startTime) >= maxDuration {
return fmt.Errorf("failed to check /healthy from: %s", healthURL) return fmt.Errorf("failed to check health from: %s", healthURL)
} }
time.Sleep(time.Second) time.Sleep(time.Second)
continue continue
@@ -127,7 +140,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
return nil return nil
} }
if time.Since(startTime) >= maxDuration { if time.Since(startTime) >= maxDuration {
return fmt.Errorf("failed to check /healthy from: %s", healthURL) return fmt.Errorf("failed to check health from: %s", healthURL)
} }
time.Sleep(time.Second) time.Sleep(time.Second)
} }