Add custom check endpoint
Replace the previously hardcoded /health path, used to check when the server became ready to serve traffic, with a configurable check endpoint. With this, the proxy can support any server that provides an OpenAI-compatible inference endpoint.
This commit is contained in:
@@ -30,6 +30,13 @@ models:
|
||||
- "gpt-4o-mini"
|
||||
- "gpt-3.5-turbo"
|
||||
|
||||
# wait for this path to return an HTTP 200 before serving requests
|
||||
# defaults to /health to match llama.cpp
|
||||
#
|
||||
# use "none" to skip endpoint checking. This may cause requests to fail
|
||||
# until the server is ready
|
||||
checkEndpoint: "/custom-endpoint"
|
||||
|
||||
"qwen":
|
||||
# environment variables to pass to the command
|
||||
env:
|
||||
|
||||
@@ -10,6 +10,10 @@ models:
|
||||
# list of model name aliases this llama.cpp instance can serve
|
||||
aliases:
|
||||
- "gpt-4o-mini"
|
||||
|
||||
# check this path for an HTTP 200 response to determine when the server is ready
|
||||
checkEndpoint: "/health"
|
||||
|
||||
"qwen":
|
||||
cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
|
||||
proxy: "http://127.0.0.1:8999"
|
||||
@@ -24,6 +28,10 @@ models:
|
||||
cmd: "build/simple-responder --port 8999"
|
||||
proxy: "http://127.0.0.1:8999"
|
||||
|
||||
# use "none" to skip the check. Caution: this may cause some requests to fail
|
||||
# until the upstream server is ready for traffic
|
||||
checkEndpoint: "none"
|
||||
|
||||
# don't use this, just for testing if things are broken
|
||||
"broken":
|
||||
cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
|
||||
|
||||
@@ -25,7 +25,7 @@ func main() {
|
||||
proxyManager := proxy.New(config)
|
||||
http.HandleFunc("/", proxyManager.HandleFunc)
|
||||
|
||||
fmt.Println("llamagate listening on " + *listenStr)
|
||||
fmt.Println("llama-swap listening on " + *listenStr)
|
||||
if err := http.ListenAndServe(*listenStr, nil); err != nil {
|
||||
fmt.Printf("Error starting server: %v\n", err)
|
||||
os.Exit(1)
|
||||
|
||||
@@ -7,10 +7,11 @@ import (
|
||||
)
|
||||
|
||||
type ModelConfig struct {
|
||||
Cmd string `yaml:"cmd"`
|
||||
Proxy string `yaml:"proxy"`
|
||||
Aliases []string `yaml:"aliases"`
|
||||
Env []string `yaml:"env"`
|
||||
Cmd string `yaml:"cmd"`
|
||||
Proxy string `yaml:"proxy"`
|
||||
Aliases []string `yaml:"aliases"`
|
||||
Env []string `yaml:"env"`
|
||||
CheckEndpoint string `yaml:"checkEndpoint"`
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
@@ -89,11 +90,23 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
return fmt.Errorf("no upstream available to check /health")
|
||||
}
|
||||
|
||||
checkEndpoint := strings.TrimSpace(pm.currentConfig.CheckEndpoint)
|
||||
|
||||
if checkEndpoint == "none" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// keep default behaviour
|
||||
if checkEndpoint == "" {
|
||||
checkEndpoint = "/health"
|
||||
}
|
||||
|
||||
proxyTo := pm.currentConfig.Proxy
|
||||
|
||||
maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout)
|
||||
|
||||
healthURL := proxyTo + "/health"
|
||||
healthURL, err := url.JoinPath(proxyTo, checkEndpoint)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint)
|
||||
}
|
||||
client := &http.Client{}
|
||||
startTime := time.Now()
|
||||
|
||||
@@ -112,12 +125,12 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
// if TCP dial can't connect any HTTP response after 5 seconds
|
||||
// exit quickly.
|
||||
if time.Since(startTime) > 5*time.Second {
|
||||
return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond")
|
||||
return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
|
||||
}
|
||||
}
|
||||
|
||||
if time.Since(startTime) >= maxDuration {
|
||||
return fmt.Errorf("failed to check /healthy from: %s", healthURL)
|
||||
return fmt.Errorf("failed to check health from: %s", healthURL)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
@@ -127,7 +140,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
return nil
|
||||
}
|
||||
if time.Since(startTime) >= maxDuration {
|
||||
return fmt.Errorf("failed to check /healthy from: %s", healthURL)
|
||||
return fmt.Errorf("failed to check health from: %s", healthURL)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user