Merge pull request #8 from mostlygeek/improve-upstream-monitoring-issue5
Improvements to handling of the upstream process so errors happen whenever one of these is first:
the health check timeout is reached waiting for the upstream process to be ready
the upstream process exits unexpectedly
With this change llama-swap is more compatible with use cases like containerized upstream services (#5) which pull the container before HTTP endpoints are ready.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# Seconds to wait for llama.cpp to be available to serve requests
|
||||
# Default (and minimum): 15 seconds
|
||||
healthCheckTimeout: 60
|
||||
healthCheckTimeout: 15
|
||||
|
||||
models:
|
||||
"llama":
|
||||
@@ -35,7 +35,10 @@ models:
|
||||
# until the upstream server is ready for traffic
|
||||
checkEndpoint: none
|
||||
|
||||
# don't use this, just for testing if things are broken
|
||||
# don't use these, just for testing if things are broken
|
||||
"broken":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
proxy: http://127.0.0.1:8999
|
||||
"broken_timeout":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||
proxy: http://127.0.0.1:9000
|
||||
@@ -149,14 +149,28 @@ func (pm *ProxyManager) swapModel(requestedModel string) error {
|
||||
}
|
||||
pm.currentCmd = cmd
|
||||
|
||||
if err := pm.checkHealthEndpoint(); err != nil {
|
||||
// watch for the command to exist
|
||||
cmdCtx, cancel := context.WithCancelCause(context.Background())
|
||||
|
||||
// monitor the command's exist status
|
||||
go func() {
|
||||
err := cmd.Wait()
|
||||
if err != nil {
|
||||
cancel(fmt.Errorf("command [%s] %s", strings.Join(cmd.Args, " "), err.Error()))
|
||||
} else {
|
||||
cancel(nil)
|
||||
}
|
||||
}()
|
||||
|
||||
// wait for checkHealthEndpoint
|
||||
if err := pm.checkHealthEndpoint(cmdCtx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
func (pm *ProxyManager) checkHealthEndpoint(cmdCtx context.Context) error {
|
||||
|
||||
if pm.currentConfig.Proxy == "" {
|
||||
return fmt.Errorf("no upstream available to check /health")
|
||||
@@ -179,6 +193,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint)
|
||||
}
|
||||
|
||||
client := &http.Client{}
|
||||
startTime := time.Now()
|
||||
|
||||
@@ -187,33 +202,47 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 250*time.Millisecond)
|
||||
|
||||
ctx, cancel := context.WithTimeout(cmdCtx, 250*time.Millisecond)
|
||||
defer cancel()
|
||||
req = req.WithContext(ctx)
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "connection refused") {
|
||||
|
||||
// if TCP dial can't connect any HTTP response after 5 seconds
|
||||
// exit quickly.
|
||||
if time.Since(startTime) > 5*time.Second {
|
||||
return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
|
||||
}
|
||||
ttl := (maxDuration - time.Since(startTime)).Seconds()
|
||||
|
||||
if err != nil {
|
||||
// check if the context was cancelled
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return context.Cause(ctx)
|
||||
default:
|
||||
}
|
||||
|
||||
if time.Since(startTime) >= maxDuration {
|
||||
// wait a bit longer for TCP connection issues
|
||||
if strings.Contains(err.Error(), "connection refused") {
|
||||
fmt.Fprintf(pm.logMonitor, "Connection refused on %s, ttl %.0fs\n", healthURL, ttl)
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
} else {
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
|
||||
if ttl < 0 {
|
||||
return fmt.Errorf("failed to check health from: %s", healthURL)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
return nil
|
||||
}
|
||||
if time.Since(startTime) >= maxDuration {
|
||||
|
||||
if ttl < 0 {
|
||||
return fmt.Errorf("failed to check health from: %s", healthURL)
|
||||
}
|
||||
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
@@ -236,7 +265,7 @@ func (pm *ProxyManager) proxyChatRequest(w http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
|
||||
if err := pm.swapModel(model); err != nil {
|
||||
http.Error(w, fmt.Sprintf("unable to swap to model: %s", err.Error()), http.StatusNotFound)
|
||||
http.Error(w, fmt.Sprintf("unable to swap to model, %s", err.Error()), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user