revise health check logic to not error on 5 second timeout
This commit is contained in:
@@ -35,7 +35,10 @@ models:
|
||||
# until the upstream server is ready for traffic
|
||||
checkEndpoint: none
|
||||
|
||||
# don't use this, just for testing if things are broken
|
||||
# don't use these, just for testing if things are broken
|
||||
"broken":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
"broken_timeout":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||
proxy: http://127.0.0.1:9000
|
||||
@@ -187,26 +187,28 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 250*time.Millisecond)
|
||||
defer cancel()
|
||||
req = req.WithContext(ctx)
|
||||
resp, err := client.Do(req)
|
||||
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "connection refused") {
|
||||
|
||||
// if the TCP dial can't connect or get any HTTP response after 5 seconds
|
||||
// exit quickly.
|
||||
if time.Since(startTime) > 5*time.Second {
|
||||
return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
|
||||
}
|
||||
}
|
||||
|
||||
if time.Since(startTime) >= maxDuration {
|
||||
return fmt.Errorf("failed to check health from: %s", healthURL)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
|
||||
// wait a bit longer for TCP connection issues
|
||||
if strings.Contains(err.Error(), "connection refused") {
|
||||
fmt.Fprintf(pm.logMonitor, "Connection refused on %s\n", healthURL)
|
||||
time.Sleep(5 * time.Second)
|
||||
} else {
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
return nil
|
||||
@@ -214,6 +216,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
|
||||
if time.Since(startTime) >= maxDuration {
|
||||
return fmt.Errorf("failed to check health from: %s", healthURL)
|
||||
}
|
||||
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user