increase default healthCheckTimeout to 120s and gracefulStopTimeout to 10s

2025-05-26 09:57:53 -07:00
parent b2a891f8f4
commit 02ee29d881
3 changed files with 11 additions and 5 deletions
--- a/README.md
+++ b/README.md
@@ -63,9 +63,10 @@ models:
 <summary>But also very powerful ...</summary>

 ```yaml
-# Seconds to wait for llama.cpp to load and be ready to serve requests
-# Default (and minimum) is 15 seconds
-healthCheckTimeout: 60
+# Seconds to wait for the upstream server to load and be ready to serve requests.
+# Minimum is 15 seconds; lower values are raised to 15.
+# Default is 120 seconds, used when the setting is omitted or set to 0.
+healthCheckTimeout: 500

 # Valid log levels: debug, info (default), warn, error
 logLevel: info
--- a/proxy/config.go
+++ b/proxy/config.go
@@ -113,7 +113,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		return Config{}, err
 	}

-	if config.HealthCheckTimeout < 15 {
+	if config.HealthCheckTimeout == 0 {
+		// this high default timeout helps avoid failing health checks
+		// for configurations that wait for docker or have slower startup
+		config.HealthCheckTimeout = 120
+	} else if config.HealthCheckTimeout < 15 {
+		// set a minimum of 15 seconds
 		config.HealthCheckTimeout = 15
 	}

--- a/proxy/process.go
+++ b/proxy/process.go
@@ -100,7 +100,7 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo
 		concurrencyLimitSemaphore: make(chan struct{}, concurrentLimit),

 		// stop timeout
-		gracefulStopTimeout:        5 * time.Second,
+		gracefulStopTimeout:        10 * time.Second,
 		upstreamWasStoppedWithKill: false,
 	}
 }