From 02ee29d8813f7e8e30d796002943f2a4a4357fef Mon Sep 17 00:00:00 2001
From: Benson Wong <mostlygeek@gmail.com>
Date: Mon, 26 May 2025 09:57:53 -0700
Subject: [PATCH] increase default healthCheckTimeout to 120s

Also raise the process gracefulStopTimeout from 5s to 10s.

---
 README.md        | 7 ++++---
 proxy/config.go  | 7 ++++++-
 proxy/process.go | 2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index be8d56e..f6b1f18 100644
--- a/README.md
+++ b/README.md
@@ -63,9 +63,10 @@ models:
 <summary>But also very powerful ...</summary>
 
 ```yaml
-# Seconds to wait for llama.cpp to load and be ready to serve requests
-# Default (and minimum) is 15 seconds
-healthCheckTimeout: 60
+# Seconds to wait for the upstream to load and be ready to serve requests.
+# Minimum is 15 seconds; smaller non-zero values are raised to 15.
+# Default is 120 seconds, used when the setting is omitted or set to 0.
+healthCheckTimeout: 500
 
 # Valid log levels: debug, info (default), warn, error
 logLevel: info
diff --git a/proxy/config.go b/proxy/config.go
index df9f7d0..6ebdd7d 100644
--- a/proxy/config.go
+++ b/proxy/config.go
@@ -113,7 +113,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		return Config{}, err
 	}
 
-	if config.HealthCheckTimeout < 15 {
+	if config.HealthCheckTimeout == 0 {
+		// Use a generous default so health checks do not fail for
+		// configurations that wait for docker or otherwise start slowly.
+		config.HealthCheckTimeout = 120
+	} else if config.HealthCheckTimeout < 15 {
+		// set a minimum of 15 seconds
 		config.HealthCheckTimeout = 15
 	}
 
diff --git a/proxy/process.go b/proxy/process.go
index 4dc4f0f..ace3311 100644
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -100,7 +100,7 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo
 		concurrencyLimitSemaphore: make(chan struct{}, concurrentLimit),
 
 		// stop timeout
-		gracefulStopTimeout:        5 * time.Second,
+		gracefulStopTimeout:        10 * time.Second,
 		upstreamWasStoppedWithKill: false,
 	}
 }