From 02ee29d8813f7e8e30d796002943f2a4a4357fef Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 26 May 2025 09:57:53 -0700 Subject: [PATCH] increase default healthCheckTimeout to 120s and gracefulStopTimeout to 10s --- README.md | 7 ++++--- proxy/config.go | 7 ++++++- proxy/process.go | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index be8d56e..f6b1f18 100644 --- a/README.md +++ b/README.md @@ -63,9 +63,10 @@ models: But also very powerful ... ```yaml -# Seconds to wait for llama.cpp to load and be ready to serve requests -# Default (and minimum) is 15 seconds -healthCheckTimeout: 60 +# Seconds to wait for the upstream to load and be ready to serve requests +# Minimum is 15 seconds (lower non-zero values are raised to 15) +# Default is 120 seconds (used when the setting is omitted or 0) +healthCheckTimeout: 500 # Valid log levels: debug, info (default), warn, error logLevel: info diff --git a/proxy/config.go b/proxy/config.go index df9f7d0..6ebdd7d 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -113,7 +113,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { return Config{}, err } - if config.HealthCheckTimeout < 15 { + if config.HealthCheckTimeout == 0 { + // this high default timeout helps avoid failing health checks + // for configurations that wait for docker or have slower startup + config.HealthCheckTimeout = 120 + } else if config.HealthCheckTimeout < 15 { + // set a minimum of 15 seconds config.HealthCheckTimeout = 15 } diff --git a/proxy/process.go b/proxy/process.go index 4dc4f0f..ace3311 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -100,7 +100,7 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo concurrencyLimitSemaphore: make(chan struct{}, concurrentLimit), // stop timeout - gracefulStopTimeout: 5 * time.Second, + gracefulStopTimeout: 10 * time.Second, upstreamWasStoppedWithKill: false, } }