diff --git a/README.md b/README.md index be8d56e..f6b1f18 100644 --- a/README.md +++ b/README.md @@ -63,9 +63,10 @@ models: But also very powerful ... ```yaml -# Seconds to wait for llama.cpp to load and be ready to serve requests -# Default (and minimum) is 15 seconds -healthCheckTimeout: 60 +# Seconds to wait for upstream to load and be ready to serve requests +# Minimum is 15 seconds (other values below 15 are raised to 15) +# Default is 120 seconds (also applied when set to 0) +healthCheckTimeout: 500 # Valid log levels: debug, info (default), warn, error logLevel: info diff --git a/proxy/config.go b/proxy/config.go index df9f7d0..6ebdd7d 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -113,7 +113,12 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { return Config{}, err } - if config.HealthCheckTimeout < 15 { + if config.HealthCheckTimeout == 0 { + // this high default timeout helps avoid failing health checks + // for configurations that wait for docker or have slower startup + config.HealthCheckTimeout = 120 + } else if config.HealthCheckTimeout < 15 { + // set a minimum of 15 seconds config.HealthCheckTimeout = 15 } diff --git a/proxy/process.go b/proxy/process.go index 4dc4f0f..ace3311 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -100,7 +100,7 @@ func NewProcess(ID string, healthCheckTimeout int, config ModelConfig, processLo concurrencyLimitSemaphore: make(chan struct{}, concurrentLimit), // stop timeout - gracefulStopTimeout: 5 * time.Second, + gracefulStopTimeout: 10 * time.Second, upstreamWasStoppedWithKill: false, } }