From b83a5fa291ce92b73942b700b8311a8eebff3bc5 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Fri, 16 May 2025 19:54:44 -0700 Subject: [PATCH] make Failed state recoverable (#137) A process in the failed state can transition to stopped either by calling /unload or swapping to another model. --- proxy/process.go | 21 +++++++++++++++------ proxy/proxymanager.go | 2 ++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/proxy/process.go b/proxy/process.go index 2870139..b6e4aba 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -149,7 +149,9 @@ func isValidTransition(from, to ProcessState) bool { return to == StateStopping case StateStopping: return to == StateStopped || to == StateShutdown - case StateFailed, StateShutdown: + case StateFailed: + return to == StateStopping + case StateShutdown: return false // No transitions allowed from these states } return false @@ -359,12 +361,19 @@ func (p *Process) StopImmediately() { return } - p.proxyLogger.Debugf("<%s> Stopping process", p.ID) + p.proxyLogger.Debugf("<%s> Stopping process, current state: %s", p.ID, p.CurrentState()) + currentState := p.CurrentState() - // calling Stop() when state is invalid is a no-op - if curState, err := p.swapState(StateReady, StateStopping); err != nil { - p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState) - return + if currentState == StateFailed { + if curState, err := p.swapState(StateFailed, StateStopping); err != nil { + p.proxyLogger.Infof("<%s> Stop() Failed -> StateStopping err: %v, current state: %v", p.ID, err, curState) + return + } + } else { + if curState, err := p.swapState(StateReady, StateStopping); err != nil { + p.proxyLogger.Infof("<%s> Stop() Ready -> StateStopping err: %v, current state: %v", p.ID, err, curState) + return + } } // stop the process with a graceful exit timeout diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 0c1311c..2c5ac0e 100644 --- a/proxy/proxymanager.go +++ 
b/proxy/proxymanager.go @@ -352,6 +352,8 @@ func (pm *ProxyManager) upstreamIndex(c *gin.Context) { stateStr = "Failed" case StateShutdown: stateStr = "Shutdown" + case StateStopped: + stateStr = "Stopped" default: stateStr = "Unknown" }