Change /unload to not wait for inflight requests (#125)

Sometimes upstreams accept HTTP requests but never respond, causing requests
to build up while waiting for a response. This can block Process.Stop(),
which waits for in-flight requests to finish. This change refactors the
code so that it does not wait when attempting to shut down the process.
This commit is contained in:
Benson Wong
2025-05-13 11:39:19 -07:00
committed by GitHub
parent 9dc4bcb46c
commit 519c3a4d22
7 changed files with 66 additions and 24 deletions

View File

@@ -208,7 +208,7 @@ func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// This is the public method safe for concurrent calls.
// Unlike Shutdown, this method only stops the processes but doesn't perform
// a complete shutdown, allowing for process replacement without full termination.
func (pm *ProxyManager) StopProcesses() {
func (pm *ProxyManager) StopProcesses(strategy StopStrategy) {
pm.Lock()
defer pm.Unlock()
@@ -218,7 +218,7 @@ func (pm *ProxyManager) StopProcesses() {
wg.Add(1)
go func(processGroup *ProcessGroup) {
defer wg.Done()
processGroup.stopProcesses()
processGroup.StopProcesses(strategy)
}(processGroup)
}
@@ -260,7 +260,7 @@ func (pm *ProxyManager) swapProcessGroup(requestedModel string) (*ProcessGroup,
pm.proxyLogger.Debugf("Exclusive mode for group %s, stopping other process groups", processGroup.id)
for groupId, otherGroup := range pm.processGroups {
if groupId != processGroup.id && !otherGroup.persistent {
otherGroup.StopProcesses()
otherGroup.StopProcesses(StopWaitForInflightRequest)
}
}
}
@@ -504,7 +504,7 @@ func (pm *ProxyManager) sendErrorResponse(c *gin.Context, statusCode int, messag
}
func (pm *ProxyManager) unloadAllModelsHandler(c *gin.Context) {
pm.StopProcesses()
pm.StopProcesses(StopImmediately)
c.String(http.StatusOK, "OK")
}