add support for automatically unloading a model (#10) (#14)

* Make starting the upstream process on-demand (#10)
* Add automatic unloading of a model after its TTL is reached
* Add `ttl` configuration parameter to models, in seconds; the default is 0 (never unload)
2024-11-19 16:32:51 -08:00
parent ba39ed4c18
commit 533162ce6a
8 changed files with 149 additions and 54 deletions
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -99,8 +99,8 @@ func (pm *ProxyManager) swapModel(requestedModel string) error {
 		}
 	}

-	pm.currentProcess = NewProcess(modelID, modelConfig, pm.logMonitor)
-	return pm.currentProcess.Start(pm.config.HealthCheckTimeout)
+	pm.currentProcess = NewProcess(modelID, pm.config.HealthCheckTimeout, modelConfig, pm.logMonitor)
+	return nil
 }

 func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) {