delay TTL check until after all requests are complete (#25)

- fixes #25, where a request that runs longer than the TTL causes the process to be unloaded before the next request arrives. - new behavior: the TTL check waits until all in-flight requests are complete before testing the timeout
2024-12-09 19:08:03 -08:00
parent 97dae50dc4
commit 5fbd53c616
2 changed files with 26 additions and 12 deletions
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -122,16 +122,15 @@ func (p *Process) start() error {
 		// start a goroutine to check every second if
 		// the process should be stopped
 		go func() {
-			ticker := time.NewTicker(time.Second)
-			defer ticker.Stop()
 			maxDuration := time.Duration(p.config.UnloadAfter) * time.Second

-			for {
-				<-ticker.C
+			for range time.Tick(time.Second) {
+				// wait for all inflight requests to complete and ticker
+				p.inFlightRequests.Wait()
+
 				if time.Since(p.lastRequestHandled) > maxDuration {
 					fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %d reached.\n", p.ID, p.config.UnloadAfter)
 					p.Stop()
-					return
 				}
 			}
 		}()
@@ -275,7 +274,11 @@ func (p *Process) checkHealthEndpoint(ctxFromStart context.Context) error {
 func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {

 	p.inFlightRequests.Add(1)
-	defer p.inFlightRequests.Done()
+
+	defer func() {
+		p.lastRequestHandled = time.Now()
+		p.inFlightRequests.Done()
+	}()

 	if p.CurrentState() != StateReady {
 		if err := p.start(); err != nil {
@@ -285,8 +288,6 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
 		}
 	}

-	p.lastRequestHandled = time.Now()
-
 	proxyTo := p.config.Proxy
 	client := &http.Client{}
 	req, err := http.NewRequest(r.Method, proxyTo+r.URL.String(), r.Body)