From 9a0c6bed40d2de5989c3a1d5dd95ee22f66c6743 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 16 Dec 2024 12:29:25 -0800 Subject: [PATCH] Improve stop exceptions (#28) (#29) Stop Process TTL goroutine when process is not ready (#28) - fix issue where the TTL goroutine kept running even though the child process was no longer running and the Process's state was not Ready - fix issue where some logs were going to stdout instead of p.logMonitor, causing them to not show up in /logs - add units to the unloading model message --- proxy/process.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/proxy/process.go b/proxy/process.go index c20d624..3490446 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -125,11 +125,15 @@ func (p *Process) start() error { maxDuration := time.Duration(p.config.UnloadAfter) * time.Second for range time.Tick(time.Second) { + if p.state != StateReady { + return + } + // wait for all inflight requests to complete and ticker p.inFlightRequests.Wait() if time.Since(p.lastRequestHandled) > maxDuration { - fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %d reached.\n", p.ID, p.config.UnloadAfter) + fmt.Fprintf(p.logMonitor, "!!! Unloading model %s, TTL of %ds reached.\n", p.ID, p.config.UnloadAfter) p.Stop() } } @@ -162,25 +166,25 @@ func (p *Process) Stop() { // will be a source of pain in the future. p.cmd.Process.Signal(syscall.SIGTERM) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + sigtermTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - done := make(chan error, 1) + sigtermNormal := make(chan error, 1) go func() { - done <- p.cmd.Wait() + sigtermNormal <- p.cmd.Wait() }() select { - case <-ctx.Done(): - fmt.Printf("!!! process for %s timed out waiting to stop\n", p.ID) + case <-sigtermTimeout.Done(): + fmt.Fprintf(p.logMonitor, "!!! 
process for %s timed out waiting to stop\n", p.ID) p.cmd.Process.Kill() p.cmd.Wait() - case err := <-done: + case err := <-sigtermNormal: if err != nil { if err.Error() != "wait: no child processes" { // possible that simple-responder for testing is just not // existing right, so suppress those errors. - fmt.Printf("!!! process for %s stopped with error > %v\n", p.ID, err) + fmt.Fprintf(p.logMonitor, "!!! process for %s stopped with error > %v\n", p.ID, err) } } }