Merge pull request #8 from mostlygeek/improve-upstream-monitoring-issue5

Improvements to handling of the upstream process so that an error is reported as soon as whichever of these happens first:

    the health check timeout is reached waiting for the upstream process to be ready
    the upstream process exits unexpectedly

With this change llama-swap is more compatible with use cases like containerized upstream services (#5) which pull the container before HTTP endpoints are ready.
This commit is contained in:
Benson Wong
2024-11-01 15:28:06 -07:00
committed by GitHub
2 changed files with 49 additions and 17 deletions

View File

@@ -1,6 +1,6 @@
# Seconds to wait for llama.cpp to be available to serve requests # Seconds to wait for llama.cpp to be available to serve requests
# Default (and minimum): 15 seconds # Default (and minimum): 15 seconds
healthCheckTimeout: 60 healthCheckTimeout: 15
models: models:
"llama": "llama":
@@ -35,7 +35,10 @@ models:
# until the upstream server is ready for traffic # until the upstream server is ready for traffic
checkEndpoint: none checkEndpoint: none
# don't use this, just for testing if things are broken # don't use these, just for testing if things are broken
"broken": "broken":
cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
proxy: http://127.0.0.1:8999 proxy: http://127.0.0.1:8999
"broken_timeout":
cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
proxy: http://127.0.0.1:9000

View File

@@ -149,14 +149,28 @@ func (pm *ProxyManager) swapModel(requestedModel string) error {
} }
pm.currentCmd = cmd pm.currentCmd = cmd
if err := pm.checkHealthEndpoint(); err != nil { // watch for the command to exit
cmdCtx, cancel := context.WithCancelCause(context.Background())
// monitor the command's exit status
go func() {
err := cmd.Wait()
if err != nil {
cancel(fmt.Errorf("command [%s] %s", strings.Join(cmd.Args, " "), err.Error()))
} else {
cancel(nil)
}
}()
// wait for checkHealthEndpoint
if err := pm.checkHealthEndpoint(cmdCtx); err != nil {
return err return err
} }
return nil return nil
} }
func (pm *ProxyManager) checkHealthEndpoint() error { func (pm *ProxyManager) checkHealthEndpoint(cmdCtx context.Context) error {
if pm.currentConfig.Proxy == "" { if pm.currentConfig.Proxy == "" {
return fmt.Errorf("no upstream available to check /health") return fmt.Errorf("no upstream available to check /health")
@@ -179,6 +193,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
if err != nil { if err != nil {
return fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint) return fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint)
} }
client := &http.Client{} client := &http.Client{}
startTime := time.Now() startTime := time.Now()
@@ -187,33 +202,47 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
if err != nil { if err != nil {
return err return err
} }
ctx, cancel := context.WithTimeout(req.Context(), 250*time.Millisecond)
ctx, cancel := context.WithTimeout(cmdCtx, 250*time.Millisecond)
defer cancel() defer cancel()
req = req.WithContext(ctx) req = req.WithContext(ctx)
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil {
if strings.Contains(err.Error(), "connection refused") {
// if TCP dial can't connect any HTTP response after 5 seconds ttl := (maxDuration - time.Since(startTime)).Seconds()
// exit quickly.
if time.Since(startTime) > 5*time.Second { if err != nil {
return fmt.Errorf("health check endpoint took more than 5 seconds to respond") // check if the context was cancelled
} select {
case <-ctx.Done():
return context.Cause(ctx)
default:
} }
if time.Since(startTime) >= maxDuration { // wait a bit longer for TCP connection issues
if strings.Contains(err.Error(), "connection refused") {
fmt.Fprintf(pm.logMonitor, "Connection refused on %s, ttl %.0fs\n", healthURL, ttl)
time.Sleep(5 * time.Second)
} else {
time.Sleep(time.Second)
}
if ttl < 0 {
return fmt.Errorf("failed to check health from: %s", healthURL) return fmt.Errorf("failed to check health from: %s", healthURL)
} }
time.Sleep(time.Second)
continue continue
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode == http.StatusOK { if resp.StatusCode == http.StatusOK {
return nil return nil
} }
if time.Since(startTime) >= maxDuration {
if ttl < 0 {
return fmt.Errorf("failed to check health from: %s", healthURL) return fmt.Errorf("failed to check health from: %s", healthURL)
} }
time.Sleep(time.Second) time.Sleep(time.Second)
} }
} }
@@ -236,7 +265,7 @@ func (pm *ProxyManager) proxyChatRequest(w http.ResponseWriter, r *http.Request)
} }
if err := pm.swapModel(model); err != nil { if err := pm.swapModel(model); err != nil {
http.Error(w, fmt.Sprintf("unable to swap to model: %s", err.Error()), http.StatusNotFound) http.Error(w, fmt.Sprintf("unable to swap to model, %s", err.Error()), http.StatusNotFound)
return return
} }