Add custom check endpoint
Replace the previously hardcoded `/health` path used to check when the server becomes ready to serve traffic. With this, llama-swap can support any server that provides an OpenAI compatible inference endpoint.
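The three supported modes look like this in a config file. A minimal sketch, assuming the models layout shown in the hunks below; the model names, commands, and ports here are illustrative, not from this commit:

models:
  # default: omit checkEndpoint and /health is polled, matching llama.cpp
  "llama":
    cmd: "models/llama-server-osx --port 8999 -m models/model.gguf"
    proxy: "http://127.0.0.1:8999"

  # custom: poll a non-standard readiness path
  "custom":
    cmd: "build/other-server --port 9000"
    proxy: "http://127.0.0.1:9000"
    checkEndpoint: "/custom-endpoint"

  # none: skip the readiness check entirely
  "unchecked":
    cmd: "build/fast-server --port 9001"
    proxy: "http://127.0.0.1:9001"
    checkEndpoint: "none"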
@@ -30,6 +30,13 @@ models:
     - "gpt-4o-mini"
     - "gpt-3.5-turbo"
+
+    # wait for this path to return an HTTP 200 before serving requests
+    # defaults to /health to match llama.cpp
+    #
+    # use "none" to skip endpoint checking. This may cause requests to fail
+    # until the server is ready
+    checkEndpoint: "/custom-endpoint"
 
   "qwen":
     # environment variables to pass to the command
     env:
@@ -10,6 +10,10 @@ models:
     # list of model name aliases this llama.cpp instance can serve
     aliases:
    - "gpt-4o-mini"
+
+    # check this path for an HTTP 200 response for the server to be ready
+    checkEndpoint: "/health"
+
   "qwen":
     cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
     proxy: "http://127.0.0.1:8999"
@@ -24,6 +28,10 @@ models:
     cmd: "build/simple-responder --port 8999"
     proxy: "http://127.0.0.1:8999"
 
+    # use "none" to skip the check. Caution: this may cause some requests to fail
+    # until the upstream server is ready for traffic
+    checkEndpoint: "none"
+
   # don't use this, just for testing if things are broken
   "broken":
     cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
@@ -25,7 +25,7 @@ func main() {
 	proxyManager := proxy.New(config)
 	http.HandleFunc("/", proxyManager.HandleFunc)
 
-	fmt.Println("llamagate listening on " + *listenStr)
+	fmt.Println("llama-swap listening on " + *listenStr)
 	if err := http.ListenAndServe(*listenStr, nil); err != nil {
 		fmt.Printf("Error starting server: %v\n", err)
 		os.Exit(1)
@@ -11,6 +11,7 @@ type ModelConfig struct {
 	Proxy         string   `yaml:"proxy"`
 	Aliases       []string `yaml:"aliases"`
 	Env           []string `yaml:"env"`
+	CheckEndpoint string   `yaml:"checkEndpoint"`
 }
 
 type Config struct {
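The new field is decoded through the same YAML struct tags as its neighbours. A minimal sketch of the mapping, assuming a gopkg.in/yaml.v3-style decoder (the diff only shows the tags, not which YAML package the project uses):

package main

import (
	"fmt"

	"gopkg.in/yaml.v3" // assumed package; the diff only confirms the struct tags
)

// ModelConfig is abbreviated to the fields visible in this commit,
// plus Cmd, which the config hunks above clearly rely on.
type ModelConfig struct {
	Cmd           string   `yaml:"cmd"`
	Proxy         string   `yaml:"proxy"`
	Aliases       []string `yaml:"aliases"`
	Env           []string `yaml:"env"`
	CheckEndpoint string   `yaml:"checkEndpoint"`
}

func main() {
	raw := []byte(`
proxy: "http://127.0.0.1:8999"
checkEndpoint: "/custom-endpoint"
`)
	var mc ModelConfig
	if err := yaml.Unmarshal(raw, &mc); err != nil {
		panic(err)
	}
	fmt.Println(mc.CheckEndpoint) // "/custom-endpoint"
}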
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"net/url"
 	"os"
 	"os/exec"
 	"strings"
@@ -89,11 +90,23 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
 		return fmt.Errorf("no upstream available to check /health")
 	}
 
+	checkEndpoint := strings.TrimSpace(pm.currentConfig.CheckEndpoint)
+
+	if checkEndpoint == "none" {
+		return nil
+	}
+
+	// keep default behaviour
+	if checkEndpoint == "" {
+		checkEndpoint = "/health"
+	}
+
 	proxyTo := pm.currentConfig.Proxy
 	maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout)
-	healthURL := proxyTo + "/health"
+	healthURL, err := url.JoinPath(proxyTo, checkEndpoint)
+	if err != nil {
+		return fmt.Errorf("failed to create health url with %s and path %s", proxyTo, checkEndpoint)
+	}
 	client := &http.Client{}
 	startTime := time.Now()
 
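The added logic is small but easy to get wrong around slashes, which is why it switches from string concatenation to url.JoinPath (Go 1.19+). A standalone sketch of the same decision flow; resolveCheckURL is illustrative, not the project's exact code:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

// resolveCheckURL mirrors the decision flow above: "none" disables the
// check, an empty value keeps the /health default, and any other path
// is joined onto the upstream base URL with url.JoinPath, which
// normalizes duplicate or missing slashes.
func resolveCheckURL(proxyTo, checkEndpoint string) (string, bool, error) {
	checkEndpoint = strings.TrimSpace(checkEndpoint)
	if checkEndpoint == "none" {
		return "", false, nil // no URL; caller skips the check
	}
	if checkEndpoint == "" {
		checkEndpoint = "/health" // default matches llama.cpp
	}
	u, err := url.JoinPath(proxyTo, checkEndpoint)
	return u, true, err
}

func main() {
	// JoinPath avoids the double slash that the old string
	// concatenation could produce with a trailing-slash proxy value.
	u, _, _ := resolveCheckURL("http://127.0.0.1:8999/", "/custom-endpoint")
	fmt.Println(u) // http://127.0.0.1:8999/custom-endpoint
}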
@@ -112,12 +125,12 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
 			// if the TCP dial can't get any HTTP response after 5 seconds,
 			// exit quickly.
 			if time.Since(startTime) > 5*time.Second {
-				return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond")
+				return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
 			}
 		}
 
 		if time.Since(startTime) >= maxDuration {
-			return fmt.Errorf("failed to check /healthy from: %s", healthURL)
+			return fmt.Errorf("failed to check health from: %s", healthURL)
 		}
 		time.Sleep(time.Second)
 		continue
@@ -127,7 +140,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
 			return nil
 		}
 		if time.Since(startTime) >= maxDuration {
-			return fmt.Errorf("failed to check /healthy from: %s", healthURL)
+			return fmt.Errorf("failed to check health from: %s", healthURL)
 		}
 		time.Sleep(time.Second)
 	}
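Putting the pieces together, the surrounding function polls the configured endpoint once per second until it returns HTTP 200, with an overall deadline derived from HealthCheckTimeout and an early exit when requests are still failing outright after five seconds. A simplified, self-contained sketch of that loop; waitForReady and its signature are illustrative, not the project's API:

package main

import (
	"fmt"
	"net/http"
	"time"
)

// waitForReady polls checkURL once per second until it returns HTTP 200,
// gives up after maxDuration, and bails out early if requests are still
// erroring after five seconds (likely a dead upstream).
func waitForReady(checkURL string, maxDuration time.Duration) error {
	client := &http.Client{}
	startTime := time.Now()
	for {
		resp, err := client.Get(checkURL)
		if err != nil {
			if time.Since(startTime) > 5*time.Second {
				return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
			}
		} else {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil // upstream is ready for traffic
			}
		}
		if time.Since(startTime) >= maxDuration {
			return fmt.Errorf("failed to check health from: %s", checkURL)
		}
		time.Sleep(time.Second)
	}
}

func main() {
	if err := waitForReady("http://127.0.0.1:8999/health", 15*time.Second); err != nil {
		fmt.Println(err)
	}
}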