Add cmd_stop configuration to better support docker (#35)

Add `cmd_stop` to the model configuration to run a command, instead of sending a SIGTERM, to shut down a process before swapping.
2025-01-30 16:59:57 -08:00
parent 2833517eef
commit baeb0c4e7f
6 changed files with 125 additions and 22 deletions
--- a/README.md
+++ b/README.md
@@ -30,11 +30,13 @@ Any OpenAI compatible server would work. llama-swap was originally designed for
  - `v1/rerank`
  - `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
 - ✅ Multiple GPU support
+- ✅ Docker Support ([#40](https://github.com/mostlygeek/llama-swap/pull/40))
 - ✅ Run multiple models at once with `profiles`
 - ✅ Remote log monitoring at `/log`
 - ✅ Automatic unloading of models from GPUs after timeout
 - ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
 - ✅ Direct access to upstream HTTP server via `/upstream/:model_id` ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
+

 ## config.yaml

@@ -89,6 +91,20 @@ models:
    cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
    unlisted: true

+  # Docker Support (Experimental)
+  # see: https://github.com/mostlygeek/llama-swap/pull/40
+  "dockertest":
+    proxy: "http://127.0.0.1:9790"
+
+    # cmd_stop is run instead of sending SIGTERM, to reliably stop the named container
+    cmd_stop: docker stop -t 2 dockertest
+
+    cmd: >
+      docker run --name dockertest
+      --init --rm -p 9790:8080 -v /mnt/nvme/models:/models
+      ghcr.io/ggerganov/llama.cpp:server
+      --model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
+
 # profiles make it easy to managing multi model (and gpu) configurations.
 #
 # Tips:
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -53,6 +53,21 @@ models:
      --ctx-size 8192
      --reranking

+  # EXPERIMENTAL! Docker Support
+  # see:
+  #  - https://github.com/mostlygeek/llama-swap/pull/40
+  #  - https://github.com/mostlygeek/llama-swap/issues/35
+  "dockertest":
+    proxy: "http://127.0.0.1:9790"
+
+    # use this to reliably stop named containers
+    cmd_stop: docker stop -t 2 dockertest
+
+    cmd: >
+      docker run --name dockertest
+      --init --rm -p 9790:8080 -v /mnt/nvme/models:/models
+      ghcr.io/ggerganov/llama.cpp:server
+      --model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'

  "simple":
    # example of setting environment variables
--- a/llama-swap.go
+++ b/llama-swap.go
@@ -4,6 +4,8 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"os/signal"
+	"syscall"

 	"github.com/gin-gonic/gin"
 	"github.com/mostlygeek/llama-swap/proxy"
@@ -39,6 +41,16 @@ func main() {
 	}

 	proxyManager := proxy.New(config)
+
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
+	go func() {
+		<-sigChan
+		fmt.Println("Shutting down llama-swap")
+		proxyManager.StopProcesses()
+		os.Exit(0)
+	}()
+
 	fmt.Println("llama-swap listening on " + *listenStr)
 	if err := proxyManager.Run(*listenStr); err != nil {
 		fmt.Printf("Server error: %v\n", err)
--- a/proxy/config.go
+++ b/proxy/config.go
@@ -11,6 +11,7 @@ import (

 type ModelConfig struct {
 	Cmd           string   `yaml:"cmd"`
+	CmdStop       string   `yaml:"cmd_stop"`
 	Proxy         string   `yaml:"proxy"`
 	Aliases       []string `yaml:"aliases"`
 	Env           []string `yaml:"env"`
@@ -22,6 +23,9 @@ type ModelConfig struct {
 func (m *ModelConfig) SanitizedCommand() ([]string, error) {
 	return SanitizeCommand(m.Cmd)
 }
+func (m *ModelConfig) SanitizeCommandStop() ([]string, error) {
+	return SanitizeCommand(m.CmdStop)
+}

 type Config struct {
 	HealthCheckTimeout int                    `yaml:"healthCheckTimeout"`
--- a/proxy/config_test.go
+++ b/proxy/config_test.go
@@ -35,6 +35,11 @@ models:
    aliases:
      - "m2"
    checkEndpoint: "/"
+  docker:
+    cmd: docker run -p 9999:8080 --name "my_container"
+    cmd_stop: docker stop my_container
+    proxy: "http://localhost:9999"
+    checkEndpoint: "/health"
 healthCheckTimeout: 15
 profiles:
  test:
@@ -56,6 +61,7 @@ profiles:
 		Models: map[string]ModelConfig{
 			"model1": {
 				Cmd:           "path/to/cmd --arg1 one",
+				CmdStop:       "",
 				Proxy:         "http://localhost:8080",
 				Aliases:       []string{"m1", "model-one"},
 				Env:           []string{"VAR1=value1", "VAR2=value2"},
@@ -63,11 +69,19 @@ profiles:
 			},
 			"model2": {
 				Cmd:           "path/to/cmd --arg1 one",
+				CmdStop:       "",
 				Proxy:         "http://localhost:8081",
 				Aliases:       []string{"m2"},
 				Env:           nil,
 				CheckEndpoint: "/",
 			},
+			"docker": {
+				Cmd:           `docker run -p 9999:8080 --name "my_container"`,
+				CmdStop:       "docker stop my_container",
+				Proxy:         "http://localhost:9999",
+				Env:           nil,
+				CheckEndpoint: "/health",
+			},
 		},
 		HealthCheckTimeout: 15,
 		Profiles: map[string][]string{
@@ -99,6 +113,18 @@ func TestConfig_ModelConfigSanitizedCommand(t *testing.T) {
 	assert.Equal(t, []string{"python", "model1.py", "--arg1", "value1", "--arg2", "value2"}, args)
 }

+func TestConfig_ModelConfigSanitizedCommandStop(t *testing.T) {
+	config := &ModelConfig{
+		CmdStop: `docker stop my_container \
+		--arg1 1
+		--arg2 2`,
+	}
+
+	args, err := config.SanitizeCommandStop()
+	assert.NoError(t, err)
+	assert.Equal(t, []string{"docker", "stop", "my_container", "--arg1", "1", "--arg2", "2"}, args)
+}
+
 func TestConfig_FindConfig(t *testing.T) {

 	// TODO?
--- a/proxy/process.go
+++ b/proxy/process.go
@@ -153,12 +153,13 @@ func (p *Process) Stop() {
 	defer p.stateMutex.Unlock()

 	if p.state != StateReady {
+		fmt.Fprintf(p.logMonitor, "!!! Stop() called but Process State is not READY\n")
 		return
 	}

 	if p.cmd == nil || p.cmd.Process == nil {
 		// this situation should never happen... but if it does just update the state
-		fmt.Fprintf(p.logMonitor, "!!! State is Ready but Command is nil.")
+		fmt.Fprintf(p.logMonitor, "!!! State is Ready but Command is nil.\n")
 		p.state = StateStopped
 		return
 	}
@@ -166,6 +167,33 @@ func (p *Process) Stop() {
 	// Pretty sure this stopping code needs some work for windows and
 	// will be a source of pain in the future.

+	if p.config.CmdStop != "" {
+		// issue #35: run the configured stop command (e.g. `docker stop`) instead of signalling the process
+		args, err := p.config.SanitizeCommandStop()
+		if err != nil {
+			fmt.Fprintf(p.logMonitor, "!!! Error sanitizing stop command: %v\n", err)
+
+			// NOTE(review): returning here leaves the state as StateReady; confirm that is intended
+			return
+		}
+
+		fmt.Fprintf(p.logMonitor, "!!! Running stop command: %s\n", strings.Join(args, " "))
+		cmd := exec.Command(args[0], args[1:]...)
+		cmd.Stdout = p.logMonitor
+		cmd.Stderr = p.logMonitor
+		err = cmd.Start()
+		if err != nil {
+			fmt.Fprintf(p.logMonitor, "!!! Error running stop command: %v\n", err)
+
+			// NOTE(review): returning here leaves the state as StateReady; confirm that is intended
+			return
+		}
+
+		err = cmd.Wait()
+		if err != nil {
+			fmt.Fprintf(p.logMonitor, "!!! WARNING error waiting for stop command to complete: %v\n", err)
+		}
+	} else {
 		sigtermTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		defer cancel()

@@ -190,6 +218,8 @@ func (p *Process) Stop() {
 				}
 			}
 		}
+	}
+
 	p.state = StateStopped
 }