Add cmd_stop configuration to better support docker (#35)
Add `cmd_stop` to the model configuration to run a command, instead of sending a SIGTERM, to shut down a process before swapping.
This commit is contained in:
16
README.md
16
README.md
@@ -30,11 +30,13 @@ Any OpenAI compatible server would work. llama-swap was originally designed for
|
||||
- `v1/rerank`
|
||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||
- ✅ Multiple GPU support
|
||||
- ✅ Docker Support ([#40](https://github.com/mostlygeek/llama-swap/pull/40))
|
||||
- ✅ Run multiple models at once with `profiles`
|
||||
- ✅ Remote log monitoring at `/log`
|
||||
- ✅ Automatic unloading of models from GPUs after timeout
|
||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
||||
- ✅ Direct access to upstream HTTP server via `/upstream/:model_id` ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
||||
-
|
||||
|
||||
## config.yaml
|
||||
|
||||
@@ -89,6 +91,20 @@ models:
|
||||
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
||||
unlisted: true
|
||||
|
||||
# Docker Support (Experimental)
|
||||
# see: https://github.com/mostlygeek/llama-swap/pull/40
|
||||
"dockertest":
|
||||
proxy: "http://127.0.0.1:9790"
|
||||
|
||||
# introduced to reliably stop containers
|
||||
cmd_stop: docker stop -t 2 dockertest
|
||||
|
||||
cmd: >
|
||||
docker run --name dockertest
|
||||
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
|
||||
ghcr.io/ggerganov/llama.cpp:server
|
||||
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
||||
|
||||
# profiles make it easy to manage multi-model (and GPU) configurations.
|
||||
#
|
||||
# Tips:
|
||||
|
||||
@@ -53,6 +53,21 @@ models:
|
||||
--ctx-size 8192
|
||||
--reranking
|
||||
|
||||
# EXPERIMENTAL! Docker Support
|
||||
# see:
|
||||
# - https://github.com/mostlygeek/llama-swap/pull/40
|
||||
# - https://github.com/mostlygeek/llama-swap/issues/35
|
||||
"dockertest":
|
||||
proxy: "http://127.0.0.1:9790"
|
||||
|
||||
# use this to reliably stop named containers
|
||||
cmd_stop: docker stop -t 2 dockertest
|
||||
|
||||
cmd: >
|
||||
docker run --name dockertest
|
||||
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
|
||||
ghcr.io/ggerganov/llama.cpp:server
|
||||
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
||||
|
||||
"simple":
|
||||
# example of setting environment variables
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/mostlygeek/llama-swap/proxy"
|
||||
@@ -39,6 +41,16 @@ func main() {
|
||||
}
|
||||
|
||||
proxyManager := proxy.New(config)
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
<-sigChan
|
||||
fmt.Println("Shutting down llama-swap")
|
||||
proxyManager.StopProcesses()
|
||||
os.Exit(0)
|
||||
}()
|
||||
|
||||
fmt.Println("llama-swap listening on " + *listenStr)
|
||||
if err := proxyManager.Run(*listenStr); err != nil {
|
||||
fmt.Printf("Server error: %v\n", err)
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
|
||||
type ModelConfig struct {
|
||||
Cmd string `yaml:"cmd"`
|
||||
CmdStop string `yaml:"cmd_stop"`
|
||||
Proxy string `yaml:"proxy"`
|
||||
Aliases []string `yaml:"aliases"`
|
||||
Env []string `yaml:"env"`
|
||||
@@ -22,6 +23,9 @@ type ModelConfig struct {
|
||||
// SanitizedCommand hands the model's Cmd string to SanitizeCommand and
// returns that call's argument slice and error unchanged.
func (m *ModelConfig) SanitizedCommand() ([]string, error) {
|
||||
return SanitizeCommand(m.Cmd)
|
||||
}
|
||||
// SanitizeCommandStop hands the model's CmdStop string (the `cmd_stop` YAML
// key) to SanitizeCommand and returns that call's argument slice and error
// unchanged.
// NOTE(review): behavior for an empty CmdStop depends on SanitizeCommand,
// which is not visible here — confirm before calling without a guard.
func (m *ModelConfig) SanitizeCommandStop() ([]string, error) {
|
||||
return SanitizeCommand(m.CmdStop)
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
|
||||
|
||||
@@ -35,6 +35,11 @@ models:
|
||||
aliases:
|
||||
- "m2"
|
||||
checkEndpoint: "/"
|
||||
docker:
|
||||
cmd: docker run -p 9999:8080 --name "my_container"
|
||||
cmd_stop: docker stop my_container
|
||||
proxy: "http://localhost:9999"
|
||||
checkEndpoint: "/health"
|
||||
healthCheckTimeout: 15
|
||||
profiles:
|
||||
test:
|
||||
@@ -56,6 +61,7 @@ profiles:
|
||||
Models: map[string]ModelConfig{
|
||||
"model1": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
CmdStop: "",
|
||||
Proxy: "http://localhost:8080",
|
||||
Aliases: []string{"m1", "model-one"},
|
||||
Env: []string{"VAR1=value1", "VAR2=value2"},
|
||||
@@ -63,11 +69,19 @@ profiles:
|
||||
},
|
||||
"model2": {
|
||||
Cmd: "path/to/cmd --arg1 one",
|
||||
CmdStop: "",
|
||||
Proxy: "http://localhost:8081",
|
||||
Aliases: []string{"m2"},
|
||||
Env: nil,
|
||||
CheckEndpoint: "/",
|
||||
},
|
||||
"docker": {
|
||||
Cmd: `docker run -p 9999:8080 --name "my_container"`,
|
||||
CmdStop: "docker stop my_container",
|
||||
Proxy: "http://localhost:9999",
|
||||
Env: nil,
|
||||
CheckEndpoint: "/health",
|
||||
},
|
||||
},
|
||||
HealthCheckTimeout: 15,
|
||||
Profiles: map[string][]string{
|
||||
@@ -99,6 +113,18 @@ func TestConfig_ModelConfigSanitizedCommand(t *testing.T) {
|
||||
assert.Equal(t, []string{"python", "model1.py", "--arg1", "value1", "--arg2", "value2"}, args)
|
||||
}
|
||||
|
||||
// TestConfig_ModelConfigSanitizedCommandStop checks that a multi-line CmdStop
// value is turned by SanitizeCommandStop into a flat argument list with no
// error.
func TestConfig_ModelConfigSanitizedCommandStop(t *testing.T) {
|
||||
// The raw string below mixes a backslash line continuation (first line)
// with bare newlines (remaining lines); the expected slice contains neither.
config := &ModelConfig{
|
||||
CmdStop: `docker stop my_container \
|
||||
--arg1 1
|
||||
--arg2 2`,
|
||||
}
|
||||
|
||||
args, err := config.SanitizeCommandStop()
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []string{"docker", "stop", "my_container", "--arg1", "1", "--arg2", "2"}, args)
|
||||
}
|
||||
|
||||
func TestConfig_FindConfig(t *testing.T) {
|
||||
|
||||
// TODO?
|
||||
|
||||
@@ -153,12 +153,13 @@ func (p *Process) Stop() {
|
||||
defer p.stateMutex.Unlock()
|
||||
|
||||
if p.state != StateReady {
|
||||
fmt.Fprintf(p.logMonitor, "!!! Stop() called but Process State is not READY\n")
|
||||
return
|
||||
}
|
||||
|
||||
if p.cmd == nil || p.cmd.Process == nil {
|
||||
// this situation should never happen... but if it does just update the state
|
||||
fmt.Fprintf(p.logMonitor, "!!! State is Ready but Command is nil.")
|
||||
fmt.Fprintf(p.logMonitor, "!!! State is Ready but Command is nil.\n")
|
||||
p.state = StateStopped
|
||||
return
|
||||
}
|
||||
@@ -166,6 +167,33 @@ func (p *Process) Stop() {
|
||||
// Pretty sure this stopping code needs some work for windows and
|
||||
// will be a source of pain in the future.
|
||||
|
||||
if p.config.CmdStop != "" {
|
||||
// for issue #35 to do things like `docker stop`
|
||||
args, err := p.config.SanitizeCommandStop()
|
||||
if err != nil {
|
||||
fmt.Fprintf(p.logMonitor, "!!! Error sanitizing stop command: %v\n", err)
|
||||
|
||||
// leave the state as it is?
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(p.logMonitor, "!!! Running stop command: %s\n", strings.Join(args, " "))
|
||||
cmd := exec.Command(args[0], args[1:]...)
|
||||
cmd.Stdout = p.logMonitor
|
||||
cmd.Stderr = p.logMonitor
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
fmt.Fprintf(p.logMonitor, "!!! Error running stop command: %v\n", err)
|
||||
|
||||
// leave the state as it is?
|
||||
return
|
||||
}
|
||||
|
||||
err = cmd.Wait()
|
||||
if err != nil {
|
||||
fmt.Fprintf(p.logMonitor, "!!! WARNING error waiting for stop command to complete: %v\n", err)
|
||||
}
|
||||
} else {
|
||||
sigtermTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
@@ -190,6 +218,8 @@ func (p *Process) Stop() {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p.state = StateStopped
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user