diff --git a/README.md b/README.md index 48eb17b..831a085 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,12 @@ Any OpenAI compatible server would work. llama-swap was originally designed for - `v1/rerank` - `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36)) - ✅ Multiple GPU support -- ✅ Docker Support ([#40](https://github.com/mostlygeek/llama-swap/pull/40)) +- ✅ Docker and Podman support - ✅ Run multiple models at once with `profiles` - ✅ Remote log monitoring at `/log` - ✅ Automatic unloading of models from GPUs after timeout - ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc) - ✅ Direct access to upstream HTTP server via `/upstream/:model_id` ([demo](https://github.com/mostlygeek/llama-swap/pull/31)) -- ## config.yaml @@ -91,14 +90,9 @@ models: cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0 unlisted: true - # Docker Support (Experimental) - # see: https://github.com/mostlygeek/llama-swap/pull/40 - "dockertest": + # Docker Support (v26.1.4+ required!) + "docker-llama": proxy: "http://127.0.0.1:9790" - - # introduced to reliably stop containers - cmd_stop: docker stop -t 2 dockertest - cmd: > docker run --name dockertest --init --rm -p 9790:8080 -v /mnt/nvme/models:/models diff --git a/config.example.yaml b/config.example.yaml index 6a2a543..85ec5be 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -53,16 +53,9 @@ models: --ctx-size 8192 --reranking - # EXPERIMENTAL! Docker Support - # see: - # - https://github.com/mostlygeek/llama-swap/pull/40 - # - https://github.com/mostlygeek/llama-swap/issues/35 + # Docker Support (v26.1.4+ required!) "dockertest": proxy: "http://127.0.0.1:9790" - - # use this to reliably stop named containers - cmd_stop: docker stop -t 2 dockertest - cmd: > docker run --name dockertest --init --rm -p 9790:8080 -v /mnt/nvme/models:/models diff --git a/proxy/config.go b/proxy/config.go index 0ed7487..3206ae9 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -11,7 +11,6 @@ import ( type ModelConfig struct { Cmd string `yaml:"cmd"` - CmdStop string `yaml:"cmd_stop"` Proxy string `yaml:"proxy"` Aliases []string `yaml:"aliases"` Env []string `yaml:"env"` @@ -23,9 +22,6 @@ type ModelConfig struct { func (m *ModelConfig) SanitizedCommand() ([]string, error) { return SanitizeCommand(m.Cmd) } -func (m *ModelConfig) SanitizeCommandStop() ([]string, error) { - return SanitizeCommand(m.CmdStop) -} type Config struct { HealthCheckTimeout int `yaml:"healthCheckTimeout"` diff --git a/proxy/config_test.go b/proxy/config_test.go index 28495f1..da2eb39 100644 --- a/proxy/config_test.go +++ b/proxy/config_test.go @@ -35,11 +35,6 @@ models: aliases: - "m2" checkEndpoint: "/" - docker: - cmd: docker run -p 9999:8080 --name "my_container" - cmd_stop: docker stop my_container - proxy: "http://localhost:9999" - checkEndpoint: "/health" healthCheckTimeout: 15 profiles: test: @@ -61,7 +56,6 @@ profiles: Models: map[string]ModelConfig{ "model1": { Cmd: "path/to/cmd --arg1 one", - CmdStop: "", Proxy: "http://localhost:8080", Aliases: []string{"m1", "model-one"}, Env: []string{"VAR1=value1", "VAR2=value2"}, @@ -69,19 +63,11 @@ profiles: }, "model2": { Cmd: "path/to/cmd --arg1 one", - CmdStop: "", Proxy: "http://localhost:8081", Aliases: []string{"m2"}, Env: nil, CheckEndpoint: "/", }, - "docker": { - Cmd: `docker run -p 9999:8080 --name "my_container"`, - CmdStop: "docker stop my_container", - Proxy: "http://localhost:9999", - Env: nil, - CheckEndpoint: "/health", - }, }, HealthCheckTimeout: 15, Profiles: map[string][]string{ @@ -113,18 +99,6 @@ func TestConfig_ModelConfigSanitizedCommand(t *testing.T) { assert.Equal(t, []string{"python", "model1.py", "--arg1", "value1", "--arg2", "value2"}, args) } -func TestConfig_ModelConfigSanitizedCommandStop(t *testing.T) { - config := &ModelConfig{ - CmdStop: `docker stop my_container \ - --arg1 1 - --arg2 2`, - } - - args, err := config.SanitizeCommandStop() - assert.NoError(t, err) - assert.Equal(t, []string{"docker", "stop", "my_container", "--arg1", "1", "--arg2", "2"}, args) -} - func TestConfig_FindConfig(t *testing.T) { // TODO? diff --git a/proxy/process.go b/proxy/process.go index 1d185bb..7741175 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -153,7 +153,7 @@ func (p *Process) Stop() { defer p.stateMutex.Unlock() if p.state != StateReady { - fmt.Fprintf(p.logMonitor, "!!! Stop() called but Process State is not READY\n") + fmt.Fprintf(p.logMonitor, "!!! Info - Stop() called but Process State is not READY\n") return } @@ -164,58 +164,27 @@ func (p *Process) Stop() { return } - // Pretty sure this stopping code needs some work for windows and - // will be a source of pain in the future. + sigtermTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() - if p.config.CmdStop != "" { - // for issue #35 to do things like `docker stop` - args, err := p.config.SanitizeCommandStop() + sigtermNormal := make(chan error, 1) + go func() { + sigtermNormal <- p.cmd.Wait() + }() + + p.cmd.Process.Signal(syscall.SIGTERM) + + select { + case <-sigtermTimeout.Done(): + fmt.Fprintf(p.logMonitor, "XXX Process for %s timed out waiting to stop, sending SIGKILL to PID: %d\n", p.ID, p.cmd.Process.Pid) + p.cmd.Process.Kill() + p.cmd.Wait() + case err := <-sigtermNormal: if err != nil { - fmt.Fprintf(p.logMonitor, "!!! Error sanitizing stop command: %v\n", err) - - // leave the state as it is? - return - } - - fmt.Fprintf(p.logMonitor, "!!! Running stop command: %s\n", strings.Join(args, " ")) - cmd := exec.Command(args[0], args[1:]...) - cmd.Stdout = p.logMonitor - cmd.Stderr = p.logMonitor - err = cmd.Start() - if err != nil { - fmt.Fprintf(p.logMonitor, "!!! Error running stop command: %v\n", err) - - // leave the state as it is? - return - } - - err = cmd.Wait() - if err != nil { - fmt.Fprintf(p.logMonitor, "!!! WARNING error waiting for stop command to complete: %v\n", err) - } - } else { - sigtermTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - sigtermNormal := make(chan error, 1) - go func() { - sigtermNormal <- p.cmd.Wait() - }() - - p.cmd.Process.Signal(syscall.SIGTERM) - - select { - case <-sigtermTimeout.Done(): - fmt.Fprintf(p.logMonitor, "XXX Process for %s timed out waiting to stop, sending SIGKILL to PID: %d\n", p.ID, p.cmd.Process.Pid) - p.cmd.Process.Kill() - p.cmd.Wait() - case err := <-sigtermNormal: - if err != nil { - if err.Error() != "wait: no child processes" { - // possible that simple-responder for testing is just not - // existing right, so suppress those errors. - fmt.Fprintf(p.logMonitor, "!!! process for %s stopped with error > %v\n", p.ID, err) - } + if err.Error() != "wait: no child processes" { + // possible that simple-responder for testing is just not + // existing right, so suppress those errors. + fmt.Fprintf(p.logMonitor, "!!! process for %s stopped with error > %v\n", p.ID, err) } } }