* remove cmd_stop functionality from #40
This commit is contained in:
12
README.md
12
README.md
@@ -30,13 +30,12 @@ Any OpenAI compatible server would work. llama-swap was originally designed for
|
|||||||
- `v1/rerank`
|
- `v1/rerank`
|
||||||
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
- `v1/audio/speech` ([#36](https://github.com/mostlygeek/llama-swap/issues/36))
|
||||||
- ✅ Multiple GPU support
|
- ✅ Multiple GPU support
|
||||||
- ✅ Docker Support ([#40](https://github.com/mostlygeek/llama-swap/pull/40))
|
- ✅ Docker and Podman support
|
||||||
- ✅ Run multiple models at once with `profiles`
|
- ✅ Run multiple models at once with `profiles`
|
||||||
- ✅ Remote log monitoring at `/log`
|
- ✅ Remote log monitoring at `/log`
|
||||||
- ✅ Automatic unloading of models from GPUs after timeout
|
- ✅ Automatic unloading of models from GPUs after timeout
|
||||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
||||||
- ✅ Direct access to upstream HTTP server via `/upstream/:model_id` ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
- ✅ Direct access to upstream HTTP server via `/upstream/:model_id` ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
|
||||||
-
|
|
||||||
|
|
||||||
## config.yaml
|
## config.yaml
|
||||||
|
|
||||||
@@ -91,14 +90,9 @@ models:
|
|||||||
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
||||||
unlisted: true
|
unlisted: true
|
||||||
|
|
||||||
# Docker Support (Experimental)
|
# Docker Support (v26.1.4+ required!)
|
||||||
# see: https://github.com/mostlygeek/llama-swap/pull/40
|
"docker-llama":
|
||||||
"dockertest":
|
|
||||||
proxy: "http://127.0.0.1:9790"
|
proxy: "http://127.0.0.1:9790"
|
||||||
|
|
||||||
# introduced to reliably stop containers
|
|
||||||
cmd_stop: docker stop -t 2 dockertest
|
|
||||||
|
|
||||||
cmd: >
|
cmd: >
|
||||||
docker run --name dockertest
|
docker run --name dockertest
|
||||||
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
|
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
|
||||||
|
|||||||
@@ -53,16 +53,9 @@ models:
|
|||||||
--ctx-size 8192
|
--ctx-size 8192
|
||||||
--reranking
|
--reranking
|
||||||
|
|
||||||
# EXPERIMENTAL! Docker Support
|
# Docker Support (v26.1.4+ required!)
|
||||||
# see:
|
|
||||||
# - https://github.com/mostlygeek/llama-swap/pull/40
|
|
||||||
# - https://github.com/mostlygeek/llama-swap/issues/35
|
|
||||||
"dockertest":
|
"dockertest":
|
||||||
proxy: "http://127.0.0.1:9790"
|
proxy: "http://127.0.0.1:9790"
|
||||||
|
|
||||||
# use this to reliably stop named containers
|
|
||||||
cmd_stop: docker stop -t 2 dockertest
|
|
||||||
|
|
||||||
cmd: >
|
cmd: >
|
||||||
docker run --name dockertest
|
docker run --name dockertest
|
||||||
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
|
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
type ModelConfig struct {
|
type ModelConfig struct {
|
||||||
Cmd string `yaml:"cmd"`
|
Cmd string `yaml:"cmd"`
|
||||||
CmdStop string `yaml:"cmd_stop"`
|
|
||||||
Proxy string `yaml:"proxy"`
|
Proxy string `yaml:"proxy"`
|
||||||
Aliases []string `yaml:"aliases"`
|
Aliases []string `yaml:"aliases"`
|
||||||
Env []string `yaml:"env"`
|
Env []string `yaml:"env"`
|
||||||
@@ -23,9 +22,6 @@ type ModelConfig struct {
|
|||||||
func (m *ModelConfig) SanitizedCommand() ([]string, error) {
|
func (m *ModelConfig) SanitizedCommand() ([]string, error) {
|
||||||
return SanitizeCommand(m.Cmd)
|
return SanitizeCommand(m.Cmd)
|
||||||
}
|
}
|
||||||
func (m *ModelConfig) SanitizeCommandStop() ([]string, error) {
|
|
||||||
return SanitizeCommand(m.CmdStop)
|
|
||||||
}
|
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
|
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
|
||||||
|
|||||||
@@ -35,11 +35,6 @@ models:
|
|||||||
aliases:
|
aliases:
|
||||||
- "m2"
|
- "m2"
|
||||||
checkEndpoint: "/"
|
checkEndpoint: "/"
|
||||||
docker:
|
|
||||||
cmd: docker run -p 9999:8080 --name "my_container"
|
|
||||||
cmd_stop: docker stop my_container
|
|
||||||
proxy: "http://localhost:9999"
|
|
||||||
checkEndpoint: "/health"
|
|
||||||
healthCheckTimeout: 15
|
healthCheckTimeout: 15
|
||||||
profiles:
|
profiles:
|
||||||
test:
|
test:
|
||||||
@@ -61,7 +56,6 @@ profiles:
|
|||||||
Models: map[string]ModelConfig{
|
Models: map[string]ModelConfig{
|
||||||
"model1": {
|
"model1": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
CmdStop: "",
|
|
||||||
Proxy: "http://localhost:8080",
|
Proxy: "http://localhost:8080",
|
||||||
Aliases: []string{"m1", "model-one"},
|
Aliases: []string{"m1", "model-one"},
|
||||||
Env: []string{"VAR1=value1", "VAR2=value2"},
|
Env: []string{"VAR1=value1", "VAR2=value2"},
|
||||||
@@ -69,19 +63,11 @@ profiles:
|
|||||||
},
|
},
|
||||||
"model2": {
|
"model2": {
|
||||||
Cmd: "path/to/cmd --arg1 one",
|
Cmd: "path/to/cmd --arg1 one",
|
||||||
CmdStop: "",
|
|
||||||
Proxy: "http://localhost:8081",
|
Proxy: "http://localhost:8081",
|
||||||
Aliases: []string{"m2"},
|
Aliases: []string{"m2"},
|
||||||
Env: nil,
|
Env: nil,
|
||||||
CheckEndpoint: "/",
|
CheckEndpoint: "/",
|
||||||
},
|
},
|
||||||
"docker": {
|
|
||||||
Cmd: `docker run -p 9999:8080 --name "my_container"`,
|
|
||||||
CmdStop: "docker stop my_container",
|
|
||||||
Proxy: "http://localhost:9999",
|
|
||||||
Env: nil,
|
|
||||||
CheckEndpoint: "/health",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
HealthCheckTimeout: 15,
|
HealthCheckTimeout: 15,
|
||||||
Profiles: map[string][]string{
|
Profiles: map[string][]string{
|
||||||
@@ -113,18 +99,6 @@ func TestConfig_ModelConfigSanitizedCommand(t *testing.T) {
|
|||||||
assert.Equal(t, []string{"python", "model1.py", "--arg1", "value1", "--arg2", "value2"}, args)
|
assert.Equal(t, []string{"python", "model1.py", "--arg1", "value1", "--arg2", "value2"}, args)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestConfig_ModelConfigSanitizedCommandStop(t *testing.T) {
|
|
||||||
config := &ModelConfig{
|
|
||||||
CmdStop: `docker stop my_container \
|
|
||||||
--arg1 1
|
|
||||||
--arg2 2`,
|
|
||||||
}
|
|
||||||
|
|
||||||
args, err := config.SanitizeCommandStop()
|
|
||||||
assert.NoError(t, err)
|
|
||||||
assert.Equal(t, []string{"docker", "stop", "my_container", "--arg1", "1", "--arg2", "2"}, args)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestConfig_FindConfig(t *testing.T) {
|
func TestConfig_FindConfig(t *testing.T) {
|
||||||
|
|
||||||
// TODO?
|
// TODO?
|
||||||
|
|||||||
@@ -153,7 +153,7 @@ func (p *Process) Stop() {
|
|||||||
defer p.stateMutex.Unlock()
|
defer p.stateMutex.Unlock()
|
||||||
|
|
||||||
if p.state != StateReady {
|
if p.state != StateReady {
|
||||||
fmt.Fprintf(p.logMonitor, "!!! Stop() called but Process State is not READY\n")
|
fmt.Fprintf(p.logMonitor, "!!! Info - Stop() called but Process State is not READY\n")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -164,58 +164,27 @@ func (p *Process) Stop() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pretty sure this stopping code needs some work for windows and
|
sigtermTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
// will be a source of pain in the future.
|
defer cancel()
|
||||||
|
|
||||||
if p.config.CmdStop != "" {
|
sigtermNormal := make(chan error, 1)
|
||||||
// for issue #35 to do things like `docker stop`
|
go func() {
|
||||||
args, err := p.config.SanitizeCommandStop()
|
sigtermNormal <- p.cmd.Wait()
|
||||||
|
}()
|
||||||
|
|
||||||
|
p.cmd.Process.Signal(syscall.SIGTERM)
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-sigtermTimeout.Done():
|
||||||
|
fmt.Fprintf(p.logMonitor, "XXX Process for %s timed out waiting to stop, sending SIGKILL to PID: %d\n", p.ID, p.cmd.Process.Pid)
|
||||||
|
p.cmd.Process.Kill()
|
||||||
|
p.cmd.Wait()
|
||||||
|
case err := <-sigtermNormal:
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(p.logMonitor, "!!! Error sanitizing stop command: %v\n", err)
|
if err.Error() != "wait: no child processes" {
|
||||||
|
// possible that simple-responder for testing is just not
|
||||||
// leave the state as it is?
|
// exiting right, so suppress those errors.
|
||||||
return
|
fmt.Fprintf(p.logMonitor, "!!! process for %s stopped with error > %v\n", p.ID, err)
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Fprintf(p.logMonitor, "!!! Running stop command: %s\n", strings.Join(args, " "))
|
|
||||||
cmd := exec.Command(args[0], args[1:]...)
|
|
||||||
cmd.Stdout = p.logMonitor
|
|
||||||
cmd.Stderr = p.logMonitor
|
|
||||||
err = cmd.Start()
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(p.logMonitor, "!!! Error running stop command: %v\n", err)
|
|
||||||
|
|
||||||
// leave the state as it is?
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
err = cmd.Wait()
|
|
||||||
if err != nil {
|
|
||||||
fmt.Fprintf(p.logMonitor, "!!! WARNING error waiting for stop command to complete: %v\n", err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sigtermTimeout, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
sigtermNormal := make(chan error, 1)
|
|
||||||
go func() {
|
|
||||||
sigtermNormal <- p.cmd.Wait()
|
|
||||||
}()
|
|
||||||
|
|
||||||
p.cmd.Process.Signal(syscall.SIGTERM)
|
|
||||||
|
|
||||||
select {
|
|
||||||
case <-sigtermTimeout.Done():
|
|
||||||
fmt.Fprintf(p.logMonitor, "XXX Process for %s timed out waiting to stop, sending SIGKILL to PID: %d\n", p.ID, p.cmd.Process.Pid)
|
|
||||||
p.cmd.Process.Kill()
|
|
||||||
p.cmd.Wait()
|
|
||||||
case err := <-sigtermNormal:
|
|
||||||
if err != nil {
|
|
||||||
if err.Error() != "wait: no child processes" {
|
|
||||||
// possible that simple-responder for testing is just not
|
|
||||||
// exiting right, so suppress those errors.
|
|
||||||
fmt.Fprintf(p.logMonitor, "!!! process for %s stopped with error > %v\n", p.ID, err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user