diff --git a/README.md b/README.md index 28acd8a..f29cc4d 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,14 @@ healthCheckTimeout: 60 # Valid log levels: debug, info (default), warn, error logLevel: info +# Automatic Port Values +# use ${PORT} in model.cmd (and optionally model.proxy) to have a port number assigned automatically +# when model.cmd uses ${PORT} you can omit model.proxy, as it will +# default to http://localhost:${PORT} + +# override the first port (default: 5800) used for automatic port values +startPort: 10001 + # define valid model values and the upstream server start models: "llama": @@ -83,6 +91,7 @@ models: - "CUDA_VISIBLE_DEVICES=0" # where to reach the server started by cmd, make sure the ports match + # can be omitted if you use an automatic ${PORT} in cmd proxy: http://127.0.0.1:8999 # aliases names to use this model for @@ -109,14 +118,14 @@ models: # but they can still be requested as normal "qwen-unlisted": unlisted: true - cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0 + cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0 # Docker Support (v26.1.4+ required!) 
"docker-llama": - proxy: "http://127.0.0.1:9790" + proxy: "http://127.0.0.1:${PORT}" cmd: > docker run --name dockertest - --init --rm -p 9790:8080 -v /mnt/nvme/models:/models + --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models ghcr.io/ggerganov/llama.cpp:server --model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf' diff --git a/proxy/config.go b/proxy/config.go index 182dc84..e82ffda 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -5,6 +5,7 @@ import ( "io" "os" "sort" + "strconv" "strings" "github.com/google/shlex" @@ -63,6 +64,9 @@ type Config struct { // map aliases to actual model IDs aliases map[string]string + + // StartPort is the first port assigned to models that use ${PORT} (defaults to 5800) + StartPort int `yaml:"startPort"` } func (c *Config) RealModelName(search string) (string, bool) { @@ -108,6 +112,14 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { config.HealthCheckTimeout = 15 } + // apply the default start port; an explicitly configured value must be positive + if config.StartPort == 0 { + // 0 means unset: fall back to 5800 + config.StartPort = 5800 + } else if config.StartPort < 1 { + return Config{}, fmt.Errorf("startPort must be at least 1") + } + // Populate the aliases map config.aliases = make(map[string]string) for modelName, modelConfig := range config.Models { @@ -119,6 +131,31 @@ func LoadConfigFromReader(r io.Reader) (Config, error) { } } + // replace ${PORT} in each model's cmd (and proxy) with the next sequential port + // sort the model IDs first so port assignment is deterministic + modelIds := make([]string, 0, len(config.Models)) + for modelId := range config.Models { + modelIds = append(modelIds, modelId) + } + sort.Strings(modelIds) // This guarantees stable iteration order + + // assign ports in sorted model ID order, starting at StartPort + nextPort := config.StartPort + for _, modelId := range modelIds { + modelConfig := config.Models[modelId] + if strings.Contains(modelConfig.Cmd, "${PORT}") { + modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${PORT}", strconv.Itoa(nextPort)) + if modelConfig.Proxy == "" { + modelConfig.Proxy = 
fmt.Sprintf("http://localhost:%d", nextPort) + } else { + modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, "${PORT}", strconv.Itoa(nextPort)) + } + nextPort++ + config.Models[modelId] = modelConfig + } else if modelConfig.Proxy == "" { + return Config{}, fmt.Errorf("model %s requires a proxy value when not using automatic ${PORT}", modelId) + } + } config = AddDefaultGroupToConfig(config) // check that members are all unique in the groups memberUsage := make(map[string]string) // maps member to group it appears in diff --git a/proxy/config_test.go b/proxy/config_test.go index bb0ce5a..6cff42d 100644 --- a/proxy/config_test.go +++ b/proxy/config_test.go @@ -44,6 +44,7 @@ models: checkEndpoint: "/" model4: cmd: path/to/cmd --arg1 one + proxy: "http://localhost:8082" checkEndpoint: "/" healthCheckTimeout: 15 @@ -74,6 +75,7 @@ groups: } expected := Config{ + StartPort: 5800, Models: map[string]ModelConfig{ "model1": { Cmd: "path/to/cmd --arg1 one", @@ -98,6 +100,7 @@ groups: }, "model4": { Cmd: "path/to/cmd --arg1 one", + Proxy: "http://localhost:8082", CheckEndpoint: "/", }, }, @@ -166,8 +169,9 @@ groups: ` // Load the config and verify _, err := LoadConfigFromReader(strings.NewReader(content)) - assert.Equal(t, "model member model2 is used in multiple groups: group1 and group2", err.Error()) + // use Contains because map iteration order is not guaranteed + assert.Contains(t, err.Error(), "model member model2 is used in multiple groups:") } func TestConfig_ModelAliasesAreUnique(t *testing.T) { @@ -186,10 +190,12 @@ models: - m1 - m2 ` - // Load the config and verify _, err := LoadConfigFromReader(strings.NewReader(content)) - assert.Equal(t, "duplicate alias m1 found in model: model2", err.Error()) + + // use Contains because the error may name `model1` or `model2`, + // depending on Go's map iteration order + assert.Contains(t, err.Error(), "duplicate alias m1 found in model: model") } func TestConfig_ModelConfigSanitizedCommand(t *testing.T) { @@ 
-279,3 +285,77 @@ func TestConfig_SanitizeCommand(t *testing.T) { assert.Error(t, err) assert.Nil(t, args) } + +// TestConfig_AutomaticPortAssignments covers startPort defaults and validation, and ${PORT} substitution in model cmd and proxy values. +func TestConfig_AutomaticPortAssignments(t *testing.T) { + t.Run("Default Port Ranges", func(t *testing.T) { + content := `` + config, err := LoadConfigFromReader(strings.NewReader(content)) + if !assert.NoError(t, err) { + t.Fatalf("Failed to load config: %v", err) + } + + assert.Equal(t, 5800, config.StartPort) + }) + t.Run("User specific port ranges", func(t *testing.T) { + content := `startPort: 1000` + config, err := LoadConfigFromReader(strings.NewReader(content)) + if !assert.NoError(t, err) { + t.Fatalf("Failed to load config: %v", err) + } + + assert.Equal(t, 1000, config.StartPort) + }) + + t.Run("Invalid start port", func(t *testing.T) { + content := `startPort: abcd` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.Error(t, err) + }) + + t.Run("Negative start port is rejected", func(t *testing.T) { + content := `startPort: -99` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.Error(t, err) + }) + + t.Run("Automatic port assignments", func(t *testing.T) { + content := ` +startPort: 5800 +models: + model1: + cmd: svr --port ${PORT} + model2: + cmd: svr --port ${PORT} + proxy: "http://172.11.22.33:${PORT}" + model3: + cmd: svr --port 1999 + proxy: "http://1.2.3.4:1999" +` + config, err := LoadConfigFromReader(strings.NewReader(content)) + if !assert.NoError(t, err) { + t.Fatalf("Failed to load config: %v", err) + } + + assert.Equal(t, 5800, config.StartPort) + assert.Equal(t, "svr --port 5800", config.Models["model1"].Cmd) + assert.Equal(t, "http://localhost:5800", config.Models["model1"].Proxy) + + assert.Equal(t, "svr --port 5801", config.Models["model2"].Cmd) + assert.Equal(t, "http://172.11.22.33:5801", config.Models["model2"].Proxy) + + assert.Equal(t, "svr --port 1999", config.Models["model3"].Cmd) + assert.Equal(t, "http://1.2.3.4:1999", config.Models["model3"].Proxy) + + }) + + t.Run("Proxy 
value required if no ${PORT} in cmd", func(t *testing.T) { + content := ` +models: + model1: + cmd: svr --port 111 +` + _, err := LoadConfigFromReader(strings.NewReader(content)) + assert.EqualError(t, err, "model model1 requires a proxy value when not using automatic ${PORT}") + }) +}