diff --git a/config.example.yaml b/config.example.yaml
index 4fca8ec..58a2d83 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -49,6 +49,7 @@ macros:
 # - required
 # - each key is the model's ID, used in API requests
 # - model settings have default values that are used if they are not defined here
+# - the model's ID is available in the ${MODEL_ID} macro, also available in macros defined above
 # - below are examples of the various settings a model can have:
 # - available model settings: env, cmd, cmdStop, proxy, aliases, checkEndpoint, ttl, unlisted
 models:
@@ -148,12 +149,12 @@ models:
     cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
 
   # Docker example:
-  # container run times like Docker and Podman can be used reliably with a
-  # a combination of cmd and cmdStop.
+  # container runtimes like Docker and Podman can be used reliably with
+  # a combination of cmd, cmdStop, and ${MODEL_ID}
   "docker-llama":
     proxy: "http://127.0.0.1:${PORT}"
     cmd: |
-      docker run --name dockertest
+      docker run --name ${MODEL_ID}
       --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
       ghcr.io/ggml-org/llama.cpp:server
       --model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
@@ -167,7 +168,7 @@ models:
     # - on POSIX systems: a SIGTERM signal is sent
     # - on Windows, calls taskkill to stop the process
     # - processes have 5 seconds to shutdown until forceful termination is attempted
-    cmdStop: docker stop dockertest
+    cmdStop: docker stop ${MODEL_ID}
 
 # groups: a dictionary of group settings
 # - optional, default: empty dictionary
diff --git a/proxy/config.go b/proxy/config.go
--- a/proxy/config.go
+++ b/proxy/config.go
@@ -237,7 +237,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 
 	- name must fit the regex ^[a-zA-Z0-9_-]+$
 	- names must be less than 64 characters (no reason, just cause)
-	- name can not be any reserved macros: PORT
+	- name can not be any reserved macros: PORT, MODEL_ID
 	- macro values must be less than 1024 characters
 	*/
 	macroNameRegex := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
@@ -253,6 +253,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 		}
 		switch macroName {
-		case "PORT":
+		// Go cases do not fall through, so all reserved names share a single case
+		case "PORT", "MODEL_ID":
 			return Config{}, fmt.Errorf("macro name '%s' is reserved and cannot be used", macroName)
 		}
 	}
@@ -296,6 +297,10 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 			nextPort++
 		}
 
+		// expand ${MODEL_ID} to the model's ID; ReplaceAll is a no-op when the macro is absent
+		modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${MODEL_ID}", modelId)
+		modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, "${MODEL_ID}", modelId)
+
 		// make sure there are no unknown macros that have not been replaced
 		macroPattern := regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
 		fieldMap := map[string]string{
diff --git a/proxy/config_test.go b/proxy/config_test.go
--- a/proxy/config_test.go
+++ b/proxy/config_test.go
@@ -440,3 +440,61 @@ models:
 	expectedCmd := "/user/llama.cpp/build/bin/llama-server --port 9990 --model /path/to/model.gguf -ngl 99"
 	assert.Equal(t, expectedCmd, cmdStr, "Final command does not match expected structure")
 }
+
+// TestConfig_MacroModelId checks that ${MODEL_ID} expands to each model's ID in
+// cmd and cmdStop, both when used directly and through a user-defined macro,
+// while the macro definitions themselves are kept unexpanded.
+func TestConfig_MacroModelId(t *testing.T) {
+	content := `
+startPort: 9000
+macros:
+  "docker-llama": docker run --name ${MODEL_ID} -p ${PORT}:8080 docker_img
+  "docker-stop": docker stop ${MODEL_ID}
+
+models:
+  model1:
+    cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
+
+  model2:
+    cmd: ${docker-llama}
+    cmdStop: ${docker-stop}
+
+  author/model:F16:
+    cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
+    cmdStop: stop
+`
+
+	config, err := LoadConfigFromReader(strings.NewReader(content))
+	assert.NoError(t, err)
+	sanitizedCmd, err := SanitizeCommand(config.Models["model1"].Cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, "/path/to/server -p 9001 -hf model1", strings.Join(sanitizedCmd, " "))
+
+	assert.Equal(t, "docker stop ${MODEL_ID}", config.Macros["docker-stop"])
+
+	sanitizedCmd2, err := SanitizeCommand(config.Models["model2"].Cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, "docker run --name model2 -p 9002:8080 docker_img", strings.Join(sanitizedCmd2, " "))
+
+	sanitizedCmdStop, err := SanitizeCommand(config.Models["model2"].CmdStop)
+	assert.NoError(t, err)
+	assert.Equal(t, "docker stop model2", strings.Join(sanitizedCmdStop, " "))
+
+	// NOTE(review): the last model listed gets the first port (9000); presumably ports are assigned in sorted model ID order — confirm
+	sanitizedCmd3, err := SanitizeCommand(config.Models["author/model:F16"].Cmd)
+	assert.NoError(t, err)
+	assert.Equal(t, "/path/to/server -p 9000 -hf author/model:F16", strings.Join(sanitizedCmd3, " "))
+}
+
+// TestConfig_MacroReservedNamesRejected checks that the reserved names PORT and
+// MODEL_ID are both refused when used as user-defined macro names.
+func TestConfig_MacroReservedNamesRejected(t *testing.T) {
+	for _, name := range []string{"PORT", "MODEL_ID"} {
+		content := "startPort: 9000\nmacros:\n  \"" + name + "\": value\nmodels:\n  model1:\n    cmd: /path/to/server -p ${PORT}\n"
+
+		_, err := LoadConfigFromReader(strings.NewReader(content))
+		if assert.Error(t, err, "macro name %q must be rejected", name) {
+			assert.Contains(t, err.Error(), "is reserved and cannot be used")
+		}
+	}
+}