Add ${MODEL_ID} macro (#226)
The automatic ${MODEL_ID} macro includes the name of the model and can be used in Cmd and CmdStop.
This commit is contained in:
@@ -49,6 +49,7 @@ macros:
|
|||||||
# - required
|
# - required
|
||||||
# - each key is the model's ID, used in API requests
|
# - each key is the model's ID, used in API requests
|
||||||
# - model settings have default values that are used if they are not defined here
|
# - model settings have default values that are used if they are not defined here
|
||||||
|
# - the model's ID is available in the ${MODEL_ID} macro, also available in macros defined above
|
||||||
# - below are examples of the various settings a model can have:
|
# - below are examples of the various settings a model can have:
|
||||||
# - available model settings: env, cmd, cmdStop, proxy, aliases, checkEndpoint, ttl, unlisted
|
# - available model settings: env, cmd, cmdStop, proxy, aliases, checkEndpoint, ttl, unlisted
|
||||||
models:
|
models:
|
||||||
@@ -148,12 +149,12 @@ models:
|
|||||||
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
||||||
|
|
||||||
# Docker example:
|
# Docker example:
|
||||||
# container run times like Docker and Podman can be used reliably with a
|
# container runtimes like Docker and Podman can be used reliably with
|
||||||
# a combination of cmd and cmdStop.
|
# a combination of cmd, cmdStop, and ${MODEL_ID}
|
||||||
"docker-llama":
|
"docker-llama":
|
||||||
proxy: "http://127.0.0.1:${PORT}"
|
proxy: "http://127.0.0.1:${PORT}"
|
||||||
cmd: |
|
cmd: |
|
||||||
docker run --name dockertest
|
docker run --name ${MODEL_ID}
|
||||||
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
||||||
ghcr.io/ggml-org/llama.cpp:server
|
ghcr.io/ggml-org/llama.cpp:server
|
||||||
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
||||||
@@ -167,7 +168,7 @@ models:
|
|||||||
# - on POSIX systems: a SIGTERM signal is sent
|
# - on POSIX systems: a SIGTERM signal is sent
|
||||||
# - on Windows, calls taskkill to stop the process
|
# - on Windows, calls taskkill to stop the process
|
||||||
# - processes have 5 seconds to shutdown until forceful termination is attempted
|
# - processes have 5 seconds to shutdown until forceful termination is attempted
|
||||||
cmdStop: docker stop dockertest
|
cmdStop: docker stop ${MODEL_ID}
|
||||||
|
|
||||||
# groups: a dictionary of group settings
|
# groups: a dictionary of group settings
|
||||||
# - optional, default: empty dictionary
|
# - optional, default: empty dictionary
|
||||||
|
|||||||
@@ -237,7 +237,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
|
|
||||||
- name must fit the regex ^[a-zA-Z0-9_-]+$
|
- name must fit the regex ^[a-zA-Z0-9_-]+$
|
||||||
- names must be less than 64 characters (no reason, just cause)
|
- names must be less than 64 characters (no reason, just cause)
|
||||||
- name can not be any reserved macros: PORT
|
- name can not be any reserved macros: PORT, MODEL_ID
|
||||||
- macro values must be less than 1024 characters
|
- macro values must be less than 1024 characters
|
||||||
*/
|
*/
|
||||||
macroNameRegex := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
|
macroNameRegex := regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
|
||||||
@@ -253,6 +253,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
}
|
}
|
||||||
switch macroName {
|
switch macroName {
|
||||||
case "PORT":
|
case "PORT":
|
||||||
|
case "MODEL_ID":
|
||||||
return Config{}, fmt.Errorf("macro name '%s' is reserved and cannot be used", macroName)
|
return Config{}, fmt.Errorf("macro name '%s' is reserved and cannot be used", macroName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -296,6 +297,11 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
|
|||||||
nextPort++
|
nextPort++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if strings.Contains(modelConfig.Cmd, "${MODEL_ID}") || strings.Contains(modelConfig.CmdStop, "${MODEL_ID}") {
|
||||||
|
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${MODEL_ID}", modelId)
|
||||||
|
modelConfig.CmdStop = strings.ReplaceAll(modelConfig.CmdStop, "${MODEL_ID}", modelId)
|
||||||
|
}
|
||||||
|
|
||||||
// make sure there are no unknown macros that have not been replaced
|
// make sure there are no unknown macros that have not been replaced
|
||||||
macroPattern := regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
|
macroPattern := regexp.MustCompile(`\$\{([a-zA-Z0-9_-]+)\}`)
|
||||||
fieldMap := map[string]string{
|
fieldMap := map[string]string{
|
||||||
|
|||||||
@@ -440,3 +440,44 @@ models:
|
|||||||
expectedCmd := "/user/llama.cpp/build/bin/llama-server --port 9990 --model /path/to/model.gguf -ngl 99"
|
expectedCmd := "/user/llama.cpp/build/bin/llama-server --port 9990 --model /path/to/model.gguf -ngl 99"
|
||||||
assert.Equal(t, expectedCmd, cmdStr, "Final command does not match expected structure")
|
assert.Equal(t, expectedCmd, cmdStr, "Final command does not match expected structure")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConfig_MacroModelId(t *testing.T) {
|
||||||
|
content := `
|
||||||
|
startPort: 9000
|
||||||
|
macros:
|
||||||
|
"docker-llama": docker run --name ${MODEL_ID} -p ${PORT}:8080 docker_img
|
||||||
|
"docker-stop": docker stop ${MODEL_ID}
|
||||||
|
|
||||||
|
models:
|
||||||
|
model1:
|
||||||
|
cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
|
||||||
|
|
||||||
|
model2:
|
||||||
|
cmd: ${docker-llama}
|
||||||
|
cmdStop: ${docker-stop}
|
||||||
|
|
||||||
|
author/model:F16:
|
||||||
|
cmd: /path/to/server -p ${PORT} -hf ${MODEL_ID}
|
||||||
|
cmdStop: stop
|
||||||
|
`
|
||||||
|
|
||||||
|
config, err := LoadConfigFromReader(strings.NewReader(content))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
sanitizedCmd, err := SanitizeCommand(config.Models["model1"].Cmd)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "/path/to/server -p 9001 -hf model1", strings.Join(sanitizedCmd, " "))
|
||||||
|
|
||||||
|
assert.Equal(t, "docker stop ${MODEL_ID}", config.Macros["docker-stop"])
|
||||||
|
|
||||||
|
sanitizedCmd2, err := SanitizeCommand(config.Models["model2"].Cmd)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "docker run --name model2 -p 9002:8080 docker_img", strings.Join(sanitizedCmd2, " "))
|
||||||
|
|
||||||
|
sanitizedCmdStop, err := SanitizeCommand(config.Models["model2"].CmdStop)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "docker stop model2", strings.Join(sanitizedCmdStop, " "))
|
||||||
|
|
||||||
|
sanitizedCmd3, err := SanitizeCommand(config.Models["author/model:F16"].Cmd)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "/path/to/server -p 9000 -hf author/model:F16", strings.Join(sanitizedCmd3, " "))
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user