From f9ee7156dc9e3939923ada01397b255f21a17f29 Mon Sep 17 00:00:00 2001
From: Benson Wong
Date: Fri, 16 May 2025 11:45:39 -0700
Subject: [PATCH] update configuration examples for multiline yaml commands
 #133

---
 README.md           | 18 +++++++++---------
 config.example.yaml |  8 ++++----
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index b8c75fe..2327c01 100644
--- a/README.md
+++ b/README.md
@@ -46,14 +46,14 @@ llama-swap's configuration is purposefully simple.
 models:
   "qwen2.5":
     proxy: "http://127.0.0.1:9999"
-    cmd: >
+    cmd: |
       /app/llama-server
       -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
       --port 9999
 
   "smollm2":
     proxy: "http://127.0.0.1:9999"
-    cmd: >
+    cmd: |
       /app/llama-server
       -hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
       --port 9999
@@ -82,7 +82,7 @@ startPort: 10001
 models:
   "llama":
     # multiline for readability
-    cmd: >
+    cmd: |
       llama-server --port 8999
       --model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
 
@@ -123,7 +123,7 @@ models:
   # Docker Support (v26.1.4+ required!)
   "docker-llama":
     proxy: "http://127.0.0.1:${PORT}"
-    cmd: >
+    cmd: |
       docker run --name dockertest --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
       ghcr.io/ggerganov/llama.cpp:server
 
@@ -247,11 +247,11 @@ Pre-built binaries are available for Linux, FreeBSD and Darwin (OSX). These are
 1. Create a configuration file, see [config.example.yaml](config.example.yaml)
 1. Download a [release](https://github.com/mostlygeek/llama-swap/releases) appropriate for your OS and architecture.
 1. Run the binary with `llama-swap --config path/to/config.yaml`.
-   Available flags:
-   - `--config`: Path to the configuration file (default: `config.yaml`).
-   - `--listen`: Address and port to listen on (default: `:8080`).
-   - `--version`: Show version information and exit.
-   - `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
+   Available flags:
+   - `--config`: Path to the configuration file (default: `config.yaml`).
+   - `--listen`: Address and port to listen on (default: `:8080`).
+   - `--version`: Show version information and exit.
+   - `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
 
 ### Building from source
 
diff --git a/config.example.yaml b/config.example.yaml
index 4fcce0b..befbd4c 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -15,7 +15,7 @@ groups:
 
 models:
   "llama":
-    cmd: >
+    cmd: |
       models/llama-server-osx --port ${PORT}
       -m models/Llama-3.2-1B-Instruct-Q4_0.gguf
 
@@ -38,7 +38,7 @@ models:
   # Embedding example with Nomic
   # https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
   "nomic":
-    cmd: >
+    cmd: |
       models/llama-server-osx --port ${PORT}
       -m models/nomic-embed-text-v1.5.Q8_0.gguf
       --ctx-size 8192
@@ -51,7 +51,7 @@ models:
   # Reranking example with bge-reranker
   # https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF
   "bge-reranker":
-    cmd: >
+    cmd: |
       models/llama-server-osx --port ${PORT}
       -m models/bge-reranker-v2-m3-Q4_K_M.gguf
       --ctx-size 8192
@@ -59,7 +59,7 @@ models:
 
   # Docker Support (v26.1.4+ required!)
   "dockertest":
-    cmd: >
+    cmd: |
       docker run --name dockertest --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
       ghcr.io/ggerganov/llama.cpp:server
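
For context on the swap from `>` to `|`: in YAML, `>` is a folded block scalar (wrapped lines are joined with spaces) while `|` is a literal block scalar (newlines are preserved), so with `|` each flag of a multiline `cmd` stays on its own line in the parsed value. Below is a minimal sketch of the two styles side by side; the model names "example-folded" and "example-literal" are made up for illustration and do not appear in the patch.

```yaml
models:
  # Folded style, as in the old examples: YAML joins the wrapped lines with
  # spaces, so cmd becomes the single-line value
  # "llama-server --port 9999 --model /models/example.gguf\n"
  "example-folded":
    proxy: "http://127.0.0.1:9999"
    cmd: >
      llama-server --port 9999
      --model /models/example.gguf

  # Literal style, as in the updated examples: YAML keeps the newlines, so
  # cmd stays as
  # "llama-server --port 9999\n--model /models/example.gguf\n"
  "example-literal":
    proxy: "http://127.0.0.1:9999"
    cmd: |
      llama-server --port 9999
      --model /models/example.gguf
```

The folded form in the earlier examples also produced a workable single-line command; the literal form simply keeps the configured value one flag per line, which is easier to read and edit.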