update configuration examples for multiline yaml commands #133

This commit is contained in:
Benson Wong
2025-05-16 11:45:39 -07:00
parent 2d00120781
commit f9ee7156dc
2 changed files with 13 additions and 13 deletions
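The change throughout is from a folded block scalar (`>`) to a literal block scalar (`|`) for the `cmd` values. For reference, `>` joins the wrapped lines with spaces while `|` preserves the line breaks; a minimal illustrative snippet (the keys and command below are made up, not taken from the repository):

    # '>' folds newlines into spaces; the value becomes
    # "llama-server --port 9999 --model model.gguf\n"
    folded: >
      llama-server --port 9999
      --model model.gguf

    # '|' keeps the newlines; the value becomes
    # "llama-server --port 9999\n--model model.gguf\n"
    literal: |
      llama-server --port 9999
      --model model.gguf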

README.md

@@ -46,14 +46,14 @@ llama-swap's configuration is purposefully simple.
models:
  "qwen2.5":
    proxy: "http://127.0.0.1:9999"
-    cmd: >
+    cmd: |
      /app/llama-server
      -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
      --port 9999
  "smollm2":
    proxy: "http://127.0.0.1:9999"
-    cmd: >
+    cmd: |
      /app/llama-server
      -hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
      --port 9999
@@ -82,7 +82,7 @@ startPort: 10001
models:
  "llama":
    # multiline for readability
-    cmd: >
+    cmd: |
      llama-server --port 8999
      --model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
@@ -123,7 +123,7 @@ models:
  # Docker Support (v26.1.4+ required!)
  "docker-llama":
    proxy: "http://127.0.0.1:${PORT}"
-    cmd: >
+    cmd: |
      docker run --name dockertest
      --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
      ghcr.io/ggerganov/llama.cpp:server
@@ -247,11 +247,11 @@ Pre-built binaries are available for Linux, FreeBSD and Darwin (OSX). These are
1. Create a configuration file, see [config.example.yaml](config.example.yaml)
1. Download a [release](https://github.com/mostlygeek/llama-swap/releases) appropriate for your OS and architecture.
1. Run the binary with `llama-swap --config path/to/config.yaml`.
- Available flags:
- - `--config`: Path to the configuration file (default: `config.yaml`).
- - `--listen`: Address and port to listen on (default: `:8080`).
- - `--version`: Show version information and exit.
- - `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
+ Available flags:
+ - `--config`: Path to the configuration file (default: `config.yaml`).
+ - `--listen`: Address and port to listen on (default: `:8080`).
+ - `--version`: Show version information and exit.
+ - `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
### Building from source

config.example.yaml

@@ -15,7 +15,7 @@ groups:
models:
  "llama":
-    cmd: >
+    cmd: |
      models/llama-server-osx
      --port ${PORT}
      -m models/Llama-3.2-1B-Instruct-Q4_0.gguf
@@ -38,7 +38,7 @@ models:
  # Embedding example with Nomic
  # https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
  "nomic":
-    cmd: >
+    cmd: |
      models/llama-server-osx --port ${PORT}
      -m models/nomic-embed-text-v1.5.Q8_0.gguf
      --ctx-size 8192
@@ -51,7 +51,7 @@ models:
  # Reranking example with bge-reranker
  # https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF
  "bge-reranker":
-    cmd: >
+    cmd: |
      models/llama-server-osx --port ${PORT}
      -m models/bge-reranker-v2-m3-Q4_K_M.gguf
      --ctx-size 8192
@@ -59,7 +59,7 @@ models:
  # Docker Support (v26.1.4+ required!)
  "dockertest":
-    cmd: >
+    cmd: |
      docker run --name dockertest
      --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
      ghcr.io/ggerganov/llama.cpp:server
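Putting the pieces together, a hypothetical model entry in the literal-block style the examples now use (the model name and file path are placeholders, not taken from the repository):

    models:
      "my-model":
        proxy: "http://127.0.0.1:${PORT}"
        cmd: |
          /app/llama-server
          --port ${PORT}
          -m /models/my-model-Q4_K_M.gguf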