update configuration examples for multiline yaml commands #133
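
This commit swaps YAML folded block scalars (`cmd: >`) for literal block scalars (`cmd: |`) throughout the configuration examples. The practical difference: `>` folds newlines into spaces so a multiline command parses to a single line, while `|` preserves the line breaks. A minimal, self-contained sketch of the two styles (the `folded`/`literal` keys are purely illustrative; the actual configs use `cmd:` as shown in the diff below):

```yaml
# folded block scalar (>): newlines are folded into spaces
folded: >
  llama-server
  --port 9999
# parses to: "llama-server --port 9999\n"

# literal block scalar (|): newlines are preserved
literal: |
  llama-server
  --port 9999
# parses to: "llama-server\n--port 9999\n"
```
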
README.md
@@ -46,14 +46,14 @@ llama-swap's configuration is purposefully simple.
 models:
   "qwen2.5":
     proxy: "http://127.0.0.1:9999"
-    cmd: >
+    cmd: |
       /app/llama-server
       -hf bartowski/Qwen2.5-0.5B-Instruct-GGUF:Q4_K_M
       --port 9999
 
   "smollm2":
     proxy: "http://127.0.0.1:9999"
-    cmd: >
+    cmd: |
       /app/llama-server
       -hf bartowski/SmolLM2-135M-Instruct-GGUF:Q4_K_M
       --port 9999
@@ -82,7 +82,7 @@ startPort: 10001
 models:
   "llama":
     # multiline for readability
-    cmd: >
+    cmd: |
       llama-server --port 8999
       --model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
 
@@ -123,7 +123,7 @@ models:
   # Docker Support (v26.1.4+ required!)
   "docker-llama":
     proxy: "http://127.0.0.1:${PORT}"
-    cmd: >
+    cmd: |
       docker run --name dockertest
       --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
       ghcr.io/ggerganov/llama.cpp:server
@@ -247,11 +247,11 @@ Pre-built binaries are available for Linux, FreeBSD and Darwin (OSX). These are
 1. Create a configuration file, see [config.example.yaml](config.example.yaml)
 1. Download a [release](https://github.com/mostlygeek/llama-swap/releases) appropriate for your OS and architecture.
 1. Run the binary with `llama-swap --config path/to/config.yaml`.
    Available flags:
    - `--config`: Path to the configuration file (default: `config.yaml`).
    - `--listen`: Address and port to listen on (default: `:8080`).
    - `--version`: Show version information and exit.
    - `--watch-config`: Automatically reload the configuration file when it changes. This will wait for in-flight requests to complete then stop all running models (default: `false`).
 
 ### Building from source
 
config.example.yaml
@@ -15,7 +15,7 @@ groups:
 
 models:
   "llama":
-    cmd: >
+    cmd: |
       models/llama-server-osx
       --port ${PORT}
       -m models/Llama-3.2-1B-Instruct-Q4_0.gguf
@@ -38,7 +38,7 @@ models:
   # Embedding example with Nomic
   # https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
   "nomic":
-    cmd: >
+    cmd: |
       models/llama-server-osx --port ${PORT}
       -m models/nomic-embed-text-v1.5.Q8_0.gguf
       --ctx-size 8192
@@ -51,7 +51,7 @@ models:
   # Reranking example with bge-reranker
   # https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF
   "bge-reranker":
-    cmd: >
+    cmd: |
       models/llama-server-osx --port ${PORT}
       -m models/bge-reranker-v2-m3-Q4_K_M.gguf
       --ctx-size 8192
@@ -59,7 +59,7 @@ models:
 
   # Docker Support (v26.1.4+ required!)
   "dockertest":
-    cmd: >
+    cmd: |
       docker run --name dockertest
       --init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
       ghcr.io/ggerganov/llama.cpp:server