update example config
This commit is contained in:
@@ -5,13 +5,20 @@ healthCheckTimeout: 90
|
|||||||
# valid log levels: debug, info (default), warn, error
|
# valid log levels: debug, info (default), warn, error
|
||||||
logLevel: debug
|
logLevel: debug
|
||||||
|
|
||||||
|
# creating a coding profile with models for code generation and general questions
|
||||||
|
groups:
|
||||||
|
coding:
|
||||||
|
swap: false
|
||||||
|
members:
|
||||||
|
- "qwen"
|
||||||
|
- "llama"
|
||||||
|
|
||||||
models:
|
models:
|
||||||
"llama":
|
"llama":
|
||||||
cmd: >
|
cmd: >
|
||||||
models/llama-server-osx
|
models/llama-server-osx
|
||||||
--port 9001
|
--port ${PORT}
|
||||||
-m models/Llama-3.2-1B-Instruct-Q4_0.gguf
|
-m models/Llama-3.2-1B-Instruct-Q4_0.gguf
|
||||||
proxy: http://127.0.0.1:9001
|
|
||||||
|
|
||||||
# list of model name aliases this llama.cpp instance can serve
|
# list of model name aliases this llama.cpp instance can serve
|
||||||
aliases:
|
aliases:
|
||||||
@@ -24,17 +31,15 @@ models:
|
|||||||
ttl: 5
|
ttl: 5
|
||||||
|
|
||||||
"qwen":
|
"qwen":
|
||||||
cmd: models/llama-server-osx --port 9002 -m models/qwen2.5-0.5b-instruct-q8_0.gguf
|
cmd: models/llama-server-osx --port ${PORT} -m models/qwen2.5-0.5b-instruct-q8_0.gguf
|
||||||
proxy: http://127.0.0.1:9002
|
|
||||||
aliases:
|
aliases:
|
||||||
- gpt-3.5-turbo
|
- gpt-3.5-turbo
|
||||||
|
|
||||||
# Embedding example with Nomic
|
# Embedding example with Nomic
|
||||||
# https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
|
# https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
|
||||||
"nomic":
|
"nomic":
|
||||||
proxy: http://127.0.0.1:9005
|
|
||||||
cmd: >
|
cmd: >
|
||||||
models/llama-server-osx --port 9005
|
models/llama-server-osx --port ${PORT}
|
||||||
-m models/nomic-embed-text-v1.5.Q8_0.gguf
|
-m models/nomic-embed-text-v1.5.Q8_0.gguf
|
||||||
--ctx-size 8192
|
--ctx-size 8192
|
||||||
--batch-size 8192
|
--batch-size 8192
|
||||||
@@ -46,19 +51,17 @@ models:
|
|||||||
# Reranking example with bge-reranker
|
# Reranking example with bge-reranker
|
||||||
# https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF
|
# https://huggingface.co/gpustack/bge-reranker-v2-m3-GGUF
|
||||||
"bge-reranker":
|
"bge-reranker":
|
||||||
proxy: http://127.0.0.1:9006
|
|
||||||
cmd: >
|
cmd: >
|
||||||
models/llama-server-osx --port 9006
|
models/llama-server-osx --port ${PORT}
|
||||||
-m models/bge-reranker-v2-m3-Q4_K_M.gguf
|
-m models/bge-reranker-v2-m3-Q4_K_M.gguf
|
||||||
--ctx-size 8192
|
--ctx-size 8192
|
||||||
--reranking
|
--reranking
|
||||||
|
|
||||||
# Docker Support (v26.1.4+ required!)
|
# Docker Support (v26.1.4+ required!)
|
||||||
"dockertest":
|
"dockertest":
|
||||||
proxy: "http://127.0.0.1:9790"
|
|
||||||
cmd: >
|
cmd: >
|
||||||
docker run --name dockertest
|
docker run --name dockertest
|
||||||
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
|
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
|
||||||
ghcr.io/ggerganov/llama.cpp:server
|
ghcr.io/ggerganov/llama.cpp:server
|
||||||
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
|
||||||
|
|
||||||
@@ -67,8 +70,7 @@ models:
|
|||||||
env:
|
env:
|
||||||
- CUDA_VISIBLE_DEVICES=0,1
|
- CUDA_VISIBLE_DEVICES=0,1
|
||||||
- env1=hello
|
- env1=hello
|
||||||
cmd: build/simple-responder --port 8999
|
cmd: build/simple-responder --port ${PORT}
|
||||||
proxy: http://127.0.0.1:8999
|
|
||||||
unlisted: true
|
unlisted: true
|
||||||
|
|
||||||
# use "none" to skip check. Caution this may cause some requests to fail
|
# use "none" to skip check. Caution this may cause some requests to fail
|
||||||
@@ -84,9 +86,3 @@ models:
|
|||||||
cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf
|
cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf
|
||||||
proxy: http://127.0.0.1:9000
|
proxy: http://127.0.0.1:9000
|
||||||
unlisted: true
|
unlisted: true
|
||||||
|
|
||||||
# creating a coding profile with models for code generation and general questions
|
|
||||||
profiles:
|
|
||||||
coding:
|
|
||||||
- "qwen"
|
|
||||||
- "llama"
|
|
||||||
Reference in New Issue
Block a user