add cpu docker container build

This commit is contained in:
Benson Wong
2025-02-14 15:25:45 -08:00
parent f20f2c9b7a
commit 96a8ea0241
2 changed files with 24 additions and 7 deletions

View File

@@ -31,7 +31,8 @@ Written in golang, it is very easy to install (single binary with no dependancie
Docker is the quickest way to try out llama-swap:
```
$ docker run -it --rm --runtime nvidia -p 9292:8080 ghcr.io/mostlygeek/llama-swap:cuda
# use CPU inference
$ docker run -it --rm -p 9292:8080 ghcr.io/mostlygeek/llama-swap:cpu
# qwen2.5 0.5B
@@ -52,10 +53,12 @@ $ curl -s http://localhost:9292/v1/chat/completions \
Docker images are [published nightly](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap) that include the latest llama-swap and llama-server:
- `ghcr.io/mostlygeek/llama-swap:cpu`
- `ghcr.io/mostlygeek/llama-swap:cuda`
- `ghcr.io/mostlygeek/llama-swap:intel`
- `ghcr.io/mostlygeek/llama-swap:vulkan`
- `ghcr.io/mostlygeek/llama-swap:musa`
- ROCm: disabled until fixed in the llama.cpp container.
- musa: disabled until requested.
Specific versions are also available and are tagged with the llama-swap version, the architecture, and the llama.cpp version. For example: `ghcr.io/mostlygeek/llama-swap:v89-cuda-b4716`

View File

@@ -5,7 +5,7 @@ cd $(dirname "$0")
ARCH=$1
# List of allowed architectures
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda")
# NOTE: bash array elements are separated by whitespace only; a comma would
# become part of the preceding element (e.g. "cuda,") and break the lookup.
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu")
# Check if ARCH is in the allowed list
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
@@ -22,11 +22,25 @@ fi
# the most recent llama-swap tag
# have to strip out the 'v' due to .tar.gz file naming
LS_VER=$(curl -s https://api.github.com/repos/mostlygeek/llama-swap/releases/latest | jq -r .tag_name)
# Strip only a *leading* 'v' so a 'v' elsewhere in the tag is left intact.
LS_VER=${LS_VER#v}
LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
# Build the llama-swap container image for the requested architecture.
#   cpu   : passes BASE_TAG=server and tags the image only as :cpu
#   other : looks up a llama.cpp "server-<arch>-<build>" tag and produces both
#           a versioned tag (v<LS_VER>-<arch>-<build>) and the moving :<arch> tag
# Reads: ARCH, LS_VER, GITHUB_TOKEN. Writes: CONTAINER_LATEST, and for non-cpu
# builds also LCPP_TAG and CONTAINER_TAG.
if [[ "$ARCH" == "cpu" ]]; then
  # cpu only containers just use the latest available
  CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:cpu"
  echo "Building ${CONTAINER_LATEST} $LS_VER"
  docker build -f llama-swap.Containerfile \
    --build-arg BASE_TAG=server \
    --build-arg "LS_VER=${LS_VER}" \
    -t "${CONTAINER_LATEST}" .
else
  # List every tag on package versions that carry a "server-<arch>..." tag,
  # take the first one in reverse sort order, and keep the third
  # dash-separated field (the build id, e.g. "b4716").
  LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
    "https://api.github.com/users/ggerganov/packages/container/llama.cpp/versions" \
    | jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
    | sort -r | head -n1 | awk -F '-' '{print $3}')

  # Without a build id the BASE_TAG below would be malformed
  # ("server-<arch>-"), so stop early with a clear message instead.
  if [[ -z "$LCPP_TAG" ]]; then
    echo "Error: could not determine the latest llama.cpp tag for ${ARCH}" >&2
    exit 1
  fi

  CONTAINER_TAG="ghcr.io/mostlygeek/llama-swap:v${LS_VER}-${ARCH}-${LCPP_TAG}"
  CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:${ARCH}"
  echo "Building ${CONTAINER_TAG} $LS_VER"
  docker build -f llama-swap.Containerfile \
    --build-arg "BASE_TAG=server-${ARCH}-${LCPP_TAG}" \
    --build-arg "LS_VER=${LS_VER}" \
    -t "${CONTAINER_TAG}" -t "${CONTAINER_LATEST}" .
fi
exit
CONTAINER_TAG="ghcr.io/mostlygeek/llama-swap:v${LS_VER}-${ARCH}-${LCPP_TAG}"
CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:${ARCH}"