From 96a8ea024102b94a9ff00f2044860f619cc4abe5 Mon Sep 17 00:00:00 2001
From: Benson Wong
Date: Fri, 14 Feb 2025 15:25:45 -0800
Subject: [PATCH] add cpu docker container build

---
 README.md                 |  7 +++++--
 docker/build-container.sh | 24 +++++++++++++++++++-----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 1b13e30..e42de16 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,8 @@ Written in golang, it is very easy to install (single binary with no dependancie
 Docker is the quickest way to try out llama-swap:
 
 ```
-$ docker run -it --rm --runtime nvidia -p 9292:8080 ghcr.io/mostlygeek/llama-swap:cuda
+# use CPU inference
+$ docker run -it --rm -p 9292:8080 ghcr.io/mostlygeek/llama-swap:cpu
 
 # qwen2.5 0.5B
@@ -52,10 +53,12 @@ $ curl -s http://localhost:9292/v1/chat/completions \
 
 Docker images are [published nightly](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap) that include the latest llama-swap and llama-server:
 
+- `ghcr.io/mostlygeek/llama-swap:cpu`
 - `ghcr.io/mostlygeek/llama-swap:cuda`
 - `ghcr.io/mostlygeek/llama-swap:intel`
 - `ghcr.io/mostlygeek/llama-swap:vulkan`
-- `ghcr.io/mostlygeek/llama-swap:musa`
+- ROCm: disabled until fixed in the llama.cpp containers
+- musa: disabled until requested
 
 Specific versions are also available and are tagged with the llama-swap, architecture and llama.cpp versions. For example: `ghcr.io/mostlygeek/llama-swap:v89-cuda-b4716`

diff --git a/docker/build-container.sh b/docker/build-container.sh
index 97839d7..90b39a9 100755
--- a/docker/build-container.sh
+++ b/docker/build-container.sh
@@ -5,7 +5,7 @@ cd $(dirname "$0")
 ARCH=$1
 
 # List of allowed architectures
-ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda")
+ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu")
 
 # Check if ARCH is in the allowed list
 if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
@@ -22,10 +22,24 @@ fi
 # the most recent llama-swap tag
 # have to strip out the 'v' due to .tar.gz file naming
 LS_VER=$(curl -s https://api.github.com/repos/mostlygeek/llama-swap/releases/latest | jq -r .tag_name | sed 's/v//')
-LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
-    "https://api.github.com/users/ggerganov/packages/container/llama.cpp/versions" \
-    | jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
-    | sort -r | head -n1 | awk -F '-' '{print $3}')
+
+if [ "$ARCH" == "cpu" ]; then
+    # cpu only containers just use the latest available
+    CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:cpu"
+    echo "Building ${CONTAINER_LATEST} $LS_VER"
+    docker build -f llama-swap.Containerfile --build-arg BASE_TAG=server --build-arg LS_VER=${LS_VER} -t ${CONTAINER_LATEST} .
+else
+    LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
+        "https://api.github.com/users/ggerganov/packages/container/llama.cpp/versions" \
+        | jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
+        | sort -r | head -n1 | awk -F '-' '{print $3}')
+
+    CONTAINER_TAG="ghcr.io/mostlygeek/llama-swap:v${LS_VER}-${ARCH}-${LCPP_TAG}"
+    CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:${ARCH}"
+    echo "Building ${CONTAINER_TAG} $LS_VER"
+    docker build -f llama-swap.Containerfile --build-arg BASE_TAG=server-${ARCH}-${LCPP_TAG} --build-arg LS_VER=${LS_VER} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} .
+fi
+exit
 
 CONTAINER_TAG="ghcr.io/mostlygeek/llama-swap:v${LS_VER}-${ARCH}-${LCPP_TAG}"
 CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:${ARCH}"
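For reviewers, a minimal sketch of how the updated script would be invoked from a repository checkout (paths as in the diff above; the token value is a placeholder, not a real credential):

```
# Build the new CPU-only image. The cpu branch skips the llama.cpp tag
# lookup, so no GITHUB_TOKEN is needed.
./docker/build-container.sh cpu

# GPU/accelerator images still query the GitHub packages API for the
# latest llama.cpp server tag; the script sends $GITHUB_TOKEN in the
# Authorization header, so export a token first (placeholder shown).
export GITHUB_TOKEN=ghp_xxxxxxxx
./docker/build-container.sh cuda
```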