add cpu docker container build
This commit is contained in:
@@ -31,7 +31,8 @@ Written in golang, it is very easy to install (single binary with no dependancie
|
|||||||
Docker is the quickest way to try out llama-swap:
|
Docker is the quickest way to try out llama-swap:
|
||||||
|
|
||||||
```
|
```
|
||||||
$ docker run -it --rm --runtime nvidia -p 9292:8080 ghcr.io/mostlygeek/llama-swap:cuda
|
# use CPU inference
|
||||||
|
$ docker run -it --rm -p 9292:8080 ghcr.io/mostlygeek/llama-swap:cpu
|
||||||
|
|
||||||
|
|
||||||
# qwen2.5 0.5B
|
# qwen2.5 0.5B
|
||||||
@@ -52,10 +53,12 @@ $ curl -s http://localhost:9292/v1/chat/completions \
|
|||||||
|
|
||||||
Docker images are [published nightly](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap) that include the latest llama-swap and llama-server:
|
Docker images are [published nightly](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap) that include the latest llama-swap and llama-server:
|
||||||
|
|
||||||
|
- `ghcr.io/mostlygeek/llama-swap:cpu`
|
||||||
- `ghcr.io/mostlygeek/llama-swap:cuda`
|
- `ghcr.io/mostlygeek/llama-swap:cuda`
|
||||||
- `ghcr.io/mostlygeek/llama-swap:intel`
|
- `ghcr.io/mostlygeek/llama-swap:intel`
|
||||||
- `ghcr.io/mostlygeek/llama-swap:vulkan`
|
- `ghcr.io/mostlygeek/llama-swap:vulkan`
|
||||||
- `ghcr.io/mostlygeek/llama-swap:musa`
|
- ROCm disabled until fixed in llama.cpp container
|
||||||
|
- musa disabled until requested.
|
||||||
|
|
||||||
Specific versions are also available and are tagged with the llama-swap, architecture and llama.cpp versions. For example: `ghcr.io/mostlygeek/llama-swap:v89-cuda-b4716`
|
Specific versions are also available and are tagged with the llama-swap, architecture and llama.cpp versions. For example: `ghcr.io/mostlygeek/llama-swap:v89-cuda-b4716`
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ cd $(dirname "$0")
|
|||||||
ARCH=$1
|
ARCH=$1
|
||||||
|
|
||||||
# List of allowed architectures
|
# List of allowed architectures
|
||||||
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda")
|
# Bash array elements are separated by whitespace only; a comma would become part of the element itself.
ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu")
|
||||||
|
|
||||||
# Check if ARCH is in the allowed list
|
# Check if ARCH is in the allowed list
|
||||||
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
|
||||||
@@ -22,10 +22,24 @@ fi
|
|||||||
# the most recent llama-swap tag
|
# the most recent llama-swap tag
|
||||||
# have to strip out the 'v' due to .tar.gz file naming
|
# have to strip out the 'v' due to .tar.gz file naming
|
||||||
LS_VER=$(curl -s https://api.github.com/repos/mostlygeek/llama-swap/releases/latest | jq -r .tag_name | sed 's/v//')
|
LS_VER=$(curl -s https://api.github.com/repos/mostlygeek/llama-swap/releases/latest | jq -r .tag_name | sed 's/v//')
|
||||||
LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
|
||||||
"https://api.github.com/users/ggerganov/packages/container/llama.cpp/versions" \
|
if [ "$ARCH" == "cpu" ]; then
|
||||||
| jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
|
# cpu only containers just use the latest available
|
||||||
| sort -r | head -n1 | awk -F '-' '{print $3}')
|
CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:cpu"
|
||||||
|
echo "Building ${CONTAINER_LATEST} $LS_VER"
|
||||||
|
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=server --build-arg LS_VER=${LS_VER} -t ${CONTAINER_LATEST} .
|
||||||
|
else
|
||||||
|
LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
|
||||||
|
"https://api.github.com/users/ggerganov/packages/container/llama.cpp/versions" \
|
||||||
|
| jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
|
||||||
|
| sort -r | head -n1 | awk -F '-' '{print $3}')
|
||||||
|
|
||||||
|
CONTAINER_TAG="ghcr.io/mostlygeek/llama-swap:v${LS_VER}-${ARCH}-${LCPP_TAG}"
|
||||||
|
CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:${ARCH}"
|
||||||
|
echo "Building ${CONTAINER_TAG} $LS_VER"
|
||||||
|
docker build -f llama-swap.Containerfile --build-arg BASE_TAG=server-${ARCH}-${LCPP_TAG} --build-arg LS_VER=${LS_VER} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} .
|
||||||
|
fi
|
||||||
|
exit
|
||||||
|
|
||||||
CONTAINER_TAG="ghcr.io/mostlygeek/llama-swap:v${LS_VER}-${ARCH}-${LCPP_TAG}"
|
CONTAINER_TAG="ghcr.io/mostlygeek/llama-swap:v${LS_VER}-${ARCH}-${LCPP_TAG}"
|
||||||
CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:${ARCH}"
|
CONTAINER_LATEST="ghcr.io/mostlygeek/llama-swap:${ARCH}"
|
||||||
|
|||||||
Reference in New Issue
Block a user