diff --git a/Build.Containerfile b/Build.Containerfile new file mode 100644 index 0000000..be1cfa4 --- /dev/null +++ b/Build.Containerfile @@ -0,0 +1,60 @@ +# Stage 1: Build UI with Node.js +FROM node:25.2.1-trixie-slim AS ui-builder + +WORKDIR /app + +# Copy UI source +# Install ALL lockfile deps: the UI build below needs devDependencies +# (--only=production is deprecated and would omit the build toolchain) +COPY ui/package*.json ./ +RUN npm install -g typescript && npm ci +RUN npm install --save-dev @types/react @types/react-dom + +# Build UI +COPY ui/ ./ +RUN npm run build + +# Stage 2: Build Go binary with embedded UI +FROM golang:1.25.4 AS go-builder + +WORKDIR /app + +# Copy go mod and sum files +COPY go.mod go.sum ./ + +# Download dependencies +RUN go mod download + +# Copy all source code +COPY . . + +# Copy UI build artifacts to embed directory +#COPY --from=ui-builder /app/build ./ui/build +COPY --from=ui-builder /proxy/ui_dist ./proxy/ui_dist +# Build the binary (Linux AMD64) +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ +    go build -ldflags="-X main.commit=$(git rev-parse --short HEAD) -X main.version=local_$(git rev-parse --short HEAD) -X main.date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ +    -o build/llama-swap + + +FROM ghcr.io/ggml-org/llama.cpp:full-vulkan + +# ARG has to be after the FROM to be visible in this stage +ARG LS_VER=170 + +# Set HOME for the runtime stage +ENV HOME=/app + + + +WORKDIR /app + +# Copy the binary from builder stage +COPY --from=go-builder /app/build/llama-swap .
+ + +COPY ./docker/config.example.yaml /config.yaml + + +ENTRYPOINT [ "/app/llama-swap", "-config", "/config.yaml" ] + + + diff --git a/README.md b/README.md index 0a31333..8b19a42 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,15 @@ +# Llama Swap fork with socket activation + +This is a fork of https://github.com/mostlygeek/llama-swap with the following changes + - Support for systemd socket activation + - Container file for a fully containerized build with podman targeting Linux and Vulkan + - Documentation and example service files showing how to set up a socket-activated +   podman container isolated with network=none +   (see [Readme](docs/socket_activation/README.md)) + +# Original readme: + + ![llama-swap header image](header2.png) ![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/mostlygeek/llama-swap/total) ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/mostlygeek/llama-swap/go-ci.yml) diff --git a/docs/socket_activation/README.md b/docs/socket_activation/README.md new file mode 100644 index 0000000..cc33ec6 --- /dev/null +++ b/docs/socket_activation/README.md @@ -0,0 +1,41 @@ +# Rootless podman container with systemd socket activation + +## Idea +By passing in the socket from systemd we minimize resource use when the service is idle. +Since no other network access is required for operation, we can configure the container +with network=none and minimize the risk of the AI escaping. + +## Set up + +Optional, if you want to run this as a separate user: +``` +sudo useradd llama +sudo machinectl shell llama@ +``` + +Check out this repository, navigate to its root directory and build the llama.cpp/llama-swap +container with +``` +podman build -t localhost/lamaswap:latest -f Build.Containerfile +``` + +Place llama.socket in `~/.config/systemd/user`; adjust ports and interfaces if needed. +Place llama.container in `~/.config/containers/systemd`. Adjust paths for models and config if desired.
+The files are in `docs/socket_activation`, next to this readme. + +Put model files into the models directory (`~/models`). +Create a llama-swap `config.yaml` (by default in `~`) according to the docs. + +Start the socket: +``` +systemctl --user daemon-reload +systemctl --user enable --now llama.socket +``` + +If you also want to run the service when the user is not logged in, enable lingering for that user: +``` +sudo loginctl enable-linger llama +``` + +Check that you can access the llama-swap control panel in the browser. For troubleshooting, use, e.g., `journalctl -xe`. + diff --git a/docs/socket_activation/llama.container b/docs/socket_activation/llama.container new file mode 100644 index 0000000..dbfccf2 --- /dev/null +++ b/docs/socket_activation/llama.container @@ -0,0 +1,20 @@ +[Unit] +Description=Llama.cpp (full-vulkan) inference server +After=network-online.target +Wants=network-online.target + +[Container] +Image=localhost/lamaswap:latest +#AutoRemove=yes +#PublishPort=8080:8080 +Network=none +Volume=%h/models:/models:ro,Z +Volume=%h/config.yaml:/config.yaml:ro,Z +AddDevice=/dev/dri +Exec= + +#[Service] +#Restart=always + +[Install] +WantedBy=default.target diff --git a/docs/socket_activation/llama.socket b/docs/socket_activation/llama.socket new file mode 100644 index 0000000..2a35e61 --- /dev/null +++ b/docs/socket_activation/llama.socket @@ -0,0 +1,8 @@ +[Unit] +Description=llama socket + +[Socket] +ListenStream=0.0.0.0:8080 + +[Install] +WantedBy=default.target