Containerized build and socket activation docs

2025-11-20 00:18:58 +01:00
parent 553d8659b7
commit b984f5ca08
5 changed files with 141 additions and 0 deletions
--- a/Build.Containerfile
+++ b/Build.Containerfile
@@ -0,0 +1,60 @@
 # Stage 1: build the web UI with Node.js
 FROM node:25.2.1-trixie-slim AS ui-builder
 WORKDIR /app
 # Copy only the package manifests first, so the dependency layers below are
 # reused from cache until package.json / package-lock.json change.
 COPY ui/package*.json ./
 # Install the TypeScript compiler globally and the locked runtime dependencies.
 # `--omit=dev` is the supported spelling of the deprecated `--only=production`.
 RUN npm install -g typescript && npm ci --omit=dev
 # Add the React type declarations needed to compile the UI.
 # NOTE(review): presumably this install step is also what pulls in the
 # remaining devDependencies required by the build script - confirm; if so, a
 # single plain `npm ci` could replace both install steps.
 RUN npm install --save-dev @types/react @types/react-dom
 # Copy the UI sources and run the package's build script
 COPY ui/ ./
 RUN npm run build
 # Stage 2: compile the Go binary with the UI embedded
 FROM golang:1.25.4 AS go-builder
 WORKDIR /app
 # Resolve modules before the sources are copied, so this layer stays cached
 # while go.mod / go.sum are unchanged.
 COPY go.mod go.sum ./
 RUN go mod download
 # Bring in the whole build context
 COPY . .
 # Take the UI build output from the ui-builder stage.
 # NOTE(review): assumes `npm run build` writes to /proxy/ui_dist (outside the
 # stage's /app workdir) - confirm against the UI build configuration.
 COPY --from=ui-builder /proxy/ui_dist ./proxy/ui_dist
 # Build for Linux/amd64 with cgo disabled. The short commit hash and the UTC
 # build time are stamped into main.commit, main.version and main.date.
 # The steps are joined with `;` so a failing `git` does not abort the build.
 RUN commit="$(git rev-parse --short HEAD)"; \
     built="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"; \
     CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
     go build -ldflags="-X main.commit=${commit} -X main.version=local_${commit} -X main.date=${built}" \
     -o build/llama-swap
 # Stage 3: runtime image on top of the llama.cpp "full-vulkan" image
 FROM ghcr.io/ggml-org/llama.cpp:full-vulkan
 # Declared after FROM so that it is in scope for this stage.
 # NOTE(review): LS_VER is not referenced by any instruction visible in this
 # file - confirm whether it is still needed.
 ARG LS_VER=170
 ENV HOME=/app
 WORKDIR /app
 # llama-swap binary produced by the go-builder stage
 COPY --from=go-builder /app/build/llama-swap /app/llama-swap
 # Example configuration shipped as the default /config.yaml
 COPY docker/config.example.yaml /config.yaml
 ENTRYPOINT ["/app/llama-swap", "-config", "/config.yaml"]
--- a/README.md
+++ b/README.md
@@ -1,3 +1,15 @@
 # Llama Swap fork with socket activation
 This is a fork of https://github.com/mostlygeek/llama-swap with the following changes:
 - Support for systemd socket activation
 - Container file for a fully containerized build with Podman, targeting Linux and Vulkan
 - Documentation and example service files showing how to set up a socket-activated
   Podman container isolated with `network=none`
   (see the [Readme](docs/socket_activation/README.md))
 # Original readme:
 ![llama-swap header image](header2.png)
 ![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/mostlygeek/llama-swap/total)
 ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/mostlygeek/llama-swap/go-ci.yml)
--- a/docs/socket_activation/README.md
+++ b/docs/socket_activation/README.md
@@ -0,0 +1,41 @@
 # Rootless podman container with Systemd Socket activation
 ## Idea
 By passing in the socket from systemd we minimize resource use when not in use.
 Since no other network access is required for operation, we can configure the container
 with network=none and minimize the risk of the AI escaping. 
 ## Set up 
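 Optional, if you want to run this as a separate user (the user needs a home directory, since the unit files, models and config below are placed under `~`):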
 ```
 sudo useradd llama
 sudo machinectl shell llama@
 ```
 Check out this repository, navigate to its root directory and build the llama.cpp/llama-swap
 container image with:
 ```
 podman build -t localhost/lamaswap:latest -f Build.Containerfile
 ```
 Place `llama.socket` in `~/.config/systemd/user` and adjust ports and interfaces if needed.
 Place llama.container in `~/.config/containers/systemd`. Adjust paths for models and config if desired.
 The files are in `docs/socket_activation`, next to this readme.
 Put model files into the models directory (`~/models`).
 Create a llama-swap `config.yaml` (by default `~/config.yaml`) according to the llama-swap docs.
 Start the socket:
 ```
 systemctl --user daemon-reload
 systemctl --user enable --now llama.socket
 ```
 If you also want the service to run when the user is not logged in, enable lingering:
 ```
 sudo loginctl enable-linger <user>
 ```
 Check that you can access the llama-swap control panel in your browser. For troubleshooting, use e.g. `journalctl -xe`.
--- a/docs/socket_activation/llama.container
+++ b/docs/socket_activation/llama.container
@@ -0,0 +1,20 @@
 # Quadlet unit for the llama-swap container that is started through llama.socket.
 [Unit]
 Description=LLama.cpp (full‑vulkan) inference server
 # No After=/Wants=network-online.target here: the container runs with
 # Network=none and gets its listening socket from systemd, so it does not
 # depend on the host network being up.
 [Container]
 Image=localhost/lamaswap:latest
 #AutoRemove=yes
 #PublishPort=8080:8080
 # No container networking; clients connect through the socket passed in by systemd.
 Network=none
 # Models and configuration are mounted read-only from the user's home directory.
 Volume=%h/models:/models:ro,Z
 Volume=%h/config.yaml:/config.yaml:ro,Z
 # Pass the DRI device nodes into the container (GPU access for Vulkan).
 AddDevice=/dev/dri
 # Left empty: no additional arguments are passed to the image's entrypoint.
 Exec= 
 #[Service]
 #Restart=always
 # NOTE(review): WantedBy=default.target starts the container together with the
 # user session instead of on the first connection to llama.socket - confirm
 # that this is intended.
 [Install]
 WantedBy=default.target
--- a/docs/socket_activation/llama.socket
+++ b/docs/socket_activation/llama.socket
@@ -0,0 +1,8 @@
 # Socket unit for the llama-swap container.
 # NOTE(review): presumably activates the service generated from llama.container
 # (same unit name) - confirm.
 [Unit]
 Description=llama socket
 [Socket]
 # Listens on TCP port 8080 on all IPv4 addresses. Use e.g. 127.0.0.1:8080 to
 # accept local connections only.
 ListenStream=0.0.0.0:8080
 [Install]
 WantedBy=default.target