debate/backend/app/services/sandbox.py
Mikkel Georgsen 77a5aaa0f5 fix(01-05): use container-based builds instead of systemd-nspawn
Replace systemd-nspawn (Arch-only) with Podman/Docker containers:
- Works on any Linux host (Debian, Ubuntu, Fedora, etc.)
- Prefers Podman for rootless security, falls back to Docker
- Uses archlinux:latest image with archiso installed
- Network isolation via --network=none
- Resource limits: 8GB RAM, 4 CPUs
- Deterministic builds via SOURCE_DATE_EPOCH

This allows ISO builds from any development/production environment
rather than requiring an Arch-based build server.

LXC/Proxmox users: enable nesting on the container.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 20:41:36 +00:00

278 lines
9 KiB
Python

"""
Container-based sandbox for isolated ISO builds.
Runs archiso inside an Arch Linux container, allowing builds
from any Linux host (Debian, Ubuntu, Fedora, etc.).
Supports both Podman (preferred) and Docker:
- Podman: Rootless by default, no daemon, better security
- Docker: Fallback if Podman not available
Security measures:
- --network=none: No network access during build
- --read-only: Immutable container filesystem
- --tmpfs: Writable temp directories only
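- --cap-drop=ALL + SYS_ADMIN/MKNOD: Reduced privileges are requested, but run_build also passes --privileged (for loop devices), which is expected to supersede these capability limits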
- Resource limits: 8GB RAM, 4 CPUs
"""
import asyncio
import shutil
from dataclasses import dataclass
from pathlib import Path
from backend.app.core.config import settings
# Upstream Arch Linux base image; pulled when the build image has to be created.
ARCHISO_BASE_IMAGE = "ghcr.io/archlinux/archlinux:latest"
# Local tag of the derived image (base image + archiso) that builds run in.
BUILD_IMAGE = "debate-archiso-builder:latest"
@dataclass
class SandboxConfig:
    """Resource and time limits applied to a sandboxed build."""

    # Memory ceiling, handed verbatim to the runtime's --memory flag.
    memory_limit: str = "8g"
    # CPU allowance, handed to the runtime's --cpus flag.
    cpu_count: int = 4
    # Hard limit on build duration: 20 minutes.
    timeout_seconds: int = 20 * 60
    # Warning threshold: 15 minutes.
    warning_seconds: int = 15 * 60
def detect_container_runtime() -> str | None:
"""
Detect available container runtime.
Prefers Podman for rootless security, falls back to Docker.
Returns the command name or None if neither available.
"""
# Prefer podman for rootless security
if shutil.which("podman"):
return "podman"
if shutil.which("docker"):
return "docker"
return None
class BuildSandbox:
"""Manages container-based sandboxed build environments."""
def __init__(
self,
builds_root: Path | None = None,
config: SandboxConfig | None = None,
runtime: str | None = None,
):
self.builds_root = builds_root or Path(settings.sandbox_root) / "builds"
self.config = config or SandboxConfig()
self._runtime = runtime # Allow override for testing
self._runtime_cmd: str | None = None
@property
def runtime(self) -> str:
"""Get container runtime command, detecting if needed."""
if self._runtime_cmd is None:
self._runtime_cmd = self._runtime or detect_container_runtime()
if self._runtime_cmd is None:
raise RuntimeError(
"No container runtime found. "
"Install podman (recommended) or docker."
)
return self._runtime_cmd
async def ensure_build_image(self) -> tuple[bool, str]:
"""
Ensure the build image exists, pulling/building if needed.
Returns:
Tuple of (success, message)
"""
runtime = self.runtime
# Check if our custom build image exists
proc = await asyncio.create_subprocess_exec(
runtime, "image", "inspect", BUILD_IMAGE,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.DEVNULL,
)
await proc.wait()
if proc.returncode == 0:
return True, f"Build image ready ({runtime})"
# Build image doesn't exist, create it from base Arch image
# Pull base image first
proc = await asyncio.create_subprocess_exec(
runtime, "pull", ARCHISO_BASE_IMAGE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
return False, f"Failed to pull base image: {stderr.decode()}"
# Create our build image with archiso installed
dockerfile = """\
FROM ghcr.io/archlinux/archlinux:latest
# Update and install archiso
RUN pacman -Syu --noconfirm && \\
pacman -S --noconfirm archiso && \\
pacman -Scc --noconfirm
# Set fixed locale for determinism
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen
ENV LC_ALL=C
ENV TZ=UTC
# Create build directories
RUN mkdir -p /build/profile /build/output /build/work
WORKDIR /build
"""
proc = await asyncio.create_subprocess_exec(
runtime, "build", "-t", BUILD_IMAGE, "-f", "-", ".",
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate(input=dockerfile.encode())
if proc.returncode != 0:
return False, f"Failed to build image: {stderr.decode()}"
return True, f"Build image created ({runtime})"
async def run_build(
self,
build_id: str,
profile_path: Path,
output_path: Path,
source_date_epoch: int,
) -> tuple[int, str, str]:
"""
Execute archiso build in container.
Args:
build_id: Unique identifier for this build
profile_path: Host path to archiso profile directory
output_path: Host path where ISO will be written
source_date_epoch: Timestamp for reproducible builds
Returns:
Tuple of (return_code, stdout, stderr)
"""
runtime = self.runtime
output_path.mkdir(parents=True, exist_ok=True)
# Ensure build image exists
success, message = await self.ensure_build_image()
if not success:
return -1, "", message
container_name = f"debate-build-{build_id}"
# Build container command
# Note: mkarchiso requires privileged for loop device mounts
container_cmd = [
runtime, "run",
"--name", container_name,
"--rm", # Remove container after exit
# Security: No network access
"--network=none",
# Security: Read-only root filesystem
"--read-only",
# Writable temp directories
"--tmpfs=/tmp:exec,mode=1777",
"--tmpfs=/var/tmp:exec,mode=1777",
"--tmpfs=/build/work:exec",
# Mount profile (read-only) and output (read-write)
"-v", f"{profile_path.absolute()}:/build/profile:ro",
"-v", f"{output_path.absolute()}:/build/output:rw",
# Deterministic build environment
"-e", f"SOURCE_DATE_EPOCH={source_date_epoch}",
"-e", "LC_ALL=C",
"-e", "TZ=UTC",
# Resource limits
f"--memory={self.config.memory_limit}",
f"--cpus={self.config.cpu_count}",
# Security: Drop all capabilities, add only what's needed
"--cap-drop=ALL",
"--cap-add=SYS_ADMIN", # Required for loop devices in mkarchiso
"--cap-add=MKNOD", # Required for device nodes
# Required for loop device access (mkarchiso mounts squashfs)
"--privileged",
# Image and command
BUILD_IMAGE,
"mkarchiso",
"-v",
"-w", "/build/work",
"-o", "/build/output",
"/build/profile",
]
proc = await asyncio.create_subprocess_exec(
*container_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
try:
stdout, stderr = await asyncio.wait_for(
proc.communicate(),
timeout=self.config.timeout_seconds,
)
return_code = proc.returncode if proc.returncode is not None else -1
return return_code, stdout.decode(), stderr.decode()
except TimeoutError:
# Kill the container on timeout
kill_proc = await asyncio.create_subprocess_exec(
runtime, "kill", container_name,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.DEVNULL,
)
await kill_proc.wait()
timeout_msg = f"Build timed out after {self.config.timeout_seconds} seconds"
return -1, "", timeout_msg
async def cleanup_build(self, build_id: str) -> None:
"""
Clean up any resources from a build.
Container --rm flag handles cleanup, but this ensures
any orphaned containers are removed.
"""
runtime = self.runtime
container_name = f"debate-build-{build_id}"
# Force remove container if it still exists
proc = await asyncio.create_subprocess_exec(
runtime, "rm", "-f", container_name,
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.DEVNULL,
)
await proc.wait()
async def check_runtime(self) -> tuple[bool, str]:
"""
Check if container runtime is available and working.
Returns:
Tuple of (available, message)
"""
try:
runtime = self.runtime
except RuntimeError as e:
return False, str(e)
# Verify runtime works
proc = await asyncio.create_subprocess_exec(
runtime, "version",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
if proc.returncode == 0:
return True, f"{runtime} is available"
return False, f"{runtime} not working: {stderr.decode()}"