feat(01-05): add deterministic build configuration service

- Implement DeterministicBuildConfig class for reproducible builds
- Compute config hash with normalized JSON and sorted inputs
- Derive SOURCE_DATE_EPOCH from config hash (no wall clock dependency)
- Create archiso profile with fixed locale, timezone, compression settings
- Add tests verifying hash determinism and order independence
This commit is contained in:
Mikkel Georgsen 2026-01-25 20:20:11 +00:00
parent 0d1a008d2f
commit c49aee7b0a
3 changed files with 255 additions and 0 deletions

View file

@ -0,0 +1,192 @@
"""
Deterministic build configuration for reproducible ISOs.
Critical: Same configuration must produce identical ISO hash.
This is required for caching to work correctly.
Determinism factors:
- SOURCE_DATE_EPOCH: Fixed timestamps in all generated files
- LC_ALL=C: Fixed locale for sorting
- TZ=UTC: Fixed timezone
- Sorted inputs: Packages, files always in consistent order
- Fixed compression: Consistent squashfs settings
"""
import hashlib
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any
@dataclass
class OverlayFile:
    """Describes a single file that is placed into the ISO overlay."""

    # Absolute destination inside the ISO (e.g., /etc/skel/.bashrc)
    path: str
    # Text that ends up in the file
    content: str
    # Permission bits written as an octal string
    mode: str = "0644"
@dataclass
class BuildConfiguration:
    """Build configuration in normalized form, used for deterministic hashing."""

    # Names of the packages to install
    packages: list[str]
    # Overlay definitions, each a free-form mapping
    overlays: list[dict[str, Any]]
    # Optional settings with fixed defaults
    locale: str = "en_US.UTF-8"
    timezone: str = "UTC"
class DeterministicBuildConfig:
    """Ensures reproducible ISO builds.

    All methods are static. ``compute_config_hash`` and
    ``create_archiso_profile`` share the same defaults and the same overlay
    normalization, so two configurations that hash identically also produce
    the same profile on disk.
    """

    # Defaults shared by hashing and profile generation. They must stay in
    # sync: if the hash assumed one default and the profile writer another,
    # two configs with the same hash could yield different ISOs.
    _DEFAULT_PACKAGES = ("base", "linux")
    _DEFAULT_FILE_MODE = "0644"

    # Window that derived timestamps are mapped into.
    _MIN_EPOCH = 1577836800  # 2020-01-01T00:00:00Z
    _MAX_EPOCH = 1924991999  # 2030-12-31T23:59:59Z

    @staticmethod
    def _canonical_overlays(
        config: dict[str, Any],
    ) -> list[tuple[str, list[tuple[str, str, str]]]]:
        """Return overlays as ``(name, [(path, content, mode), ...])`` in canonical order.

        Names and paths are stripped *before* sorting, and the sort uses the
        full tuple as key, so neither surrounding whitespace nor the input
        order of entries (including entries with duplicate names or paths)
        influences the result.
        """
        overlays = []
        for overlay in config.get("overlays", []):
            files = sorted(
                (
                    f.get("path", "").strip(),
                    f.get("content", ""),
                    f.get("mode", DeterministicBuildConfig._DEFAULT_FILE_MODE),
                )
                for f in overlay.get("files", [])
            )
            overlays.append((overlay.get("name", "").strip(), files))
        overlays.sort()
        return overlays

    @staticmethod
    def compute_config_hash(config: dict[str, Any]) -> str:
        """
        Generate deterministic hash of build configuration.

        Process:
        1. Normalize all inputs (sort and deduplicate packages, strip and
           sort overlay names and file paths)
        2. Hash file contents (not file objects)
        3. Use consistent JSON serialization (sorted keys, no whitespace)

        A missing ``packages`` key is hashed as the default package set that
        ``create_archiso_profile`` would install, so it is distinguished from
        an explicitly empty package list.

        Args:
            config: Build configuration with optional keys ``packages``,
                ``overlays``, ``locale`` and ``timezone``.

        Returns:
            SHA-256 hex digest of the normalized configuration.
        """
        # Normalize packages (sorted, deduplicated)
        packages = sorted(
            set(config.get("packages", DeterministicBuildConfig._DEFAULT_PACKAGES))
        )

        # Normalize overlays; only a digest of each file's content is kept so
        # the serialized form stays small.
        normalized_overlays = [
            {
                "name": name,
                "files": [
                    {
                        "path": path,
                        "content_hash": hashlib.sha256(
                            content.encode("utf-8")
                        ).hexdigest(),
                        "mode": mode,
                    }
                    for path, content, mode in files
                ],
            }
            for name, files in DeterministicBuildConfig._canonical_overlays(config)
        ]

        normalized = {
            "packages": packages,
            "overlays": normalized_overlays,
            "locale": config.get("locale", "en_US.UTF-8"),
            "timezone": config.get("timezone", "UTC"),
        }

        # JSON with sorted keys and fixed separators for determinism
        config_json = json.dumps(normalized, sort_keys=True, separators=(",", ":"))
        return hashlib.sha256(config_json.encode("utf-8")).hexdigest()

    @staticmethod
    def get_source_date_epoch(config_hash: str) -> int:
        """
        Generate deterministic timestamp from config hash.

        Using a hash-derived timestamp ensures:
        - Same config always gets same timestamp
        - No dependency on wall clock time

        Distinct hashes usually map to distinct timestamps, but collisions
        are possible because the hash is folded into a much smaller range.

        Args:
            config_hash: Hexadecimal string; only the first 16 characters
                (8 bytes) are used.

        Returns:
            Unix timestamp ``t`` with 2020-01-01 <= t < 2030-12-31T23:59:59Z
            (the upper bound itself is never returned).

        Raises:
            ValueError: If the leading characters are not valid hexadecimal
                (this includes an empty string).
        """
        hash_int = int(config_hash[:16], 16)
        span = DeterministicBuildConfig._MAX_EPOCH - DeterministicBuildConfig._MIN_EPOCH
        return DeterministicBuildConfig._MIN_EPOCH + (hash_int % span)

    @staticmethod
    def create_archiso_profile(
        config: dict[str, Any],
        profile_path: Path,
        source_date_epoch: int,
    ) -> None:
        """
        Generate archiso profile with deterministic settings.

        Creates:
        - packages.x86_64: Sorted package list
        - profiledef.sh: Build configuration
        - pacman.conf: Package manager config
        - airootfs/: Overlay files

        Args:
            config: Build configuration (same shape as for
                ``compute_config_hash``).
            profile_path: Directory to write the profile into; created if
                it does not exist.
            source_date_epoch: Unix timestamp used for the ISO label and
                version.

        Raises:
            ValueError: If an overlay file path is empty or contains a
                ``..`` component, or a file mode is not an octal string.
        """
        from datetime import datetime, timezone

        default_packages = DeterministicBuildConfig._DEFAULT_PACKAGES

        # Validate and resolve every overlay file before touching the disk so
        # a bad entry cannot leave a half-written profile behind.
        airootfs = profile_path / "airootfs"
        overlay_files = []
        for _name, files in DeterministicBuildConfig._canonical_overlays(config):
            for path, content, mode in files:
                relative = path.lstrip("/")
                parts = Path(relative).parts
                if not parts or ".." in parts:
                    # Refuse paths that would be written outside airootfs/.
                    raise ValueError(f"Invalid overlay file path: {path!r}")
                overlay_files.append((airootfs / relative, content, int(mode, 8)))

        profile_path.mkdir(parents=True, exist_ok=True)

        # packages.x86_64 (sorted for determinism)
        packages = sorted(set(config.get("packages", default_packages)))
        packages_file = profile_path / "packages.x86_64"
        packages_file.write_text("\n".join(packages) + "\n", encoding="utf-8")

        # Format the date here, in UTC, instead of embedding a `$(date ...)`
        # call: the shell would evaluate it under the build host's timezone.
        build_time = datetime.fromtimestamp(source_date_epoch, tz=timezone.utc)
        iso_date = build_time.strftime("%Y%m")
        iso_version = build_time.strftime("%Y.%m.%d")

        # profiledef.sh
        profiledef = profile_path / "profiledef.sh"
        profiledef.write_text(
            f"""#!/usr/bin/env bash
# Deterministic archiso profile
# Generated for Debate platform
iso_name="debate-custom"
iso_label="DEBATE_{iso_date}"
iso_publisher="Debate Platform <https://debate.example.com>"
iso_application="Debate Custom Linux"
iso_version="{iso_version}"
install_dir="arch"
bootmodes=('bios.syslinux.mbr' 'bios.syslinux.eltorito' \\
'uefi-x64.systemd-boot.esp' 'uefi-x64.systemd-boot.eltorito')
arch="x86_64"
pacman_conf="pacman.conf"
airootfs_image_type="squashfs"
airootfs_image_tool_options=('-comp' 'xz' '-Xbcj' 'x86' '-b' '1M' '-Xdict-size' '1M')
file_permissions=(
["/etc/shadow"]="0:0:0400"
["/root"]="0:0:750"
["/etc/gshadow"]="0:0:0400"
)
""",
            encoding="utf-8",
        )

        # pacman.conf
        pacman_conf = profile_path / "pacman.conf"
        pacman_conf.write_text(
            """[options]
Architecture = auto
CheckSpace
SigLevel = Required DatabaseOptional
LocalFileSigLevel = Optional
[core]
Include = /etc/pacman.d/mirrorlist
[extra]
Include = /etc/pacman.d/mirrorlist
""",
            encoding="utf-8",
        )

        # airootfs structure with overlay files, written in the same
        # canonical order that the hash uses.
        airootfs.mkdir(exist_ok=True)
        for file_path, content, mode_bits in overlay_files:
            file_path.parent.mkdir(parents=True, exist_ok=True)
            # UTF-8 matches the encoding used when hashing the content.
            file_path.write_text(content, encoding="utf-8")
            # Always set the mode so the result does not depend on the umask.
            file_path.chmod(mode_bits)

1
tests/__init__.py Normal file
View file

@ -0,0 +1 @@
"""Tests package."""

View file

@ -0,0 +1,62 @@
"""Tests for deterministic build configuration."""
from backend.app.services.deterministic import DeterministicBuildConfig
class TestDeterministicBuildConfig:
    """Checks that identical inputs always yield identical outputs."""

    def test_hash_deterministic(self) -> None:
        """Hashing one configuration twice gives the same digest."""
        build_config = {
            "packages": ["vim", "git", "base"],
            "overlays": [
                {
                    "name": "test",
                    "files": [{"path": "/etc/test", "content": "hello"}],
                }
            ],
        }
        compute = DeterministicBuildConfig.compute_config_hash
        first_digest = compute(build_config)
        second_digest = compute(build_config)
        assert first_digest == second_digest

    def test_hash_order_independent(self) -> None:
        """The order in which packages are listed has no effect on the hash."""
        compute = DeterministicBuildConfig.compute_config_hash
        unsorted_digest = compute({"packages": ["vim", "git", "base"], "overlays": []})
        sorted_digest = compute({"packages": ["base", "git", "vim"], "overlays": []})
        assert unsorted_digest == sorted_digest

    def test_hash_different_configs(self) -> None:
        """Configurations with different packages hash differently."""
        compute = DeterministicBuildConfig.compute_config_hash
        vim_digest = compute({"packages": ["vim"], "overlays": []})
        emacs_digest = compute({"packages": ["emacs"], "overlays": []})
        assert vim_digest != emacs_digest

    def test_source_date_epoch_deterministic(self) -> None:
        """Deriving the timestamp twice from one hash gives the same value."""
        config_hash = "abc123def456"
        first_epoch = DeterministicBuildConfig.get_source_date_epoch(config_hash)
        second_epoch = DeterministicBuildConfig.get_source_date_epoch(config_hash)
        assert first_epoch == second_epoch

    def test_source_date_epoch_in_range(self) -> None:
        """The derived timestamp falls between 2020 and 2030."""
        lower_bound = 1577836800
        upper_bound = 1924991999
        epoch = DeterministicBuildConfig.get_source_date_epoch("abc123def456")
        assert lower_bound <= epoch <= upper_bound