From c49aee7b0a5b50bee1829f57342753ce3166990f Mon Sep 17 00:00:00 2001
From: Mikkel Georgsen
Date: Sun, 25 Jan 2026 20:20:11 +0000
Subject: [PATCH] feat(01-05): add deterministic build configuration service

- Implement DeterministicBuildConfig class for reproducible builds
- Compute config hash with normalized JSON and sorted inputs
- Derive SOURCE_DATE_EPOCH from config hash (no wall clock dependency)
- Create archiso profile with fixed locale, timezone, compression settings
- Share input normalization between hashing and profile generation
- Reject overlay paths that would escape airootfs/
- Add tests verifying hash determinism and order independence
---
 backend/app/services/deterministic.py | 254 ++++++++++++++++++++++++++
 tests/__init__.py                     |   1 +
 tests/test_deterministic.py           | 107 +++++++++++
 3 files changed, 362 insertions(+)
 create mode 100644 backend/app/services/deterministic.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_deterministic.py

diff --git a/backend/app/services/deterministic.py b/backend/app/services/deterministic.py
new file mode 100644
index 0000000..8ba858e
--- /dev/null
+++ b/backend/app/services/deterministic.py
@@ -0,0 +1,254 @@
+"""
+Deterministic build configuration for reproducible ISOs.
+
+Critical: Same configuration must produce identical ISO hash.
+This is required for caching to work correctly.
+
+Determinism factors:
+- SOURCE_DATE_EPOCH: Fixed timestamps in all generated files
+- LC_ALL=C: Fixed locale for sorting
+- TZ=UTC: Fixed timezone
+- Sorted inputs: Packages, files always in consistent order
+- Fixed compression: Consistent squashfs settings
+"""
+
+import hashlib
+import json
+import time
+from dataclasses import dataclass
+from pathlib import Path, PurePosixPath
+from typing import Any
+
+# Defaults shared by hashing and profile generation so both always agree.
+DEFAULT_PACKAGES = ("base", "linux")
+DEFAULT_FILE_MODE = "0644"
+
+# SOURCE_DATE_EPOCH window: 2020-01-01 00:00:00 UTC to 2030-12-31 23:59:59 UTC.
+MIN_SOURCE_DATE_EPOCH = 1577836800
+MAX_SOURCE_DATE_EPOCH = 1924991999
+
+
+@dataclass
+class OverlayFile:
+    """A file to be included in the overlay."""
+
+    path: str  # Absolute path in ISO (e.g., /etc/skel/.bashrc)
+    content: str
+    mode: str = "0644"
+
+
+@dataclass
+class BuildConfiguration:
+    """Normalized build configuration for deterministic hashing."""
+
+    packages: list[str]
+    overlays: list[dict[str, Any]]
+    locale: str = "en_US.UTF-8"
+    timezone: str = "UTC"
+
+
+class DeterministicBuildConfig:
+    """Ensures reproducible ISO builds."""
+
+    @staticmethod
+    def _normalize_packages(config: dict[str, Any]) -> list[str]:
+        """
+        Return the sorted, deduplicated package list of a config.
+
+        Hashing and profile generation share this helper, so a config
+        without a package list hashes as the packages it actually builds.
+        """
+        packages = config.get("packages")
+        if packages is None:
+            packages = DEFAULT_PACKAGES
+        return sorted(set(packages))
+
+    @staticmethod
+    def _normalize_overlays(
+        config: dict[str, Any],
+    ) -> list[tuple[str, list[tuple[str, str, str]]]]:
+        """
+        Return overlays as a sorted list of (name, [(path, content, mode)]).
+
+        Names and paths are stripped before sorting and every field is part
+        of the sort key, so the result never depends on input order, even
+        when overlays share a name or files share a path.
+        """
+        overlays = []
+        for overlay in config.get("overlays") or []:
+            files = sorted(
+                (
+                    f.get("path", "").strip(),
+                    f.get("content", ""),
+                    f.get("mode", DEFAULT_FILE_MODE),
+                )
+                for f in overlay.get("files") or []
+            )
+            overlays.append((overlay.get("name", "").strip(), files))
+        return sorted(overlays)
+
+    @staticmethod
+    def _airootfs_relative_path(path: str) -> PurePosixPath:
+        """
+        Map an absolute ISO path to a path relative to airootfs/.
+
+        Raises:
+            ValueError: If the path is empty or contains "..", which could
+                place the file outside the profile directory.
+        """
+        relative = PurePosixPath(path.lstrip("/"))
+        if not relative.parts or ".." in relative.parts:
+            raise ValueError(f"Invalid overlay file path: {path!r}")
+        return relative
+
+    @staticmethod
+    def compute_config_hash(config: dict[str, Any]) -> str:
+        """
+        Generate deterministic hash of build configuration.
+
+        Process:
+        1. Normalize all inputs (sort lists, normalize paths)
+        2. Hash file contents (not file objects)
+        3. Use consistent JSON serialization
+
+        Returns:
+            SHA-256 hash of normalized configuration
+        """
+        overlays = DeterministicBuildConfig._normalize_overlays(config)
+        normalized_overlays = [
+            {
+                "name": name,
+                "files": [
+                    {
+                        "path": path,
+                        "content_hash": hashlib.sha256(
+                            content.encode("utf-8")
+                        ).hexdigest(),
+                        "mode": mode,
+                    }
+                    for path, content, mode in files
+                ],
+            }
+            for name, files in overlays
+        ]
+
+        # Build normalized config
+        normalized = {
+            "packages": DeterministicBuildConfig._normalize_packages(config),
+            "overlays": normalized_overlays,
+            "locale": config.get("locale", "en_US.UTF-8"),
+            "timezone": config.get("timezone", "UTC"),
+        }
+
+        # JSON with sorted keys for determinism
+        config_json = json.dumps(normalized, sort_keys=True, separators=(",", ":"))
+        return hashlib.sha256(config_json.encode("utf-8")).hexdigest()
+
+    @staticmethod
+    def get_source_date_epoch(config_hash: str) -> int:
+        """
+        Generate deterministic timestamp from config hash.
+
+        Using hash-derived timestamp ensures:
+        - Same config always gets same timestamp
+        - Different configs almost always get different timestamps
+        - No dependency on wall clock time
+
+        The timestamp is within a reasonable range (2020-2030).
+
+        Raises:
+            ValueError: If config_hash does not start with hex digits.
+        """
+        # Use first 8 bytes (16 hex digits) of hash to generate timestamp
+        hash_int = int(config_hash[:16], 16)
+        span = MAX_SOURCE_DATE_EPOCH - MIN_SOURCE_DATE_EPOCH
+        return MIN_SOURCE_DATE_EPOCH + (hash_int % span)
+
+    @staticmethod
+    def create_archiso_profile(
+        config: dict[str, Any],
+        profile_path: Path,
+        source_date_epoch: int,
+    ) -> None:
+        """
+        Generate archiso profile with deterministic settings.
+
+        Creates:
+        - packages.x86_64: Sorted package list
+        - profiledef.sh: Build configuration
+        - pacman.conf: Package manager config
+        - airootfs/: Overlay files
+
+        Raises:
+            ValueError: If an overlay file path is empty or contains "..".
+        """
+        profile_path.mkdir(parents=True, exist_ok=True)
+
+        # packages.x86_64 (same normalization as the config hash)
+        packages = DeterministicBuildConfig._normalize_packages(config)
+        packages_file = profile_path / "packages.x86_64"
+        packages_file.write_text("\n".join(packages) + "\n", encoding="utf-8")
+
+        # profiledef.sh: render dates here, in UTC, so the profile does not
+        # depend on the TZ or the date(1) implementation of the build host.
+        build_time = time.gmtime(source_date_epoch)
+        iso_date = time.strftime("%Y%m", build_time)
+        iso_version = time.strftime("%Y.%m.%d", build_time)
+        profiledef = profile_path / "profiledef.sh"
+
+        profiledef_text = f"""#!/usr/bin/env bash
+# Deterministic archiso profile
+# Generated for Debate platform
+
+iso_name="debate-custom"
+iso_label="DEBATE_{iso_date}"
+iso_publisher="Debate Platform "
+iso_application="Debate Custom Linux"
+iso_version="{iso_version}"
+install_dir="arch"
+bootmodes=('bios.syslinux.mbr' 'bios.syslinux.eltorito' \\
+           'uefi-x64.systemd-boot.esp' 'uefi-x64.systemd-boot.eltorito')
+arch="x86_64"
+pacman_conf="pacman.conf"
+airootfs_image_type="squashfs"
+airootfs_image_tool_options=('-comp' 'xz' '-Xbcj' 'x86' '-b' '1M' '-Xdict-size' '1M')
+
+file_permissions=(
+  ["/etc/shadow"]="0:0:0400"
+  ["/root"]="0:0:750"
+  ["/etc/gshadow"]="0:0:0400"
+)
+"""
+        profiledef.write_text(profiledef_text, encoding="utf-8")
+
+        # pacman.conf
+        pacman_conf = profile_path / "pacman.conf"
+        pacman_conf_text = """[options]
+Architecture = auto
+CheckSpace
+SigLevel = Required DatabaseOptional
+LocalFileSigLevel = Optional
+
+[core]
+Include = /etc/pacman.d/mirrorlist
+
+[extra]
+Include = /etc/pacman.d/mirrorlist
+"""
+        pacman_conf.write_text(pacman_conf_text, encoding="utf-8")
+
+        # airootfs structure with overlay files, written in the same
+        # normalized order used for hashing so that colliding paths resolve
+        # the same way regardless of input order.
+        airootfs = profile_path / "airootfs"
+        airootfs.mkdir(exist_ok=True)
+
+        for _name, files in DeterministicBuildConfig._normalize_overlays(config):
+            for path, content, mode in files:
+                relative = DeterministicBuildConfig._airootfs_relative_path(path)
+                file_path = airootfs.joinpath(*relative.parts)
+                file_path.parent.mkdir(parents=True, exist_ok=True)
+                # Write exactly the bytes that were hashed.
+                file_path.write_bytes(content.encode("utf-8"))
+                # Always chmod: a missing mode must not fall back to the umask.
+                file_path.chmod(int(mode, 8))
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..46816dd
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests package."""
diff --git a/tests/test_deterministic.py b/tests/test_deterministic.py
new file mode 100644
index 0000000..61415eb
--- /dev/null
+++ b/tests/test_deterministic.py
@@ -0,0 +1,107 @@
+"""Tests for deterministic build configuration."""
+
+from pathlib import Path
+
+import pytest
+
+from backend.app.services.deterministic import DeterministicBuildConfig
+
+
+class TestDeterministicBuildConfig:
+    """Test that same inputs produce same outputs."""
+
+    def test_hash_deterministic(self) -> None:
+        """Same config produces same hash."""
+        config = {
+            "packages": ["vim", "git", "base"],
+            "overlays": [
+                {
+                    "name": "test",
+                    "files": [{"path": "/etc/test", "content": "hello"}],
+                }
+            ],
+        }
+
+        hash1 = DeterministicBuildConfig.compute_config_hash(config)
+        hash2 = DeterministicBuildConfig.compute_config_hash(config)
+
+        assert hash1 == hash2
+
+    def test_hash_order_independent(self) -> None:
+        """Package order doesn't affect hash."""
+        config1 = {"packages": ["vim", "git", "base"], "overlays": []}
+        config2 = {"packages": ["base", "git", "vim"], "overlays": []}
+
+        hash1 = DeterministicBuildConfig.compute_config_hash(config1)
+        hash2 = DeterministicBuildConfig.compute_config_hash(config2)
+
+        assert hash1 == hash2
+
+    def test_hash_overlay_order_independent(self) -> None:
+        """Overlays sharing a name hash the same in any order."""
+        first = {"name": "x", "files": [{"path": "/a", "content": "1"}]}
+        second = {"name": "x", "files": [{"path": "/b", "content": "2"}]}
+        config1 = {"packages": ["base"], "overlays": [first, second]}
+        config2 = {"packages": ["base"], "overlays": [second, first]}
+
+        hash1 = DeterministicBuildConfig.compute_config_hash(config1)
+        hash2 = DeterministicBuildConfig.compute_config_hash(config2)
+
+        assert hash1 == hash2
+
+    def test_hash_default_packages(self) -> None:
+        """Omitted packages hash like the defaults the profile installs."""
+        config1 = {"overlays": []}
+        config2 = {"packages": ["linux", "base"], "overlays": []}
+
+        hash1 = DeterministicBuildConfig.compute_config_hash(config1)
+        hash2 = DeterministicBuildConfig.compute_config_hash(config2)
+
+        assert hash1 == hash2
+
+    def test_hash_different_configs(self) -> None:
+        """Different configs produce different hashes."""
+        config1 = {"packages": ["vim"], "overlays": []}
+        config2 = {"packages": ["emacs"], "overlays": []}
+
+        hash1 = DeterministicBuildConfig.compute_config_hash(config1)
+        hash2 = DeterministicBuildConfig.compute_config_hash(config2)
+
+        assert hash1 != hash2
+
+    def test_source_date_epoch_deterministic(self) -> None:
+        """Same hash produces same timestamp."""
+        config_hash = "abc123def456"
+
+        epoch1 = DeterministicBuildConfig.get_source_date_epoch(config_hash)
+        epoch2 = DeterministicBuildConfig.get_source_date_epoch(config_hash)
+
+        assert epoch1 == epoch2
+
+    def test_source_date_epoch_in_range(self) -> None:
+        """Timestamp is within reasonable range."""
+        config_hash = "abc123def456"
+
+        epoch = DeterministicBuildConfig.get_source_date_epoch(config_hash)
+
+        # Should be between 2020 and 2030
+        assert 1577836800 <= epoch <= 1924991999
+
+    def test_profile_rejects_path_traversal(self, tmp_path: Path) -> None:
+        """Overlay paths may not escape airootfs/."""
+        files = [{"path": "/../evil", "content": "x"}]
+        config = {"overlays": [{"name": "bad", "files": files}]}
+
+        with pytest.raises(ValueError):
+            DeterministicBuildConfig.create_archiso_profile(config, tmp_path, 0)
+
+    def test_profile_applies_default_mode(self, tmp_path: Path) -> None:
+        """Files without an explicit mode are written as 0644."""
+        files = [{"path": "/etc/test", "content": "hi"}]
+        config = {"overlays": [{"name": "o", "files": files}]}
+
+        DeterministicBuildConfig.create_archiso_profile(config, tmp_path, 0)
+
+        written = tmp_path / "airootfs" / "etc" / "test"
+        assert written.read_text(encoding="utf-8") == "hi"
+        assert written.stat().st_mode & 0o777 == 0o644