""" Deterministic build configuration for reproducible ISOs. Critical: Same configuration must produce identical ISO hash. This is required for caching to work correctly. Determinism factors: - SOURCE_DATE_EPOCH: Fixed timestamps in all generated files - LC_ALL=C: Fixed locale for sorting - TZ=UTC: Fixed timezone - Sorted inputs: Packages, files always in consistent order - Fixed compression: Consistent squashfs settings """ import hashlib import json from dataclasses import dataclass from pathlib import Path from typing import Any @dataclass class OverlayFile: """A file to be included in the overlay.""" path: str # Absolute path in ISO (e.g., /etc/skel/.bashrc) content: str mode: str = "0644" @dataclass class BuildConfiguration: """Normalized build configuration for deterministic hashing.""" packages: list[str] overlays: list[dict[str, Any]] locale: str = "en_US.UTF-8" timezone: str = "UTC" class DeterministicBuildConfig: """Ensures reproducible ISO builds.""" @staticmethod def compute_config_hash(config: dict[str, Any]) -> str: """ Generate deterministic hash of build configuration. Process: 1. Normalize all inputs (sort lists, normalize paths) 2. Hash file contents (not file objects) 3. Use consistent JSON serialization Returns: SHA-256 hash of normalized configuration """ # Normalize packages (sorted, deduplicated) packages = sorted(set(config.get("packages", []))) # Normalize overlays normalized_overlays = [] for overlay in sorted( config.get("overlays", []), key=lambda x: x.get("name", "") ): normalized_files = [] for f in sorted( overlay.get("files", []), key=lambda x: x.get("path", "") ): content = f.get("content", "") content_hash = hashlib.sha256(content.encode()).hexdigest() normalized_files.append( { "path": f.get("path", "").strip(), "content_hash": content_hash, "mode": f.get("mode", "0644"), } ) normalized_overlays.append( { "name": overlay.get("name", "").strip(), "files": normalized_files, } ) # Build normalized config normalized = { "packages": packages, "overlays": normalized_overlays, "locale": config.get("locale", "en_US.UTF-8"), "timezone": config.get("timezone", "UTC"), } # JSON with sorted keys for determinism config_json = json.dumps(normalized, sort_keys=True, separators=(",", ":")) return hashlib.sha256(config_json.encode()).hexdigest() @staticmethod def get_source_date_epoch(config_hash: str) -> int: """ Generate deterministic timestamp from config hash. Using hash-derived timestamp ensures: - Same config always gets same timestamp - Different configs get different timestamps - No dependency on wall clock time The timestamp is within a reasonable range (2020-2030). """ # Use first 8 bytes of hash to generate timestamp hash_int = int(config_hash[:16], 16) # Map to range: Jan 1, 2020 to Dec 31, 2030 min_epoch = 1577836800 # 2020-01-01 max_epoch = 1924991999 # 2030-12-31 return min_epoch + (hash_int % (max_epoch - min_epoch)) @staticmethod def create_archiso_profile( config: dict[str, Any], profile_path: Path, source_date_epoch: int, ) -> None: """ Generate archiso profile with deterministic settings. Creates: - packages.x86_64: Sorted package list - profiledef.sh: Build configuration - pacman.conf: Package manager config - airootfs/: Overlay files """ profile_path.mkdir(parents=True, exist_ok=True) # packages.x86_64 (sorted for determinism) packages = sorted(set(config.get("packages", ["base", "linux"]))) packages_file = profile_path / "packages.x86_64" packages_file.write_text("\n".join(packages) + "\n") # profiledef.sh profiledef = profile_path / "profiledef.sh" iso_date = f"$(date --date=@{source_date_epoch} +%Y%m)" iso_version = f"$(date --date=@{source_date_epoch} +%Y.%m.%d)" profiledef.write_text(f"""#!/usr/bin/env bash # Deterministic archiso profile # Generated for Debate platform iso_name="debate-custom" iso_label="DEBATE_{iso_date}" iso_publisher="Debate Platform " iso_application="Debate Custom Linux" iso_version="{iso_version}" install_dir="arch" bootmodes=('bios.syslinux.mbr' 'bios.syslinux.eltorito' \\ 'uefi-x64.systemd-boot.esp' 'uefi-x64.systemd-boot.eltorito') arch="x86_64" pacman_conf="pacman.conf" airootfs_image_type="squashfs" airootfs_image_tool_options=('-comp' 'xz' '-Xbcj' 'x86' '-b' '1M' '-Xdict-size' '1M') file_permissions=( ["/etc/shadow"]="0:0:0400" ["/root"]="0:0:750" ["/etc/gshadow"]="0:0:0400" ) """) # pacman.conf pacman_conf = profile_path / "pacman.conf" pacman_conf.write_text("""[options] Architecture = auto CheckSpace SigLevel = Required DatabaseOptional LocalFileSigLevel = Optional [core] Include = /etc/pacman.d/mirrorlist [extra] Include = /etc/pacman.d/mirrorlist """) # airootfs structure with overlay files airootfs = profile_path / "airootfs" airootfs.mkdir(exist_ok=True) for overlay in config.get("overlays", []): for file_config in overlay.get("files", []): file_path = airootfs / file_config["path"].lstrip("/") file_path.parent.mkdir(parents=True, exist_ok=True) file_path.write_text(file_config["content"]) if "mode" in file_config: file_path.chmod(int(file_config["mode"], 8))