feat(01-05): add deterministic build configuration service
- Implement DeterministicBuildConfig class for reproducible builds - Compute config hash with normalized JSON and sorted inputs - Derive SOURCE_DATE_EPOCH from config hash (no wall clock dependency) - Create archiso profile with fixed locale, timezone, compression settings - Add tests verifying hash determinism and order independence
This commit is contained in:
parent
0d1a008d2f
commit
c49aee7b0a
3 changed files with 255 additions and 0 deletions
192
backend/app/services/deterministic.py
Normal file
192
backend/app/services/deterministic.py
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
"""
|
||||
Deterministic build configuration for reproducible ISOs.
|
||||
|
||||
Critical: Same configuration must produce identical ISO hash.
|
||||
This is required for caching to work correctly.
|
||||
|
||||
Determinism factors:
|
||||
- SOURCE_DATE_EPOCH: Fixed timestamps in all generated files
|
||||
- LC_ALL=C: Fixed locale for sorting
|
||||
- TZ=UTC: Fixed timezone
|
||||
- Sorted inputs: Packages, files always in consistent order
|
||||
- Fixed compression: Consistent squashfs settings
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
class OverlayFile:
    """Describes one file that is placed into the ISO overlay."""

    # Absolute destination inside the ISO, e.g. /etc/skel/.bashrc
    path: str
    # Text written to the destination file
    content: str
    # File mode string; "0644" unless the caller says otherwise
    mode: str = "0644"
|
||||
|
||||
|
||||
@dataclass
class BuildConfiguration:
    """Build inputs in the normalized shape used for deterministic hashing."""

    # Package names to install
    packages: list[str]
    # Overlay definitions, each a free-form mapping
    overlays: list[dict[str, Any]]
    # Locale for the build; defaults to en_US.UTF-8
    locale: str = "en_US.UTF-8"
    # Timezone for the build; defaults to UTC
    timezone: str = "UTC"
|
||||
|
||||
|
||||
class DeterministicBuildConfig:
    """Ensures reproducible ISO builds.

    The hash produced by ``compute_config_hash`` is meant to be a cache key,
    so every input that changes what ``create_archiso_profile`` writes must
    also change the hash, and inputs that write the same profile should hash
    the same. Both methods therefore share one normalization of packages and
    overlays.
    """

    # Packages used when a config has no "packages" key at all. An explicit
    # empty list is respected and is NOT replaced by this default.
    _DEFAULT_PACKAGES = ("base", "linux")

    # Mode applied to overlay files that do not specify one.
    _DEFAULT_FILE_MODE = "0644"

    @staticmethod
    def _normalized_packages(config: dict[str, Any]) -> list[str]:
        """Return the sorted, de-duplicated package list for ``config``."""
        return sorted(
            set(config.get("packages", DeterministicBuildConfig._DEFAULT_PACKAGES))
        )

    @staticmethod
    def _normalized_overlays(
        config: dict[str, Any],
    ) -> list[tuple[str, list[tuple[str, str, str]]]]:
        """Return overlays as ``(name, [(path, content, mode), ...])`` in canonical order.

        Names and paths are stripped *before* sorting so the sort key is the
        value that ends up in the hash. Ties (same name or same path) are
        broken by the remaining fields rather than by input order, so the
        result does not depend on how the caller ordered the config.
        """
        overlays = []
        for overlay in config.get("overlays", []):
            files = [
                (
                    entry.get("path", "").strip(),
                    entry.get("content", ""),
                    entry.get("mode", DeterministicBuildConfig._DEFAULT_FILE_MODE),
                )
                for entry in overlay.get("files", [])
            ]
            files.sort()
            overlays.append((overlay.get("name", "").strip(), files))
        overlays.sort()
        return overlays

    @staticmethod
    def compute_config_hash(config: dict[str, Any]) -> str:
        """Generate a deterministic hash of a build configuration.

        Process:
            1. Normalize inputs (sort and de-duplicate packages, strip and
               sort overlay names and file paths).
            2. Replace each file's content with its SHA-256 digest.
            3. Serialize with sorted keys and compact separators.

        A config without a "packages" key hashes like one that lists the
        default packages, because that is what the profile writer installs.

        Args:
            config: Build configuration with optional "packages", "overlays",
                "locale" and "timezone" keys.

        Returns:
            Hex SHA-256 digest of the normalized configuration.
        """
        normalized_overlays = [
            {
                "name": name,
                "files": [
                    {
                        "path": path,
                        # UTF-8 (the default for str.encode) matches what the
                        # profile writer puts on disk.
                        "content_hash": hashlib.sha256(content.encode()).hexdigest(),
                        "mode": mode,
                    }
                    for path, content, mode in files
                ],
            }
            for name, files in DeterministicBuildConfig._normalized_overlays(config)
        ]

        normalized = {
            "packages": DeterministicBuildConfig._normalized_packages(config),
            "overlays": normalized_overlays,
            "locale": config.get("locale", "en_US.UTF-8"),
            "timezone": config.get("timezone", "UTC"),
        }

        # Sorted keys and fixed separators keep the serialization stable.
        config_json = json.dumps(normalized, sort_keys=True, separators=(",", ":"))
        return hashlib.sha256(config_json.encode()).hexdigest()

    @staticmethod
    def get_source_date_epoch(config_hash: str) -> int:
        """Derive a deterministic timestamp from a config hash.

        The same hash always yields the same timestamp and the wall clock is
        never consulted. Distinct hashes usually yield distinct timestamps,
        but this is not guaranteed: the value is reduced modulo the size of
        the target range, so collisions are possible.

        Args:
            config_hash: Hex string; only the first 16 hex digits are used.

        Returns:
            Epoch seconds in the range 2020-01-01 00:00:00 UTC (inclusive)
            to 2030-12-31 23:59:59 UTC (exclusive).

        Raises:
            ValueError: If the leading characters are not valid hexadecimal
                (including an empty string).
        """
        # First 16 hex digits == first 8 bytes of the digest.
        hash_int = int(config_hash[:16], 16)
        min_epoch = 1577836800  # 2020-01-01 00:00:00 UTC
        max_epoch = 1924991999  # 2030-12-31 23:59:59 UTC
        # The modulus is kept as-is: changing it would shift the timestamp of
        # every already-built configuration.
        return min_epoch + (hash_int % (max_epoch - min_epoch))

    @staticmethod
    def create_archiso_profile(
        config: dict[str, Any],
        profile_path: Path,
        source_date_epoch: int,
    ) -> None:
        """Generate an archiso profile with deterministic settings.

        Creates under ``profile_path``:
            - packages.x86_64: sorted package list
            - profiledef.sh: build configuration
            - pacman.conf: package manager config
            - airootfs/: overlay files

        Overlay files are written in the same canonical order used for
        hashing, so when several overlays target the same path the winner
        does not depend on the order of the input config. Every overlay file
        gets an explicit mode (default "0644") so the result does not depend
        on the process umask.

        Args:
            config: Build configuration (same shape as for
                ``compute_config_hash``).
            profile_path: Directory to create the profile in.
            source_date_epoch: Timestamp used for the ISO label and version.

        Raises:
            ValueError: If an overlay file path is empty, contains a ".."
                component, or has a mode that is not an octal string.
        """
        from datetime import datetime, timezone

        airootfs = profile_path / "airootfs"

        # Validate every overlay target before touching the disk so a bad
        # entry cannot leave a half-written profile behind.
        planned_files = []
        for _name, files in DeterministicBuildConfig._normalized_overlays(config):
            for path, content, mode in files:
                relative = Path(path.lstrip("/"))
                if not relative.parts or ".." in relative.parts:
                    raise ValueError(f"Invalid overlay file path: {path!r}")
                planned_files.append((airootfs / relative, content, int(mode, 8)))

        profile_path.mkdir(parents=True, exist_ok=True)

        # packages.x86_64 (sorted for determinism)
        packages = DeterministicBuildConfig._normalized_packages(config)
        packages_file = profile_path / "packages.x86_64"
        packages_file.write_text("\n".join(packages) + "\n", encoding="utf-8")

        # profiledef.sh
        # Format the date here, in UTC, instead of emitting a `$(date ...)`
        # shell substitution: the label must not depend on the TZ of the
        # environment that later sources this file.
        build_date = datetime.fromtimestamp(source_date_epoch, tz=timezone.utc)
        iso_date = build_date.strftime("%Y%m")
        iso_version = build_date.strftime("%Y.%m.%d")

        profiledef = profile_path / "profiledef.sh"
        profiledef.write_text(
            f"""#!/usr/bin/env bash
# Deterministic archiso profile
# Generated for Debate platform

iso_name="debate-custom"
iso_label="DEBATE_{iso_date}"
iso_publisher="Debate Platform <https://debate.example.com>"
iso_application="Debate Custom Linux"
iso_version="{iso_version}"
install_dir="arch"
bootmodes=('bios.syslinux.mbr' 'bios.syslinux.eltorito' \\
           'uefi-x64.systemd-boot.esp' 'uefi-x64.systemd-boot.eltorito')
arch="x86_64"
pacman_conf="pacman.conf"
airootfs_image_type="squashfs"
airootfs_image_tool_options=('-comp' 'xz' '-Xbcj' 'x86' '-b' '1M' '-Xdict-size' '1M')

file_permissions=(
  ["/etc/shadow"]="0:0:0400"
  ["/root"]="0:0:750"
  ["/etc/gshadow"]="0:0:0400"
)
""",
            encoding="utf-8",
        )

        # pacman.conf
        pacman_conf = profile_path / "pacman.conf"
        pacman_conf.write_text(
            """[options]
Architecture = auto
CheckSpace
SigLevel = Required DatabaseOptional
LocalFileSigLevel = Optional

[core]
Include = /etc/pacman.d/mirrorlist

[extra]
Include = /etc/pacman.d/mirrorlist
""",
            encoding="utf-8",
        )

        # airootfs structure with overlay files
        airootfs.mkdir(exist_ok=True)
        for file_path, content, mode_bits in planned_files:
            file_path.parent.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8 keeps the bytes on disk equal to the bytes hashed.
            file_path.write_text(content, encoding="utf-8")
            # NOTE(review): some mkarchiso versions appear to copy airootfs
            # without preserving modes and rely on file_permissions instead;
            # confirm these modes survive into the final image.
            file_path.chmod(mode_bits)
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Tests package."""
|
||||
62
tests/test_deterministic.py
Normal file
62
tests/test_deterministic.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
"""Tests for deterministic build configuration."""
|
||||
|
||||
from backend.app.services.deterministic import DeterministicBuildConfig
|
||||
|
||||
|
||||
class TestDeterministicBuildConfig:
    """Checks that identical inputs always map to identical outputs."""

    def test_hash_deterministic(self) -> None:
        """Hashing one config twice yields one digest."""
        overlay = {
            "name": "test",
            "files": [{"path": "/etc/test", "content": "hello"}],
        }
        config = {"packages": ["vim", "git", "base"], "overlays": [overlay]}

        first, second = (
            DeterministicBuildConfig.compute_config_hash(config) for _ in range(2)
        )

        assert first == second

    def test_hash_order_independent(self) -> None:
        """Reordering the package list leaves the digest unchanged."""
        shuffled = {"packages": ["vim", "git", "base"], "overlays": []}
        ordered = {"packages": ["base", "git", "vim"], "overlays": []}

        digest_shuffled = DeterministicBuildConfig.compute_config_hash(shuffled)
        digest_ordered = DeterministicBuildConfig.compute_config_hash(ordered)

        assert digest_shuffled == digest_ordered

    def test_hash_different_configs(self) -> None:
        """Configs with different packages get different digests."""
        with_vim = {"packages": ["vim"], "overlays": []}
        with_emacs = {"packages": ["emacs"], "overlays": []}

        digest_vim = DeterministicBuildConfig.compute_config_hash(with_vim)
        digest_emacs = DeterministicBuildConfig.compute_config_hash(with_emacs)

        assert digest_vim != digest_emacs

    def test_source_date_epoch_deterministic(self) -> None:
        """One hash always maps to one timestamp."""
        config_hash = "abc123def456"

        first, second = (
            DeterministicBuildConfig.get_source_date_epoch(config_hash)
            for _ in range(2)
        )

        assert first == second

    def test_source_date_epoch_in_range(self) -> None:
        """The derived timestamp lies between 2020 and 2030."""
        lower_bound = 1577836800
        upper_bound = 1924991999

        epoch = DeterministicBuildConfig.get_source_date_epoch("abc123def456")

        assert lower_bound <= epoch <= upper_bound
|
||||
Loading…
Add table
Reference in a new issue