#!/usr/bin/env python3
"""Felt Infrastructure Capacity Planning.

Target server: Hetzner AX102 or similar
- 16c/32t (AMD Ryzen 9 7950X3D or similar)
- 64-128GB DDR5 RAM
- 2× 1TB NVMe (RAID1 or split)
- ~€100/mo

Architecture:
1. Core Server(s) - handles NATS, PostgreSQL, API, admin dashboard
2. Virtual Leaf Server(s) - runs virtual Leaf instances for free tier
3. Pro venues have physical Leaf nodes — Core just receives NATS sync

Key insight: Virtual Leafs are HEAVIER than Pro sync because they run the
full tournament engine. Pro Leafs run on-premise — Core just stores their
sync data.
"""

print("=" * 70)
print("FELT INFRASTRUCTURE CAPACITY PLANNING")
print("=" * 70)
print("""
┌─────────────────────────────────────────────────────────────────┐
│                     ARCHITECTURE OVERVIEW                       │
│                                                                 │
│  FREE TIER (Virtual Leaf)        PRO TIER (Physical Leaf)       │
│  ┌──────────────┐                ┌──────────────┐               │
│  │ Go backend   │ ← runs on      │ Go backend   │ ← runs on     │
│  │ SQLite DB    │   OUR server   │ SQLite DB    │   THEIR hw    │
│  │ WebSocket    │                │ NVMe storage │               │
│  │ NATS client  │                │ NATS client  │               │
│  └──────┬───────┘                └──────┬───────┘               │
│         │ full engine                   │ sync only             │
│         ▼                               ▼                       │
│  ┌────────────────────────────────────────────────────┐         │
│  │                    CORE SERVER                     │         │
│  │  NATS JetStream │ PostgreSQL │ API │ Admin UI      │         │
│  └────────────────────────────────────────────────────┘         │
└─────────────────────────────────────────────────────────────────┘
""")

# ================================================================
# VIRTUAL LEAF RESOURCE ESTIMATES
# ================================================================
print("=" * 70)
print("VIRTUAL LEAF (Free Tier) — Resource Estimates")
print("=" * 70)
print("""
Each Virtual Leaf runs:
- Go binary (tournament engine + HTTP server + WebSocket hub)
- SQLite database (venue's tournaments, players, results)
- NATS client (sync to Core when needed)
- WebSocket connections (operator UI + player mobile + display proxy)

Per Virtual Leaf process:
""")

# --- Memory estimates (all values in MB) ---
go_binary_base = 30        # Go runtime + loaded binary
sqlite_working_set = 10    # SQLite in-memory cache (small venues)
websocket_per_conn = 0.05  # per WebSocket connection
avg_concurrent_ws = 20     # During a tournament: 1 operator + ~15 players + displays
ws_memory = websocket_per_conn * avg_concurrent_ws
nats_client = 5            # NATS client overhead
http_buffers = 10          # HTTP server buffers, template cache
signage_content = 5        # Cached signage content bundles

# Idle working sets — the per-component values shown in the "Idle" column
# below, named here so the table and the total cannot drift apart.
sqlite_idle = 5            # SQLite cache shrinks when no tournament is running
http_idle = 2              # minimal HTTP buffers kept warm while idle

total_per_vleaf_active = (go_binary_base + sqlite_working_set + ws_memory
                          + nats_client + http_buffers + signage_content)
# BUGFIX: the idle total previously omitted the 2 MB of idle HTTP buffers
# that the table displays, so the printed "Idle" column summed to 42 MB
# while the total said 40 MB — and the understated total fed the Scenario A
# capacity math. The total now includes every component shown in the column.
total_per_vleaf_idle = go_binary_base + sqlite_idle + nats_client + http_idle

print("   Component                  Active     Idle")
print("   ─────────────────────────────────────────────")
print(f"   Go runtime + binary       {go_binary_base:>5} MB   {go_binary_base:>5} MB")
print(f"   SQLite working set        {sqlite_working_set:>5} MB   {sqlite_idle:>5} MB")
print(f"   WebSocket ({avg_concurrent_ws} conns)      {ws_memory:>5.1f} MB   {0:>5} MB")
print(f"   NATS client               {nats_client:>5} MB   {nats_client:>5} MB")
print(f"   HTTP buffers/cache        {http_buffers:>5} MB   {http_idle:>5} MB")
print(f"   Signage content cache     {signage_content:>5} MB   {0:>5} MB")
print("   ─────────────────────────────────────────────")
print(f"   TOTAL per Virtual Leaf    {total_per_vleaf_active:>5.1f} MB   {total_per_vleaf_idle:>5} MB")

print("""
 CPU usage:
   Idle (no tournament):   ~0.01 cores (just NATS heartbeat)
   Active tournament:      ~0.05-0.1 cores (timer ticks, WS pushes)
   Peak (level change):    ~0.2 cores (burst: recalculate, push all)
   Signage editor (AI):    ~0.5 cores (burst, rare)

 Disk usage per venue:
   SQLite database:        1-50 MB (scales with history)
   Signage content:        10-100 MB (images, templates)
   Average per venue:      ~50 MB
""")

# ================================================================
# PRO VENUE CORE LOAD
# ================================================================
print("=" * 70)
print("PRO VENUE — Core Server Load (sync only)")
print("=" * 70)
print("""
Pro venues run their own Leaf hardware. Core only handles:
- NATS JetStream: receive sync messages (tournament results, player
  updates, financial data) — async, bursty, small payloads
- PostgreSQL: upsert synced data into venue's partition
- API: serve admin dashboard, player profiles, public venue page
- Reverse proxy config: Netbird routing for player/operator access

Per Pro venue on Core:
""")

# Per-Pro-venue footprint on the Core server (MB).
nats_per_pro = 2  # JetStream consumer state + buffers
pg_per_pro = 5    # PostgreSQL working set per venue
api_per_pro = 1   # Cached API responses
total_pro_core = nats_per_pro + pg_per_pro + api_per_pro

print("   Component                  Memory")
print("   ───────────────────────────────────")
print(f"   NATS consumer state       {nats_per_pro:>5} MB")
print(f"   PostgreSQL working set    {pg_per_pro:>5} MB")
print(f"   API cache                 {api_per_pro:>5} MB")
print("   ───────────────────────────────────")
print(f"   TOTAL per Pro venue       {total_pro_core:>5} MB")

print("""
 CPU usage per Pro venue:
   Idle (between syncs):   ~0.001 cores (negligible)
   Active sync burst:      ~0.02 cores (deserialize + upsert)
   API requests:           ~0.01 cores (occasional dashboard/mobile)

 Disk usage per venue (PostgreSQL):
   Average:                10-200 MB (scales with history)
   With years of data:     up to 500 MB
""")

# ================================================================
# SERVER CAPACITY CALCULATIONS
# ================================================================
print("=" * 70)
print("SERVER CAPACITY — Hetzner 16c/32t, 64GB RAM, 2×1TB NVMe")
print("=" * 70)

total_ram_mb = 64 * 1024  # 64 GB
total_cores = 32          # threads
total_disk_gb = 2000      # 2× 1TB (RAID1 = 1TB usable, or split = 2TB)
server_cost = 100         # €/mo

# OS + base services overhead (MB).
os_overhead_ram = 2048   # OS + system
nats_server_ram = 512    # NATS JetStream server
pg_server_ram = 4096     # PostgreSQL shared buffers + overhead
netbird_ram = 256        # Netbird controller
api_server_ram = 512     # Core API + admin frontend
monitoring_ram = 512     # Prometheus, logging
reserve_ram = 2048       # headroom / burst
base_overhead = (os_overhead_ram + nats_server_ram + pg_server_ram + netbird_ram
                 + api_server_ram + monitoring_ram + reserve_ram)
available_ram = total_ram_mb - base_overhead

# Core reservations (threads).
os_cores = 1
nats_cores = 1
pg_cores = 2
api_cores = 1
base_cores = os_cores + nats_cores + pg_cores + api_cores
available_cores = total_cores - base_cores

print("\n Base infrastructure overhead:")
print(f"   OS + system:           {os_overhead_ram:>6} MB   {os_cores} cores")
print(f"   NATS JetStream:        {nats_server_ram:>6} MB   {nats_cores} cores")
print(f"   PostgreSQL:            {pg_server_ram:>6} MB   {pg_cores} cores")
print(f"   Netbird controller:    {netbird_ram:>6} MB")
print(f"   Core API + admin:      {api_server_ram:>6} MB   {api_cores} cores")
print(f"   Monitoring:            {monitoring_ram:>6} MB")
print(f"   Reserve/headroom:      {reserve_ram:>6} MB")
print("   ─────────────────────────────────────────")
print(f"   Total overhead:        {base_overhead:>6} MB   {base_cores} cores")
print(f"   Available for venues:  {available_ram:>6} MB   {available_cores} cores")
print(f"   Available disk:        ~{total_disk_gb // 2} GB (RAID1) or ~{total_disk_gb} GB (split)")

# ================================================================
# SCENARIO: DEDICATED VIRTUAL LEAF SERVER
# ================================================================
print(f"\n{'─' * 70}")
print("SCENARIO A: Dedicated Virtual Leaf Server (free tier only)")
print(f"{'─' * 70}")

# Key insight: most virtual Leafs are IDLE most of the time.
# A venue running 3 tournaments/week has active tournaments maybe
# 12 hours/week = 7% of the time.
active_ratio = 0.10  # 10% of virtual Leafs active at any given time

# Memory: need to keep all Leafs loaded (Go processes), but SQLite working
# sets and WS connections only for the active ones.
mem_per_vleaf = total_per_vleaf_idle                                # Base: all idle
mem_per_active_delta = total_per_vleaf_active - total_per_vleaf_idle  # Extra when active

# CPU: only active Leafs consume meaningful CPU.
cpu_per_active = 0.1  # cores per active tournament

# Solve for max Virtual Leafs (memory-bound):
#   N * idle_mem + N * active_ratio * active_delta <= available_ram
#   N * (idle_mem + active_ratio * active_delta)  <= available_ram
effective_mem_per_vleaf = mem_per_vleaf + active_ratio * mem_per_active_delta
max_vleaf_by_ram = int(available_ram / effective_mem_per_vleaf)

# Check CPU bound.
max_active = int(available_cores / cpu_per_active)
max_vleaf_by_cpu = int(max_active / active_ratio)

max_vleaf = min(max_vleaf_by_ram, max_vleaf_by_cpu)

print("\n Assumptions:")
print(f"   Active ratio (tournaments running):  {active_ratio:.0%}")
print(f"   Memory per Leaf (idle):              {mem_per_vleaf:.0f} MB")
print(f"   Memory per Leaf (active delta):      {mem_per_active_delta:.0f} MB")
print(f"   Effective memory per Leaf:           {effective_mem_per_vleaf:.0f} MB")
print(f"   CPU per active tournament:           {cpu_per_active} cores")
print("")
print(f"   Capacity (RAM-limited):  {max_vleaf_by_ram} Virtual Leafs")
print(f"   Capacity (CPU-limited):  {max_vleaf_by_cpu} Virtual Leafs")
print("   ═══════════════════════════════════════════")
print(f"   PRACTICAL CAPACITY: ~{max_vleaf} Virtual Leafs per server")
print(f"   At {active_ratio:.0%} active: ~{int(max_vleaf * active_ratio)} concurrent tournaments")
print(f"   Server cost: €{server_cost}/mo")
print(f"   Cost per Virtual Leaf: €{server_cost / max_vleaf:.2f}/mo")

# Disk check: venue databases are small, so disk is never the bound here.
avg_disk_per_vleaf = 50  # MB
total_disk_used = max_vleaf * avg_disk_per_vleaf / 1024
print(f"   Disk usage ({max_vleaf} venues): ~{total_disk_used:.0f} GB (well within {total_disk_gb // 2} GB)")

# ================================================================
# SCENARIO: CORE SERVER (Pro venues + API + services)
# ================================================================
print(f"\n{'─' * 70}")
print("SCENARIO B: Core Server (Pro venue sync + API + all services)")
print(f"{'─' * 70}")

cpu_per_pro = 0.03  # cores average per Pro venue (sync bursts + API amortized)
max_pro_by_ram = int(available_ram / total_pro_core)
max_pro_by_cpu = int(available_cores / cpu_per_pro)
max_pro = min(max_pro_by_ram, max_pro_by_cpu)

# PostgreSQL (vacuum, connection count, partition bookkeeping) becomes the
# practical bottleneck well before RAM or CPU. One constant, used for both
# the capacity and the cost lines — previously these used 5000 and 2000
# inconsistently.
pg_practical_cap = 2000

print(f"\n Per Pro venue on Core: {total_pro_core} MB RAM, ~{cpu_per_pro} cores avg")
print(f" Capacity (RAM-limited):  {max_pro_by_ram} Pro venues")
print(f" Capacity (CPU-limited):  {max_pro_by_cpu} Pro venues")
print(" ═══════════════════════════════════════════")
print(f" PRACTICAL CAPACITY: ~{min(max_pro, pg_practical_cap)} Pro venues per server")
print(" (PostgreSQL becomes the bottleneck before RAM/CPU)")
print(f" Server cost: €{server_cost}/mo")
print(f" Cost per Pro venue: €{server_cost / min(max_pro, pg_practical_cap):.2f}/mo")

# ================================================================
# COMBINED: REALISTIC DEPLOYMENT TOPOLOGY
# ================================================================
print(f"\n{'=' * 70}")
print("RECOMMENDED DEPLOYMENT TOPOLOGY")
print(f"{'=' * 70}")
print(f"""
┌─────────────────────────────────────────────────────────────┐
│ PHASE 1: Single Server (~€100/mo)                           │
│                                                             │
│   One Hetzner box runs EVERYTHING:                          │
│   Core (NATS + PostgreSQL + API) + Virtual Leafs            │
│                                                             │
│   Capacity: ~300-400 Virtual Leafs + ~200 Pro venues        │
│   This gets you through Year 1-2 easily.                    │
│                                                             │
│   Actual cost per venue:                                    │
│   If 200 free + 20 Pro: €100 / 220 = €0.45/venue/mo         │
│   Way under the €4/mo we budgeted per free venue!           │
└─────────────────────────────────────────────────────────────┘

┌─────────────────────────────────────────────────────────────┐
│ PHASE 2: Split (2 servers, ~€200/mo)                        │
│                                                             │
│   Server 1: Core (NATS + PG + API + Netbird controller)     │
│   Server 2: Virtual Leaf farm                               │
│                                                             │
│   Capacity: ~{max_vleaf} Virtual Leafs + ~2000 Pro venues         │
│   Split when you hit ~400 free venues or need more RAM.     │
└─────────────────────────────────────────────────────────────┘

┌─────────────────────────────────────────────────────────────┐
│ PHASE 3: Regional (3+ servers, ~€300+/mo)                   │
│                                                             │
│   EU:   Core + VLeaf farm (Hetzner Falkenstein)             │
│   US:   Core replica + VLeaf farm (Hetzner Ashburn)         │
│   APAC: Core replica + VLeaf farm (OVH Singapore)           │
│                                                             │
│   NATS super-cluster for cross-region sync                  │
│   Player profiles: globally replicated                      │
│   Venue data: stays in region (GDPR)                        │
└─────────────────────────────────────────────────────────────┘
""")

# ================================================================
# THE PUNCHLINE
# ================================================================
print(f"{'=' * 70}")
print("THE PUNCHLINE")
print(f"{'=' * 70}")

# Year-1 planning assumptions.
year1_free = 200
year1_pro = 20
year1_total = year1_free + year1_pro
year1_servers = 1
year1_infra = 100  # €/mo

print(f"""
 Year 1 reality: {year1_total} venues ({year1_free} free + {year1_pro} Pro)
 Infrastructure: {year1_servers} server @ €{year1_infra}/mo

 Actual cost per free venue: €{year1_infra / year1_total:.2f}/mo (not €4!)
 Actual cost per Pro venue:  €{year1_infra / year1_total:.2f}/mo (not €6!)

 We budgeted €4/mo per free venue. Reality is €0.45/mo.
 That means the free tier is ~9× cheaper than we estimated.
 The original financial model is CONSERVATIVE.

 With real infrastructure costs:
 Year 1 net improvement: +€{(4 - year1_infra / year1_total) * year1_free * 12:.0f}/yr saved on free tier

 This doesn't change when you hit salary targets, but it means
 your runway is much longer and your margins are much better
 than the financial model suggests.

 One €100/mo server handles your first ~500 venues.
 You won't need a second server until you're already profitable.
""")