infra-dashboard/app.py
Maddox ea9f8fca25 Initial commit - shareable infra dashboard
Externalize hardcoded host inventory and diagram topology into
JSON config files (hosts.json, diagram.json) loaded at runtime.
Add .env for configurable port, SSH key path, and refresh interval.
Include example configs and README for standalone deployment.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 14:57:56 -05:00

136 lines
5.6 KiB
Python

#!/usr/bin/env python3
import json
import logging
import os
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from threading import Lock

from flask import Flask, render_template, jsonify
import paramiko
import urllib3
# Silence urllib3's warning about unverified HTTPS requests.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

app = Flask(__name__)


def _env_int(name, fallback):
    # Read an integer setting from the environment, with a string fallback.
    return int(os.getenv(name, fallback))


# Runtime tunables, each overridable through the environment (.env).
CONFIG = {
    "refresh_interval": _env_int("REFRESH_INTERVAL", "60"),
    "ssh_timeout": _env_int("SSH_TIMEOUT", "10"),
    "ssh_key_path": os.getenv("SSH_KEY_PATH", "/app/ssh/id_ed25519"),
}

# Directory searched first for hosts.json / diagram.json.
CONFIG_DIR = os.getenv("CONFIG_DIR", "/app/config")
def load_json_config(filename, default, config_dir=None):
    """Load a JSON config file, searching the config dir then the app dir.

    Args:
        filename: Base name of the JSON file (e.g. "hosts.json").
        default: Value returned when the file exists in neither location.
        config_dir: Directory searched first; defaults to CONFIG_DIR.

    Returns:
        The parsed JSON document, or ``default`` if no file was found.
    """
    if config_dir is None:
        config_dir = CONFIG_DIR
    candidates = [
        os.path.join(config_dir, filename),
        os.path.join(os.path.dirname(__file__), filename),
    ]
    for path in candidates:
        if os.path.exists(path):
            with open(path) as f:
                return json.load(f)
    # Bug fix: the original logged the literal text "(unknown) not found",
    # never interpolating the filename; include it so the warning says
    # which config file is missing.
    logger.warning("%s not found, using defaults", filename)
    return default
# Host inventory and diagram topology are externalized into JSON configs.
hosts_config = load_json_config("hosts.json", {"proxmox_nodes": {}, "docker_hosts": {}})
# Use .get() so a hand-edited hosts.json that omits one section degrades to
# an empty inventory instead of raising KeyError at import time.
PROXMOX_NODES = hosts_config.get("proxmox_nodes", {})
DOCKER_HOSTS = hosts_config.get("docker_hosts", {})
DIAGRAM_DATA = load_json_config("diagram.json", {})
class DataCache:
    """Minimal thread-safe key/value store.

    Shared between Flask request handlers and the background refresh
    thread; every access is serialized through a single lock.
    """

    def __init__(self):
        self._store = {}
        self._guard = Lock()

    def set(self, key, value):
        """Store ``value`` under ``key``, replacing any previous entry."""
        with self._guard:
            self._store[key] = value

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` if absent."""
        with self._guard:
            return self._store.get(key, default)


# Single cache instance shared by the whole process.
cache = DataCache()
def get_ssh_client(host, user):
    """Open an SSH connection to ``host`` as ``user``.

    Attempts to load the configured private key (Ed25519 first, then RSA);
    if neither parses, falls back to the SSH agent and default key lookup
    via ``allow_agent``/``look_for_keys``.

    Returns:
        A connected paramiko.SSHClient; the caller must close() it.

    Raises:
        Whatever paramiko/socket raises on connection or auth failure.
    """
    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy trusts unknown host keys on first contact;
    # fine for a lab dashboard, not for hostile networks.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    pkey = None
    key_path = CONFIG["ssh_key_path"]
    if os.path.exists(key_path):
        # Try key types in order.  The original used bare `except:` clauses,
        # which also swallow KeyboardInterrupt/SystemExit; catch only the
        # errors key parsing can actually raise.
        for key_cls in (paramiko.Ed25519Key, paramiko.RSAKey):
            try:
                pkey = key_cls.from_private_key_file(key_path)
                break
            except (paramiko.SSHException, IOError, ValueError):
                continue
    client.connect(
        hostname=host,
        username=user,
        pkey=pkey,
        timeout=CONFIG["ssh_timeout"],
        allow_agent=True,
        look_for_keys=True,
    )
    return client
def get_docker_containers(hostname, host_config):
    """Poll one Docker host over SSH and summarize its containers.

    Args:
        hostname: Display name used in the returned record.
        host_config: Dict with "ip" and "user" for the SSH connection.

    Returns:
        On success: an "online" record with container details plus
        healthy/unhealthy counts.  On any failure: an "offline" record
        carrying the error string (never raises).
    """
    try:
        client = get_ssh_client(host_config["ip"], host_config["user"])
        try:
            cmd = "docker ps --format '{{.Names}}|{{.Status}}|{{.Image}}' 2>/dev/null"
            stdin, stdout, stderr = client.exec_command(cmd, timeout=15)
            output = stdout.read().decode('utf-8').strip()
        finally:
            # Always release the connection -- the original leaked the SSH
            # client whenever exec_command/read raised.
            client.close()
        containers = []
        for line in output.split('\n'):
            if not line or '|' not in line:
                continue
            parts = line.split('|')
            if len(parts) < 3:
                continue
            status = parts[1].lower()
            # Bug fix: check "unhealthy" BEFORE "healthy".  The string
            # "unhealthy" contains "healthy" as a substring, so the original
            # ternary chain classified unhealthy containers as healthy (its
            # 'unhealthy' branch was unreachable).
            if 'unhealthy' in status:
                health = 'unhealthy'
            elif 'healthy' in status:
                health = 'healthy'
            elif 'up' in status:
                health = 'running'
            else:
                health = 'unknown'
            containers.append({"name": parts[0], "status": parts[1], "image": parts[2], "health": health})
        return {
            "hostname": hostname,
            "status": "online",
            "container_count": len(containers),
            "containers": containers,
            "healthy": sum(1 for c in containers if c["health"] in ["healthy", "running"]),
            "unhealthy": sum(1 for c in containers if c["health"] == "unhealthy"),
        }
    except Exception as e:
        logger.warning(f"Failed to connect to {hostname}: {e}")
        return {"hostname": hostname, "status": "offline", "container_count": 0, "containers": [], "error": str(e)}
def collect_all_hosts():
    """Query every configured Docker host in parallel.

    Returns:
        Mapping of hostname -> status record from get_docker_containers,
        or an "error" record when the worker itself raised.
    """
    gathered = {}
    with ThreadPoolExecutor(max_workers=10) as pool:
        pending = {
            pool.submit(get_docker_containers, name, cfg): name
            for name, cfg in DOCKER_HOSTS.items()
        }
        for finished in as_completed(pending):
            name = pending[finished]
            try:
                gathered[name] = finished.result()
            except Exception as exc:
                gathered[name] = {"hostname": name, "status": "error", "error": str(exc)}
    return gathered
def refresh_data():
    """Re-poll every host and publish fresh results into the cache."""
    logger.info("Refreshing data...")
    hosts = collect_all_hosts()
    cache.set("hosts", hosts)
    records = hosts.values()
    total = sum(record.get("container_count", 0) for record in records)
    online = sum(1 for record in records if record.get("status") == "online")
    summary = {
        "total_containers": total,
        "total_hosts": len(DOCKER_HOSTS),
        "online_hosts": online,
        "proxmox_nodes": len(PROXMOX_NODES),
    }
    cache.set("summary", summary)
    cache.set("last_update", datetime.now().isoformat())
    logger.info(f"Refresh complete - {total} containers across {online}/{len(DOCKER_HOSTS)} hosts")
@app.route("/")
def index():
    """Render the dashboard page with the static host inventory."""
    context = {
        "refresh_interval": CONFIG["refresh_interval"],
        "docker_hosts": DOCKER_HOSTS,
        "proxmox_nodes": PROXMOX_NODES,
    }
    return render_template("index.html", **context)
@app.route("/api/data")
def api_data():
    """Return a JSON snapshot of the most recent cached poll results."""
    payload = {
        "timestamp": cache.get("last_update"),
        "summary": cache.get("summary", {}),
        "hosts": cache.get("hosts", {}),
        "config": {"docker_hosts": DOCKER_HOSTS, "proxmox_nodes": PROXMOX_NODES},
    }
    return jsonify(payload)
@app.route("/api/refresh", methods=["POST"])
def api_refresh():
    """Force a synchronous re-poll, then report the new data timestamp."""
    refresh_data()
    response = {"status": "ok", "timestamp": cache.get("last_update")}
    return jsonify(response)
@app.route("/api/diagram")
def api_diagram():
    """Serve the topology diagram loaded at startup from diagram.json."""
    return jsonify(DIAGRAM_DATA)
@app.route("/health")
def health():
    """Liveness probe; also reports when data was last refreshed."""
    status = {"status": "healthy", "last_update": cache.get("last_update")}
    return jsonify(status)
def background_refresh():
    """Daemon-thread loop: refresh, sleep for the configured interval, repeat.

    Errors are logged and swallowed so one failed poll never kills the
    refresher thread.
    """
    while True:
        try:
            refresh_data()
        except Exception as e:
            logger.error(f"Refresh error: {e}")
        time.sleep(CONFIG["refresh_interval"])
# Prime the cache synchronously so the first page load has data, then keep
# it fresh from a daemon thread.  (`threading` is imported at the top of the
# file rather than mid-module as the original did.)
refresh_data()
threading.Thread(target=background_refresh, daemon=True).start()
if __name__ == "__main__":
    # Port is configurable via the PORT environment variable (matching the
    # .env-driven deployment); defaults to the original hardcoded 5000.
    app.run(host="0.0.0.0", port=int(os.getenv("PORT", "5000")), debug=False)