From ea9f8fca25137d773195e90a8f8a370a471dd924 Mon Sep 17 00:00:00 2001
From: Maddox
Date: Tue, 3 Feb 2026 14:57:56 -0500
Subject: [PATCH] Initial commit - shareable infra dashboard

Externalize hardcoded host inventory and diagram topology into JSON
config files (hosts.json, diagram.json) loaded at runtime. Add .env
for configurable port, SSH key path, and refresh interval. Include
example configs and README for standalone deployment.

Co-Authored-By: Claude Opus 4.5
---
 .env.example         |   5 +
 .gitignore           |   3 +
 Dockerfile           |  36 ++++
 README.md            | 113 ++++++++++
 app.py               | 136 ++++++++++++
 diagram.json.example |  26 +++
 docker-compose.yml   |  29 +++
 entrypoint.sh        |   9 +
 hosts.json.example   |   9 +
 requirements.txt     |   5 +
 templates/index.html | 487 +++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 858 insertions(+)
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 app.py
 create mode 100644 diagram.json.example
 create mode 100644 docker-compose.yml
 create mode 100644 entrypoint.sh
 create mode 100644 hosts.json.example
 create mode 100644 requirements.txt
 create mode 100644 templates/index.html

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..5c6d45d
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,5 @@
+DASHBOARD_PORT=5050
+REFRESH_INTERVAL=60
+SSH_TIMEOUT=10
+SSH_KEY_PATH=/root/.ssh/id_ed25519
+TZ=America/New_York
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cff5543
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.env
+__pycache__/
+*.pyc
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d2b41f7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,36 @@
+FROM python:3.12-slim
+WORKDIR /app
+
+# gosu lets entrypoint.sh drop from root to the unprivileged dashboard user
+# after it has copied the mounted SSH key.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends gosu openssh-client \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install dependencies before copying the source so this layer stays cached
+# until requirements.txt changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Create the runtime user and the mount points in one layer, before the
+# application files arrive, so no later recursive chown has to rewrite them.
+RUN useradd -m -s /bin/bash dashboard \
+    && mkdir -p /app/ssh /app/config \
+    && chmod 700 /app/ssh \
+    && chown -R dashboard:dashboard /app
+COPY --chown=dashboard:dashboard app.py .
+COPY --chown=dashboard:dashboard templates/ templates/
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+ENV PYTHONUNBUFFERED=1 REFRESH_INTERVAL=60 SSH_TIMEOUT=10 SSH_KEY_PATH=/app/ssh/id_ed25519
+EXPOSE 5000
+
+# Probe the app's own /health route with the Python interpreter already in the
+# image. urlopen raises (non-zero exit) on a refused connection or HTTP error.
+HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:5000/health', timeout=4)"
+
+# No USER instruction on purpose: entrypoint.sh starts as root to copy the
+# mounted key, then execs gunicorn as dashboard through gosu.
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6b16825
--- /dev/null
+++ b/README.md
@@ -0,0 +1,113 @@
+# Infrastructure Dashboard
+
+A lightweight, real-time infrastructure monitoring dashboard for Docker-based homelabs. Connects to hosts via SSH to collect container status and displays everything in a network topology diagram.
+
+## Features
+
+- **Network Diagram** — SVG topology view of your Proxmox nodes, VMs, LXCs, and remote hosts
+- **Live Container Status** — SSH-based polling shows container health across all hosts
+- **Host Cards** — Grid view with per-host container breakdown
+- **Inventory Table** — Flat table of all managed hosts
+- **Auto-refresh** — Configurable background polling interval
+
+## Quick Start
+
+1. **Clone the repo**
+   ```bash
+   git clone ssh://git@your-server:2222/your-user/infra-dashboard.git
+   cd infra-dashboard
+   ```
+
+2. **Configure environment**
+   ```bash
+   cp .env.example .env
+   # Edit .env with your port, SSH key path, and timezone
+   ```
+
+3. **Configure hosts**
+   ```bash
+   cp hosts.json.example hosts.json
+   # Edit hosts.json with your Proxmox nodes and Docker hosts
+   ```
+
+4. **Configure diagram**
+   ```bash
+   cp diagram.json.example diagram.json
+   # Edit diagram.json with your network topology
+   ```
+
+5. **Deploy**
+   ```bash
+   docker compose up -d --build
+   ```
+
+6.
**Access** at `http://your-host:5050`
+
+## Configuration Files
+
+| File | Purpose |
+|------|---------|
+| `.env` | Runtime environment variables (port, intervals, SSH key path) |
+| `hosts.json` | Docker hosts and Proxmox nodes inventory |
+| `diagram.json` | Network topology layout for SVG diagram |
+
+### hosts.json
+
+Defines your infrastructure inventory:
+
+```json
+{
+  "proxmox_nodes": {
+    "node-name": {"ip": "192.168.1.5", "hardware": "CPU • RAM", "role": "General"}
+  },
+  "docker_hosts": {
+    "host-name": {"ip": "192.168.1.80", "user": "root", "type": "vm", "vmid": "100", "node": "node-name", "purpose": "Description"}
+  }
+}
+```
+
+- `type`: `vm`, `lxc`, or `remote`
+- `vmid`: Proxmox VM/LXC ID (null for remote hosts)
+- `node`: Which Proxmox node this guest runs on
+
+### diagram.json
+
+Defines network topology and layout positions for the SVG diagram. Includes network infrastructure (router, switch, NAS), remote hosts, and Proxmox node children with their positions.
+
+Children with `"type": "static"` are shown in the diagram but not polled for container data.
+
+### .env
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `DASHBOARD_PORT` | `5050` | Host port for the web UI |
+| `REFRESH_INTERVAL` | `60` | Seconds between background polls |
+| `SSH_TIMEOUT` | `10` | SSH connection timeout in seconds |
+| `SSH_KEY_PATH` | `/root/.ssh/id_ed25519` | Path to the SSH private key on the Docker host (mounted read-only into the container) |
+| `TZ` | `America/New_York` | Container timezone |
+
+## API Endpoints
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/` | GET | Dashboard web UI |
+| `/api/data` | GET | All host data and container status |
+| `/api/diagram` | GET | Network topology JSON |
+| `/api/refresh` | POST | Trigger immediate data refresh |
+| `/health` | GET | Health check with last update timestamp |
+
+## SSH Requirements
+
+The dashboard connects to each host via SSH to run `docker ps`.
Ensure: + +- The SSH key specified in `SSH_KEY_PATH` exists on the Docker host +- The key is authorized on all target hosts +- Target hosts are reachable from the container's network + +## Architecture + +- **Flask** web server with Gunicorn (2 workers, 4 threads) +- **Paramiko** for SSH connections +- Background thread polls all hosts on the configured interval +- Container runs as non-root `dashboard` user (entrypoint copies SSH key) +- Memory limited to 256MB, CPU limited to 0.5 cores diff --git a/app.py b/app.py new file mode 100644 index 0000000..7dce6b2 --- /dev/null +++ b/app.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +import os, json, time, logging +from datetime import datetime +from concurrent.futures import ThreadPoolExecutor, as_completed +from threading import Lock +from flask import Flask, render_template, jsonify +import paramiko +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) +app = Flask(__name__) + +CONFIG = { + "refresh_interval": int(os.getenv("REFRESH_INTERVAL", "60")), + "ssh_timeout": int(os.getenv("SSH_TIMEOUT", "10")), + "ssh_key_path": os.getenv("SSH_KEY_PATH", "/app/ssh/id_ed25519"), +} + +CONFIG_DIR = os.getenv("CONFIG_DIR", "/app/config") + +def load_json_config(filename, default): + for path in [os.path.join(CONFIG_DIR, filename), os.path.join(os.path.dirname(__file__), filename)]: + if os.path.exists(path): + with open(path) as f: + return json.load(f) + logger.warning(f"{filename} not found, using defaults") + return default + +hosts_config = load_json_config("hosts.json", {"proxmox_nodes": {}, "docker_hosts": {}}) +PROXMOX_NODES = hosts_config["proxmox_nodes"] +DOCKER_HOSTS = hosts_config["docker_hosts"] + +DIAGRAM_DATA = load_json_config("diagram.json", {}) + +class DataCache: + def __init__(self): + self.data = {} + self.lock = Lock() + def set(self, key, 
value): + with self.lock: self.data[key] = value + def get(self, key, default=None): + with self.lock: return self.data.get(key, default) + +cache = DataCache() + +def get_ssh_client(host, user): + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + pkey = None + key_path = CONFIG["ssh_key_path"] + if os.path.exists(key_path): + try: pkey = paramiko.Ed25519Key.from_private_key_file(key_path) + except: + try: pkey = paramiko.RSAKey.from_private_key_file(key_path) + except: pass + client.connect(hostname=host, username=user, pkey=pkey, timeout=CONFIG["ssh_timeout"], allow_agent=True, look_for_keys=True) + return client + +def get_docker_containers(hostname, host_config): + try: + client = get_ssh_client(host_config["ip"], host_config["user"]) + cmd = "docker ps --format '{{.Names}}|{{.Status}}|{{.Image}}' 2>/dev/null" + stdin, stdout, stderr = client.exec_command(cmd, timeout=15) + output = stdout.read().decode('utf-8').strip() + containers = [] + for line in output.split('\n'): + if line and '|' in line: + parts = line.split('|') + if len(parts) >= 3: + status = parts[1].lower() + health = 'healthy' if 'healthy' in status else 'unhealthy' if 'unhealthy' in status else 'running' if 'up' in status else 'unknown' + containers.append({"name": parts[0], "status": parts[1], "image": parts[2], "health": health}) + client.close() + return {"hostname": hostname, "status": "online", "container_count": len(containers), "containers": containers, + "healthy": sum(1 for c in containers if c["health"] in ["healthy", "running"]), + "unhealthy": sum(1 for c in containers if c["health"] == "unhealthy")} + except Exception as e: + logger.warning(f"Failed to connect to {hostname}: {e}") + return {"hostname": hostname, "status": "offline", "container_count": 0, "containers": [], "error": str(e)} + +def collect_all_hosts(): + results = {} + with ThreadPoolExecutor(max_workers=10) as executor: + futures = {executor.submit(get_docker_containers, 
name, config): name for name, config in DOCKER_HOSTS.items()} + for future in as_completed(futures): + hostname = futures[future] + try: results[hostname] = future.result() + except Exception as e: results[hostname] = {"hostname": hostname, "status": "error", "error": str(e)} + return results + +def refresh_data(): + logger.info("Refreshing data...") + hosts = collect_all_hosts() + cache.set("hosts", hosts) + total = sum(h.get("container_count", 0) for h in hosts.values()) + online = sum(1 for h in hosts.values() if h.get("status") == "online") + cache.set("summary", {"total_containers": total, "total_hosts": len(DOCKER_HOSTS), "online_hosts": online, "proxmox_nodes": len(PROXMOX_NODES)}) + cache.set("last_update", datetime.now().isoformat()) + logger.info(f"Refresh complete - {total} containers across {online}/{len(DOCKER_HOSTS)} hosts") + +@app.route("/") +def index(): + return render_template("index.html", refresh_interval=CONFIG["refresh_interval"], docker_hosts=DOCKER_HOSTS, proxmox_nodes=PROXMOX_NODES) + +@app.route("/api/data") +def api_data(): + return jsonify({"timestamp": cache.get("last_update"), "summary": cache.get("summary", {}), "hosts": cache.get("hosts", {}), + "config": {"docker_hosts": DOCKER_HOSTS, "proxmox_nodes": PROXMOX_NODES}}) + +@app.route("/api/refresh", methods=["POST"]) +def api_refresh(): + refresh_data() + return jsonify({"status": "ok", "timestamp": cache.get("last_update")}) + +@app.route("/api/diagram") +def api_diagram(): + return jsonify(DIAGRAM_DATA) + +@app.route("/health") +def health(): + return jsonify({"status": "healthy", "last_update": cache.get("last_update")}) + +def background_refresh(): + while True: + try: refresh_data() + except Exception as e: logger.error(f"Refresh error: {e}") + time.sleep(CONFIG["refresh_interval"]) + +import threading +refresh_data() +threading.Thread(target=background_refresh, daemon=True).start() + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=5000, debug=False) diff --git 
a/diagram.json.example b/diagram.json.example
new file mode 100644
index 0000000..ac80124
--- /dev/null
+++ b/diagram.json.example
@@ -0,0 +1,26 @@
+{
+  "network": {
+    "subnet": "192.168.1.0/24",
+    "internet": {"label": "INTERNET", "description": "ISP Connection"},
+    "router": {"label": "Router", "description": "Gateway .1"},
+    "switch": {"label": "Network Switch", "description": "Managed Switch .2"},
+    "nas": {"label": "NAS", "description": "Storage .100"}
+  },
+  "remote": {},
+  "proxmox_nodes": {
+    "pve-node1": {
+      "ip": ".5",
+      "hardware": "CPU Model | 64GB",
+      "gpu_label": null,
+      "children": [
+        {"name": "my-vm", "vmid": "100", "type": "vm"},
+        {"name": "my-lxc", "vmid": "121", "type": "lxc"}
+      ]
+    }
+  },
+  "layout": {
+    "positions": {
+      "pve-node1": {"x_offset": 280, "y": 240}
+    }
+  }
+}
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..dbc104b
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,29 @@
+services:
+  infra-dashboard:
+    build: .
+    container_name: infra-dashboard
+    restart: unless-stopped
+    env_file: .env
+    ports:
+      - "${DASHBOARD_PORT:-5050}:5000"
+    volumes:
+      - ${SSH_KEY_PATH:-/root/.ssh/id_ed25519}:/app/ssh/id_ed25519:ro
+      - ./hosts.json:/app/config/hosts.json:ro
+      - ./diagram.json:/app/config/diagram.json:ro
+    networks:
+      - proxy
+    deploy:
+      resources:
+        limits:
+          memory: 256M
+          cpus: '0.5'
+    labels:
+      - "autoheal=true"
+      - "com.centurylinklabs.watchtower.enable=true"
+      - "homepage.group=Infrastructure"
+      - "homepage.name=Infra Dashboard"
+      - "homepage.icon=grafana.png"
+
+networks:
+  proxy:
+    external: true
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..268c3d8
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# The key is bind-mounted read-only: copy it to /tmp, hand it to the dashboard user, and point SSH_KEY_PATH at the copy
+if [ -f /app/ssh/id_ed25519 ]; then
+    cp /app/ssh/id_ed25519 /tmp/id_ed25519
+    chown dashboard:dashboard /tmp/id_ed25519
+    chmod 600 /tmp/id_ed25519
+    export SSH_KEY_PATH=/tmp/id_ed25519
+fi
+exec gosu dashboard gunicorn --bind 0.0.0.0:5000 --workers 2 --threads 4 app:app
diff --git a/hosts.json.example b/hosts.json.example
new file mode 100644
index 0000000..29e1cd7
--- /dev/null
+++ b/hosts.json.example
@@ -0,0 +1,9 @@
+{
+  "proxmox_nodes": {
+    "pve-node1": {"ip": "192.168.1.5", "hardware": "CPU Model • 64GB", "role": "General"}
+  },
+  "docker_hosts": {
+    "my-vm": {"ip": "192.168.1.80", "user": "root", "type": "vm", "vmid": "100", "node": "pve-node1", "purpose": "Web services"},
+    "my-lxc": {"ip": "192.168.1.121", "user": "root", "type": "lxc", "vmid": "121", "node": "pve-node1", "purpose": "Databases"}
+  }
+}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..46365fd
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+flask==3.0.0
+paramiko==3.4.0
+requests==2.31.0
+urllib3==2.1.0
+gunicorn==21.2.0
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..0113e2e
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,487 @@
+ + + + + + Infrastructure Dashboard + + + + +
+

Infrastructure Dashboard

+
+
Containers
--
+
Hosts
--
+
Proxmox
--
+
Loading...
+ +
+
+ +
+ + + + +
+ +
+ +
+
+ +
+

Select a host

+
+

Click on any host in the diagram to see container details.

+
+
+
+
+ + +
+ +
+ + +
+
+
+ + +
+
+ + + +