# Review notes (text before the first "diff --git" header is ignored by patch tools):
# - modules/uptime/default.nix: retentionDays/displayDays now use
#   lib.types.ints.positive, and service names are validated by an assertion.
# - modules/uptime/default.nix and modules/uptime/run.sh gained comments.
# - The hunk sizes of those two new files were updated accordingly; their
#   "index" blob ids were not recomputed.
# - Indentation was restored by hand and may differ from the original files.
diff --git a/hosts/lab/default.nix b/hosts/lab/default.nix
index 0434586..4cdd80a 100644
--- a/hosts/lab/default.nix
+++ b/hosts/lab/default.nix
@@ -9,13 +9,24 @@
     ../../modules/site.nix
     ./sites.nix
     ./fail2ban.nix
-    ./uptime-kuma.nix
     ./tinyauth.nix
     ./shlink.nix
     ./guestbook.nix
     ./telegram-alerts.nix
+    ../../modules/uptime
   ];
 
+  services.uptime = {
+    enable = true;
+    services = {
+      website = "https://ily.rs";
+      forgejo = "https://git.ily.rs";
+      foundry = "https://foundry.ily.rs";
+      wiki = "https://wiki.ily.rs";
+      penfield = "https://penfield.ily.rs";
+    };
+  };
+
   networking.hostName = "lab";
 
   services.openssh = {
diff --git a/hosts/lab/sites.nix b/hosts/lab/sites.nix
index 5f3ff92..bd40fd7 100644
--- a/hosts/lab/sites.nix
+++ b/hosts/lab/sites.nix
@@ -62,6 +62,13 @@ in
         reverse_proxy localhost:8123
       }
 
+      @status path /status
+      handle @status {
+        root * /var/lib/uptime
+        rewrite * /status.txt
+        file_server
+      }
+
       @site_file file {
         try_files {path} {path}/index.html
       }
diff --git a/hosts/lab/telegram-alerts.nix b/hosts/lab/telegram-alerts.nix
index fd3b600..417cc15 100644
--- a/hosts/lab/telegram-alerts.nix
+++ b/hosts/lab/telegram-alerts.nix
@@ -29,7 +29,6 @@ let
     "podman-dokuwiki"
     "podman-shlink"
     "podman-shlink-web-client"
-    "podman-uptime-kuma"
     "podman-tinyauth"
     "site-webhook"
   ];
diff --git a/hosts/lab/uptime-kuma.nix b/hosts/lab/uptime-kuma.nix
deleted file mode 100644
index 93e949f..0000000
--- a/hosts/lab/uptime-kuma.nix
+++ /dev/null
@@ -1,27 +0,0 @@
-{ ... }:
-{
-  services.caddy.virtualHosts."status.ily.rs" = {
-    extraConfig = ''
-      reverse_proxy localhost:3001
-      encode zstd gzip
-    '';
-  };
-
-  services.caddy.virtualHosts."status.wynne.rs" = {
-    extraConfig = ''
-      redir https://status.ily.rs{uri} permanent
-    '';
-  };
-
-  virtualisation.oci-containers.containers.uptime-kuma = {
-    image = "louislam/uptime-kuma:2.2.1";
-    podman.user = "podman";
-    volumes = [
-      "/srv/uptime-kuma/data:/app/data"
-    ];
-    ports = [ "127.0.0.1:3001:3001" ];
-  };
-
-  # Workaround for NixOS/nixpkgs#410857 until backport of #475089 lands
-  systemd.services.podman-uptime-kuma.serviceConfig.Delegate = true;
-}
diff --git a/modules/uptime/default.nix b/modules/uptime/default.nix
new file mode 100644
index 0000000..10450ed
--- /dev/null
+++ b/modules/uptime/default.nix
@@ -0,0 +1,119 @@
+# Minimal text-only uptime monitor.
+#
+# A systemd timer periodically runs run.sh (wrapped by writeShellApplication)
+# as the dedicated `uptime` user. The script probes every URL configured in
+# services.uptime.services, keeps one log per service next to outputPath and
+# re-renders the plain-text status page at outputPath.
+{ config, lib, pkgs, ... }:
+let
+  cfg = config.services.uptime;
+
+  # One "<name> <url>" pair per line; run.sh splits each line on the first space.
+  servicesEnv = lib.concatStringsSep "\n"
+    (lib.mapAttrsToList (name: url: "${name} ${url}") cfg.services);
+
+  # run.sh has no shebang of its own; writeShellApplication wraps it into an
+  # executable with runtimeInputs on its PATH.
+  runScript = pkgs.writeShellApplication {
+    name = "uptime-run";
+    runtimeInputs = with pkgs; [ curl gawk coreutils ];
+    text = builtins.readFile ./run.sh;
+  };
+in
+{
+  options.services.uptime = {
+    enable = lib.mkEnableOption "minimal text-only uptime status page";
+
+    interval = lib.mkOption {
+      type = lib.types.str;
+      default = "5min";
+      description = "Probe interval, passed to the timer's OnUnitActiveSec.";
+    };
+
+    outputPath = lib.mkOption {
+      type = lib.types.str;
+      default = "/var/lib/uptime/status.txt";
+      description = ''
+        Path where the rendered status page is written.
+        Must be writable by the uptime user. The default lives inside the
+        unit's StateDirectory, which is created and owned automatically.
+      '';
+    };
+
+    services = lib.mkOption {
+      type = lib.types.attrsOf lib.types.str;
+      default = {};
+      example = { forgejo = "https://git.ily.rs"; };
+      description = "Map of service name to URL to probe.";
+    };
+
+    retentionDays = lib.mkOption {
+      type = lib.types.ints.positive;
+      default = 90;
+      description = "How many days of log entries to keep on disk.";
+    };
+
+    displayDays = lib.mkOption {
+      type = lib.types.ints.positive;
+      default = 30;
+      description = "How many days of history to render in the bar.";
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    assertions = [
+      {
+        assertion = cfg.displayDays <= cfg.retentionDays;
+        message = "services.uptime.displayDays (${toString cfg.displayDays}) must be <= retentionDays (${toString cfg.retentionDays}).";
+      }
+      {
+        # run.sh splits each SERVICES line on the first space and uses the
+        # name as a log file name, so names must be plain path-safe tokens.
+        assertion = lib.all
+          (name: builtins.match "[A-Za-z0-9_.-]+" name != null)
+          (lib.attrNames cfg.services);
+        message = "services.uptime.services: names may only contain letters, digits, '_', '.' and '-'.";
+      }
+    ];
+
+    users.users.uptime = {
+      isSystemUser = true;
+      group = "uptime";
+      home = "/var/lib/uptime";
+    };
+    users.groups.uptime = {};
+
+    systemd.services.uptime = {
+      description = "Probe configured services and render the status page";
+      after = [ "network-online.target" ];
+      wants = [ "network-online.target" ];
+      environment = {
+        SERVICES = servicesEnv;
+        OUTPUT_PATH = cfg.outputPath;
+        RETENTION_DAYS = toString cfg.retentionDays;
+        DISPLAY_DAYS = toString cfg.displayDays;
+      };
+      serviceConfig = {
+        Type = "oneshot";
+        ExecStart = lib.getExe runScript;
+        User = "uptime";
+        Group = "uptime";
+        StateDirectory = "uptime";
+        # World-readable directory and files (0755 / umask 0022) so a process
+        # other than the uptime user can read the rendered page.
+        StateDirectoryMode = "0755";
+        UMask = "0022";
+      };
+    };
+
+    systemd.timers.uptime = {
+      description = "Periodic uptime probe";
+      wantedBy = [ "timers.target" ];
+      timerConfig = {
+        OnBootSec = "1min";
+        OnUnitActiveSec = cfg.interval;
+        Unit = "uptime.service";
+      };
+    };
+  };
+}
diff --git a/modules/uptime/run.sh b/modules/uptime/run.sh
new file mode 100644
index 0000000..4ba15a9
--- /dev/null
+++ b/modules/uptime/run.sh
@@ -0,0 +1,125 @@
+# Probe every configured service once, append the result to its log, prune
+# entries older than the retention window and re-render the status page.
+#
+# Required environment:
+#   SERVICES        newline-separated "<name> <url>" pairs
+#   OUTPUT_PATH     file the rendered page is written to; the per-service
+#                   logs (<name>.log) are kept in the same directory
+#   RETENTION_DAYS  log entries older than this many days are dropped
+#   DISPLAY_DAYS    number of one-day cells rendered per service
+#
+# Log line format: "<unix time> <1 = up, 0 = down> <HTTP status code>".
+
+: "${SERVICES:?must be set}"
+: "${OUTPUT_PATH:?must be set}"
+: "${RETENTION_DAYS:?must be set}"
+: "${DISPLAY_DAYS:?must be set}"
+
+state_dir=$(dirname "$OUTPUT_PATH")
+mkdir -p "$state_dir"
+
+now=$(date -u +%s)
+retention_cutoff=$(( now - RETENTION_DAYS * 86400 ))
+
+# Probe each service and rotate its log.
+while IFS= read -r line; do
+  [ -z "$line" ] && continue
+  name=${line%% *}
+  url=${line#* }
+  log="$state_dir/$name.log"
+
+  # 2xx and 3xx count as up. "000" is what curl's %{http_code} reports when
+  # no HTTP response was received; `|| true` makes a failed probe a recorded
+  # result instead of a script error.
+  code=$(curl -fsS --max-time 10 -o /dev/null -w '%{http_code}' "$url" 2>/dev/null || true)
+  if [ -z "$code" ] || [ "$code" = "000" ]; then
+    code="000"
+    up=0
+  elif [ "${code:0:1}" = "2" ] || [ "${code:0:1}" = "3" ]; then
+    up=1
+  else
+    up=0
+  fi
+  printf '%s %s %s\n' "$now" "$up" "$code" >> "$log"
+
+  # Keep only entries inside the retention window, replacing the log via a
+  # temporary file.
+  awk -v cutoff="$retention_cutoff" '$1 >= cutoff' "$log" > "$log.tmp"
+  mv "$log.tmp" "$log"
+done <<< "$SERVICES"
+
+# Render. bucket_size and cell_count are parameters so we can add
+# hour/minute granularity rows later without restructuring the awk.
+#
+# render_row LOG_FILE NOW BUCKET_SIZE CELLS
+#   Prints one line "<bar> <pct> <state>" for LOG_FILE: <bar> has CELLS
+#   characters, one per BUCKET_SIZE-second bucket ending with the bucket that
+#   contains NOW ('#' all probes up, '_' all down, '.' mixed, '?' no data),
+#   <pct> is the percentage of successful probes in that window and <state>
+#   is up/down/unknown taken from the last log entry.
+render_row() {
+  local log_file="$1"
+  local now_arg="$2"
+  local bucket_size="$3"
+  local cells="$4"
+
+  if [ ! -s "$log_file" ]; then
+    local pad
+    pad=$(printf '%*s' "$cells" '' | tr ' ' '?')
+    printf '%s 0.0 unknown\n' "$pad"
+    return
+  fi
+
+  awk -v now="$now_arg" -v bucket="$bucket_size" -v cells="$cells" '
+    BEGIN {
+      bucket_origin = int(now / bucket) * bucket
+      window_start = bucket_origin - (cells - 1) * bucket
+      window_end = bucket_origin + bucket
+      last_ok = -1
+    }
+    {
+      ts = $1; ok = $2
+      if (ts >= window_start && ts < window_end) {
+        idx = int((ts - window_start) / bucket)
+        if (idx >= 0 && idx < cells) {
+          if (ok == 1) up[idx]++; else down[idx]++
+        }
+      }
+      last_ok = ok
+    }
+    END {
+      bar = ""; total_up = 0; total_all = 0
+      for (i = 0; i < cells; i++) {
+        u = (i in up) ? up[i] : 0
+        d = (i in down) ? down[i] : 0
+        if (u + d == 0) bar = bar "?"
+        else if (d == 0) bar = bar "#"
+        else if (u == 0) bar = bar "_"
+        else bar = bar "."
+        total_up += u; total_all += u + d
+      }
+      pct = (total_all == 0) ? 0 : (100 * total_up / total_all)
+      state = (last_ok == 1) ? "up" : (last_ok == 0) ? "down" : "unknown"
+      printf "%s %.1f %s\n", bar, pct, state
+    }' "$log_file"
+}
+
+day_bucket=86400
+
+tmp="$OUTPUT_PATH.tmp"
+{
+  printf '# updated %s\n\n' "$(date -u -d "@$now" '+%Y-%m-%d %H:%M:%S UTC')"
+
+  while IFS= read -r line; do
+    [ -z "$line" ] && continue
+    name=${line%% *}
+    log="$state_dir/$name.log"
+    read -r bar pct state < <(render_row "$log" "$now" "$day_bucket" "$DISPLAY_DAYS")
+    printf '%-20s %s %-7s %5s%%\n' "$name" "$bar" "$state" "$pct"
+  done <<< "$SERVICES"
+
+  printf '\nlegend: # up . degraded _ down ? no data\n'
+} > "$tmp"
+
+# Rename into place so readers never see a partially written page.
+mv "$tmp" "$OUTPUT_PATH"