ops: add production hardening automation for secrets, backups, and rollback

This commit is contained in:
Austin A
2026-04-18 09:13:13 +01:00
parent 731a833075
commit 95633a6722
6 changed files with 738 additions and 0 deletions

114
infra/deploy/db-restore-test.sh Executable file
View File

@@ -0,0 +1,114 @@
#!/usr/bin/env bash
#
# db-restore-test.sh - check that the newest encrypted database backup can
# really be restored.
#
# The script picks the latest proxpanel.sql.enc under BACKUP_ROOT, verifies
# its checksum, decrypts it, loads it into a throwaway Postgres container
# and runs a few sanity queries against the result.
#
# Every setting below can be overridden from the environment.
set -Eeuo pipefail

# Application directory; only used to locate the default SECRET_FILE.
APP_DIR="${APP_DIR:-/opt/proxpanel}"
# Shell file that is sourced to obtain BACKUP_ENCRYPTION_KEY.
SECRET_FILE="${SECRET_FILE:-$APP_DIR/.backup.env}"
# Directory that is searched for <subdir>/proxpanel.sql.enc backups.
BACKUP_ROOT="${BACKUP_ROOT:-/opt/proxpanel-backups/daily}"
# Scratch directory that receives the *decrypted* dump and is deleted on exit.
# The default is a private, randomly named directory: a fixed name under /tmp
# could be pre-created or symlinked by another local user before the run.
# mktemp -d creates the directory readable by the owner only.
TMP_ROOT="${TMP_ROOT:-$(mktemp -d "${TMPDIR:-/tmp}/proxpanel-restore-test.XXXXXX")}"
# Name of the disposable container (force-removed before and after the run).
TEST_CONTAINER="${TEST_CONTAINER:-proxpanel-restore-test}"
# Postgres image used for the disposable container.
PG_IMAGE="${PG_IMAGE:-postgres:16-alpine}"
# Credentials and database name for the disposable container only.
PG_USER="${PG_USER:-proxpanel}"
PG_PASSWORD="${PG_PASSWORD:-restoretestpass}"
PG_DB="${PG_DB:-proxpanel_restore}"
# Exit hook: remove the disposable container and the scratch directory.
# Globals: TEST_CONTAINER (read), TMP_ROOT (read)
cleanup() {
  # Best effort: the container may never have been created, so any failure
  # (and all output) from docker is deliberately discarded.
  { docker rm -f "$TEST_CONTAINER" || :; } &>/dev/null
  rm -rf "$TMP_ROOT"
}
# Run cleanup whenever the shell exits.
trap cleanup EXIT
# Print a progress line to stdout, prefixed with the current UTC timestamp.
# Arguments: $@ - message words, joined with single spaces
log() {
  local -r stamp_format='+%Y-%m-%d %H:%M:%S UTC'
  local -r message="$*"
  printf '[%s] %s\n' "$(date -u "$stamp_format")" "$message"
}
# Report a fatal error on stderr and terminate the shell with status 1.
# Arguments: $@ - message words, joined with single spaces
die() {
  local -r reason="$*"
  {
    printf '[ERROR] %s\n' "$reason"
  } >&2
  exit 1
}
# Abort via die unless the given path is an existing regular file.
# Arguments: $1 - path to check
require_file() {
  local -r candidate="$1"
  if [[ ! -f "$candidate" ]]; then
    die "Missing required file: $candidate"
  fi
}
# Abort via die unless the named command can be resolved by the shell.
# Arguments: $1 - command name to look up
require_command() {
  local -r tool="$1"
  if ! command -v "$tool" >/dev/null 2>&1; then
    die "Missing required command: $tool"
  fi
}
# Print the path of the newest encrypted backup, or nothing if there is none.
#
# Only files named proxpanel.sql.enc that sit exactly one directory below
# BACKUP_ROOT are considered. "Newest" means the path that sorts last, so the
# per-backup directory names presumably sort chronologically (e.g. date
# stamps) - confirm against the backup job.
#
# Globals: BACKUP_ROOT (read)
# Outputs: at most one path on stdout
# Returns: 0 when BACKUP_ROOT is missing or empty (with no output), so the
#          caller can report "no backup found" itself instead of the script
#          aborting on a find error.
find_latest_encrypted_backup() {
  [[ -d "$BACKUP_ROOT" ]] || return 0
  # LC_ALL=C gives a byte-wise ordering that does not change with the
  # locale of the host or of the cron environment.
  find "$BACKUP_ROOT" -mindepth 2 -maxdepth 2 -type f -name 'proxpanel.sql.enc' \
    | LC_ALL=C sort \
    | tail -n 1
}
# Block until Postgres inside the test container accepts connections.
#
# Polls pg_isready once per second and aborts via die when the attempt
# budget is exhausted.
#
# Globals:   TEST_CONTAINER, PG_USER, PG_DB (read)
# Arguments: $1 - optional number of attempts (default 60)
# Returns:   0 as soon as a probe succeeds
wait_pg_ready() {
  local -r tries="${1:-60}"
  local attempt
  for ((attempt = 1; attempt <= tries; attempt++)); do
    # Probe over TCP (-h 127.0.0.1) rather than the Unix socket: while the
    # official postgres image initialises a fresh data directory it runs a
    # temporary server that answers on the socket only and is shut down
    # again afterwards, so a socket probe can report "ready" too early.
    if docker exec "$TEST_CONTAINER" \
      pg_isready -h 127.0.0.1 -U "$PG_USER" -d "$PG_DB" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  die "Restore test postgres did not become ready."
}
# Run the complete restore test.
#
# Steps: check prerequisites, load the encryption key from SECRET_FILE,
# locate the newest encrypted backup, verify its sha256 checksum, decrypt it
# into TMP_ROOT, start a disposable Postgres container, load the dump and
# finally run table-count sanity checks. Any failure aborts via die.
#
# Globals read: SECRET_FILE, BACKUP_ROOT, TMP_ROOT, TEST_CONTAINER, PG_IMAGE,
#               PG_USER, PG_PASSWORD, PG_DB, and BACKUP_ENCRYPTION_KEY (which
#               is expected to be set by sourcing SECRET_FILE).
# Optional:     RESTORE_ON_ERROR_STOP - set to 0 to let psql continue past
#               SQL errors while loading the dump (default: stop and fail).
# Arguments:    none used
main() {
  require_command docker
  require_command openssl
  require_command sha256sum
  require_file "$SECRET_FILE"

  # shellcheck disable=SC1090
  source "$SECRET_FILE"
  [[ -n "${BACKUP_ENCRYPTION_KEY:-}" ]] || die "BACKUP_ENCRYPTION_KEY is empty in $SECRET_FILE"

  local encrypted_backup checksum_file latest_dir decrypted_sql
  encrypted_backup="$(find_latest_encrypted_backup)"
  [[ -n "$encrypted_backup" ]] || die "No encrypted backup found in $BACKUP_ROOT"
  checksum_file="${encrypted_backup}.sha256"
  require_file "$checksum_file"
  latest_dir="$(dirname "$encrypted_backup")"

  # The scratch directory receives the plaintext dump: create everything
  # owner-only and refuse a directory that someone else owns or that is a
  # symlink (chmod and the later write would follow it).
  umask 077
  mkdir -p "$TMP_ROOT"
  [[ ! -L "$TMP_ROOT" && -O "$TMP_ROOT" ]] \
    || die "Refusing to use $TMP_ROOT: it is a symlink or not owned by the current user"
  chmod 700 "$TMP_ROOT"
  decrypted_sql="${TMP_ROOT}/restore.sql"

  log "Verifying checksum for $encrypted_backup"
  # Run from the backup directory so the file name recorded inside the
  # checksum file resolves relative to it.
  (cd "$latest_dir" && sha256sum -c "$(basename "$checksum_file")") \
    || die "Checksum verification failed for $encrypted_backup"

  log "Decrypting latest backup"
  # Hand the key to openssl through its environment only, instead of
  # exporting it to every child process started afterwards.
  BACKUP_ENCRYPTION_KEY="$BACKUP_ENCRYPTION_KEY" \
    openssl enc -d -aes-256-cbc -pbkdf2 -iter 200000 \
    -in "$encrypted_backup" \
    -out "$decrypted_sql" \
    -pass env:BACKUP_ENCRYPTION_KEY \
    || die "Failed to decrypt $encrypted_backup"

  log "Starting isolated restore-test postgres container"
  # Remove a leftover container from an earlier, interrupted run (best effort).
  docker rm -f "$TEST_CONTAINER" >/dev/null 2>&1 || true
  docker run -d --name "$TEST_CONTAINER" \
    -e POSTGRES_USER="$PG_USER" \
    -e POSTGRES_PASSWORD="$PG_PASSWORD" \
    -e POSTGRES_DB="$PG_DB" \
    "$PG_IMAGE" >/dev/null
  wait_pg_ready

  log "Applying restored SQL into test DB"
  local -a psql_restore=(psql -U "$PG_USER" -d "$PG_DB")
  # Without ON_ERROR_STOP psql exits 0 even when statements in the dump fail,
  # which would let a broken restore pass unnoticed.
  if [[ "${RESTORE_ON_ERROR_STOP:-1}" != "0" ]]; then
    psql_restore+=(-v ON_ERROR_STOP=1)
  fi
  docker exec -i "$TEST_CONTAINER" "${psql_restore[@]}" <"$decrypted_sql" >/dev/null \
    || die "Restoring $encrypted_backup into the test database failed"

  log "Running restore sanity checks"
  local table_count required_table_count
  table_count="$(
    docker exec "$TEST_CONTAINER" psql -U "$PG_USER" -d "$PG_DB" -Atc \
      "select count(*) from information_schema.tables where table_schema='public';"
  )"
  required_table_count="$(
    docker exec "$TEST_CONTAINER" psql -U "$PG_USER" -d "$PG_DB" -Atc \
      "select count(*) from information_schema.tables where table_schema='public' and table_name in ('User','Tenant','AuditLog');"
  )"
  # Make sure both answers are plain integers before comparing them.
  [[ "$table_count" =~ ^[0-9]+$ && "$required_table_count" =~ ^[0-9]+$ ]] \
    || die "Restore sanity check failed (could not read table counts)"
  [[ "$table_count" -ge 10 ]] || die "Restore sanity check failed (unexpected table count: $table_count)"
  [[ "$required_table_count" -eq 3 ]] || die "Restore sanity check failed (required tables missing)"
  log "Restore test passed (tables=$table_count)"
}
# Script entry point: run main, forwarding the command-line arguments.
main "$@"