Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,10 @@ jobs:
# i.e., direct references to alertmanager, loki push clients, or
# docker-compose.monitoring in the application source.
# Exclude comment-only lines (-h suppresses filenames for grep -Ev).
# With bash -o pipefail (GHA default), grep exits 1 when there are zero matches;
# that must not fail the step — only non-empty LEAKS after filtering is an error.
LEAKS=$(grep -rhE "(alertmanager|docker-compose\.monitoring)" src/ tests/ 2>/dev/null \
| grep -Ev '^\s*(//|#|\*|/\*)')
| grep -Ev '^\s*(//|#|\*|/\*)' || true)
if [ -n "$LEAKS" ]; then
echo "::error::Infra client references found in src/ or tests/"
echo "$LEAKS"
Expand Down Expand Up @@ -278,7 +280,7 @@ jobs:
# Guard 1: no stale network name (fieldtrack_network is not the canonical name)
if grep -rE '\bfieldtrack_network\b' src/ scripts/ \
--include='*.ts' --include='*.sh' \
2>/dev/null | grep -Ev '^\s*#'; then
2>/dev/null | grep -Ev '^[^:]+:\s*(#|//)'; then
echo "::error::Forbidden network name 'fieldtrack_network' found β€” canonical name is 'api_network'"
FAIL=1
fi
Expand Down Expand Up @@ -791,7 +793,8 @@ jobs:
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
ACTIVE_SLOT=$(cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "unknown")
export DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
ACTIVE_SLOT=$(cat "$DEPLOY_ROOT/.fieldtrack/active-slot" 2>/dev/null || cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "unknown")
ACTIVE_CONTAINER="api-${ACTIVE_SLOT}"
DEPLOY_STATUS="UNKNOWN"

Expand Down Expand Up @@ -836,7 +839,7 @@ jobs:
[ -d "$DEPLOY_ROOT" ] || { echo "::error::DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
cd "$DEPLOY_ROOT"

ACTIVE_SLOT=$(cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "blue")
ACTIVE_SLOT=$(cat "$DEPLOY_ROOT/.fieldtrack/active-slot" 2>/dev/null || cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "blue")
ACTIVE_CONTAINER="api-$ACTIVE_SLOT"

docker inspect "$ACTIVE_CONTAINER" >/dev/null 2>&1 || {
Expand Down Expand Up @@ -896,7 +899,7 @@ jobs:
# Scope: scripts/ and src/ only (not workflows where guard steps live).
if grep -rE "\./infra/|\.\.\./infra/" scripts/ src/ \
--binary-files=without-match --exclude-dir=node_modules 2>/dev/null \
| grep -Ev '^\s*#'; then
| grep -Ev '^[^:]+:\s*(#|//)'; then
echo "::error::Local repo-relative infra coupling (./infra/ or ../infra/) detected in scripts/ or src/"
exit 1
fi
Expand All @@ -913,7 +916,8 @@ jobs:
export DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
[ -d "$DEPLOY_ROOT" ] || { echo "::error::DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
cd "$DEPLOY_ROOT"
API_BASE_URL=$(grep -E '^API_BASE_URL=' .env | head -1 | cut -d'=' -f2-)
API_BASE_URL=$(grep -E '^API_BASE_URL=' .env 2>/dev/null | head -1 | cut -d'=' -f2- || true)
[ -n "$API_BASE_URL" ] || { echo "::error::API_BASE_URL missing or empty in .env"; exit 1; }
API_HOSTNAME=$(echo "$API_BASE_URL" | sed -E 's|^https?://||' | cut -d'/' -f1)
for i in $(seq 1 30); do
# Phase 1: in-network (source of truth)
Expand Down Expand Up @@ -949,7 +953,8 @@ jobs:
export DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
[ -d "$DEPLOY_ROOT" ] || { echo "::error::DEPLOY_ROOT not found: $DEPLOY_ROOT"; exit 1; }
cd "$DEPLOY_ROOT"
API_BASE_URL=$(grep -E '^API_BASE_URL=' .env | head -1 | cut -d'=' -f2-)
API_BASE_URL=$(grep -E '^API_BASE_URL=' .env 2>/dev/null | head -1 | cut -d'=' -f2- || true)
[ -n "$API_BASE_URL" ] || { echo "::error::API_BASE_URL missing or empty in .env"; exit 1; }
API_HOSTNAME=$(echo "$API_BASE_URL" | sed -E 's|^https?://||' | cut -d'/' -f1)
for i in $(seq 1 10); do
# Phase 1: in-network (source of truth)
Expand Down Expand Up @@ -986,7 +991,8 @@ jobs:
username: ${{ secrets.DO_USER }}
key: ${{ secrets.DO_SSH_KEY }}
script: |
ACTIVE_SLOT=$(cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "unknown")
export DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
ACTIVE_SLOT=$(cat "$DEPLOY_ROOT/.fieldtrack/active-slot" 2>/dev/null || cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "unknown")
ACTIVE_CONTAINER="api-${ACTIVE_SLOT}"
FT_CURL_IMG="curlimages/curl:8.7.1"
READY_RESP=$(docker run --rm --network api_network "$FT_CURL_IMG" \
Expand Down Expand Up @@ -1061,5 +1067,5 @@ jobs:
chmod +x scripts/*.sh
./scripts/deploy.sh --rollback --auto

ACTIVE_SLOT=$(cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "unknown")
ACTIVE_SLOT=$(cat "$DEPLOY_ROOT/.fieldtrack/active-slot" 2>/dev/null || cat /var/lib/fieldtrack/active-slot 2>/dev/null || echo "unknown")
echo "[DEPLOY] Rollback complete β€” slot=$ACTIVE_SLOT sha=${{ github.sha }}"
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ jobs:

if grep -rE '\bfieldtrack_network\b' src/ scripts/ \
--include='*.ts' --include='*.sh' \
2>/dev/null | grep -Ev '^\s*#'; then
2>/dev/null | grep -Ev '^[^:]+:\s*(#|//)'; then
echo "::error::Forbidden network name 'fieldtrack_network' β€” canonical name is 'api_network'"
FAIL=1
fi
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# Deployment history (VPS-side file, never committed)
.deploy_history
.last_deploy
# Fallback deploy state when /var/lib/fieldtrack is not writable (slot, lock)
.fieldtrack/

# ----------------
# Dependencies
Expand Down
80 changes: 66 additions & 14 deletions scripts/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
# - NEVER depends on: Redis, Supabase, BullMQ, monitoring stack
# - No /ready usage anywhere in this script
# - All nginx reloads flow through switch_nginx() — exactly once per deploy
#
# Deploy state (slot, lock, last-good):
# - FIELDTRACK_STATE_DIR or /var/lib/fieldtrack when writable (sudo chown if needed)
# - Otherwise $DEPLOY_ROOT/.fieldtrack; existing /var/lib/fieldtrack/* is migrated once
# =============================================================================
set -euo pipefail
if [ "${DEBUG:-false}" = "true" ]; then set -x; fi
Expand Down Expand Up @@ -302,14 +306,66 @@ run() {

# ---------------------------------------------------------------------------
# SLOT DIRECTORY AND FILE MANAGEMENT
#
# Primary state dir: FIELDTRACK_STATE_DIR or /var/lib/fieldtrack (persistent).
# If that path exists but is root-owned (common after manual/bootstrap mkdir),
# we sudo chown it for the deploy user. If we still cannot write, fall back to
# $DEPLOY_ROOT/.fieldtrack (always user-writable) and migrate slot files once.
# ---------------------------------------------------------------------------
# Ensure the state directory exists and is writable by the current user.
#
# Arguments: $1 - directory path
# Returns:   0 when the directory exists and is writable afterwards,
#            non-zero otherwise (empty path, cannot create, cannot chown,
#            or still not writable after the ownership fix).
#
# Creation is attempted unprivileged first so user-owned locations work on
# hosts where sudo is unavailable; sudo is only the fallback. This mirrors
# the "mkdir || sudo mkdir" order used by _ft_ensure_slot_dir.
_ft_make_state_dir_writable() {
  local d="${1:-}"
  # An empty path can never be a valid state directory.
  [ -n "$d" ] || return 1
  if [ ! -d "$d" ]; then
    mkdir -p "$d" 2>/dev/null || sudo mkdir -p "$d" 2>/dev/null || return 1
  fi
  if [ -w "$d" ]; then
    return 0
  fi
  # Directory exists but is owned by someone else (e.g. root after a manual
  # bootstrap): hand it to the deploy user.
  sudo chown "$(id -un):$(id -gn)" "$d" 2>/dev/null || return 1
  # Best effort: chown alone is usually enough, so a chmod failure is not fatal.
  sudo chmod u+rwx "$d" 2>/dev/null || true
  # The final verdict is the actual writability, not the exit codes above.
  [ -w "$d" ]
}

# One-time copy of deploy state from /var/lib/fieldtrack into the state
# directory currently in use.
#
# Globals: SLOT_DIR, ACTIVE_SLOT_FILE, SLOT_BACKUP_FILE, LAST_GOOD_FILE (read)
# Returns: always 0; every copy is best effort.
#
# Nothing happens when the legacy directory is the active one, when an
# active-slot file already exists at the new location, or when the legacy
# active-slot file is not readable.
_ft_migrate_state_from_var_lib_if_needed() {
  local legacy="/var/lib/fieldtrack"

  if [ "$SLOT_DIR" = "$legacy" ] \
    || [ -f "$ACTIVE_SLOT_FILE" ] \
    || [ ! -r "$legacy/active-slot" ]; then
    return 0
  fi

  _ft_log "msg='migrating active-slot from legacy path' from=$legacy/active-slot"
  cp -a "$legacy/active-slot" "$ACTIVE_SLOT_FILE" 2>/dev/null || true

  # Companion files are copied only when present in the legacy directory and
  # absent at the destination, so existing state is never overwritten.
  if [ -f "$legacy/active-slot.backup" ]; then
    if [ ! -f "$SLOT_BACKUP_FILE" ]; then
      cp -a "$legacy/active-slot.backup" "$SLOT_BACKUP_FILE" 2>/dev/null || true
    fi
  fi
  if [ -f "$legacy/last-good" ]; then
    if [ ! -f "$LAST_GOOD_FILE" ]; then
      cp -a "$legacy/last-good" "$LAST_GOOD_FILE" 2>/dev/null || true
    fi
  fi
  return 0
}

# Choose the deploy state directory and derive every state-file path from it.
#
# Globals read:    FIELDTRACK_STATE_DIR (optional), DEPLOY_ROOT
# Globals written: SLOT_DIR, ACTIVE_SLOT_FILE, SLOT_BACKUP_FILE, LOCK_FILE,
#                  SNAP_DIR, LAST_GOOD_FILE
# Returns:         status of the legacy-state migration step.
#
# The preferred location is FIELDTRACK_STATE_DIR, defaulting to
# /var/lib/fieldtrack. When it cannot be made writable, state lives in
# $DEPLOY_ROOT/.fieldtrack instead and the switch is logged.
_ft_init_fieldtrack_state() {
  local preferred="${FIELDTRACK_STATE_DIR:-/var/lib/fieldtrack}"

  if _ft_make_state_dir_writable "$preferred"; then
    SLOT_DIR="$preferred"
  else
    SLOT_DIR="$DEPLOY_ROOT/.fieldtrack"
    mkdir -p "$SLOT_DIR"
    _ft_log "msg='preferred state dir not writable; using DEPLOY_ROOT fallback' preferred=$preferred fallback=$SLOT_DIR user=$(id -un)"
  fi

  # All state files live side by side in the chosen directory.
  ACTIVE_SLOT_FILE="${SLOT_DIR}/active-slot"
  SLOT_BACKUP_FILE="${SLOT_DIR}/active-slot.backup"
  LOCK_FILE="${SLOT_DIR}/deploy.lock"
  SNAP_DIR="${SLOT_DIR}"
  LAST_GOOD_FILE="${SNAP_DIR}/last-good"

  # Bring over state left in /var/lib/fieldtrack when we ended up elsewhere.
  _ft_migrate_state_from_var_lib_if_needed
}

_ft_ensure_slot_dir() {
if [ ! -d "$SLOT_DIR" ]; then
_ft_log "msg='slot dir missing, creating' path=$SLOT_DIR"
sudo mkdir -p "$SLOT_DIR"
sudo chown "$(id -un):$(id -gn)" "$SLOT_DIR"
sudo chmod 750 "$SLOT_DIR"
mkdir -p "$SLOT_DIR" 2>/dev/null || sudo mkdir -p "$SLOT_DIR"
sudo chown "$(id -un):$(id -gn)" "$SLOT_DIR" 2>/dev/null || true
fi
if [ ! -w "$SLOT_DIR" ]; then
_ft_log "level=ERROR msg='slot directory not writable' path=$SLOT_DIR user=$(id -un)"
return 1
fi
return 0
}

_ft_ensure_slot_backup_dir() {
Expand All @@ -331,7 +387,7 @@ _ft_validate_slot() {
_ft_write_slot() {
local slot="$1"
_ft_validate_slot "$slot" || return 1
_ft_ensure_slot_dir
_ft_ensure_slot_dir || _ft_exit 1 "DEPLOY_FAILED_SAFE" "reason=slot_dir_not_writable"
local tmp
tmp=$(mktemp "${SLOT_DIR}/active-slot.XXXXXX")
printf '%s\n' "$slot" > "$tmp"
Expand All @@ -350,7 +406,7 @@ _ft_write_slot() {
# DEPLOYMENT LOCK
# ---------------------------------------------------------------------------
_ft_acquire_lock() {
_ft_ensure_slot_dir
_ft_ensure_slot_dir || _ft_exit 1 "DEPLOY_FAILED_SAFE" "reason=slot_dir_not_writable"
_ft_log "msg='acquiring deployment lock' pid=$$ file=$LOCK_FILE"
exec 200>"$LOCK_FILE"
if ! flock -n 200; then
Expand Down Expand Up @@ -469,7 +525,7 @@ pull_image() {
# ---------------------------------------------------------------------------
resolve_slot() {
_ft_state "RESOLVE_SLOT" "msg='determining active slot'"
_ft_ensure_slot_dir
_ft_ensure_slot_dir || _ft_exit 1 "DEPLOY_FAILED_SAFE" "reason=slot_dir_not_writable"

local recovered_slot=""

Expand Down Expand Up @@ -1213,15 +1269,15 @@ DEPLOY_ROOT="${DEPLOY_ROOT:-$HOME/api}"
REPO_DIR="$DEPLOY_ROOT"
INFRA_ROOT="${INFRA_ROOT:-/opt/infra}"

_ft_init_fieldtrack_state

BLUE_NAME="api-blue"
GREEN_NAME="api-green"
APP_PORT=3000
NETWORK="api_network"
_FT_CURL_IMG="curlimages/curl:8.7.1"

SLOT_DIR="/var/lib/fieldtrack"
ACTIVE_SLOT_FILE="$SLOT_DIR/active-slot"
SLOT_BACKUP_FILE="/var/lib/fieldtrack/active-slot.backup" # persists; same dir as primary
# SLOT_DIR, ACTIVE_SLOT_FILE, LOCK_FILE, etc. — set by _ft_init_fieldtrack_state()

NGINX_CONF="$INFRA_ROOT/nginx/live/api.conf"
NGINX_LIVE_DIR="$INFRA_ROOT/nginx/live"
Expand All @@ -1233,10 +1289,6 @@ MAX_HISTORY=5
MAX_HEALTH_ATTEMPTS=40
HEALTH_INTERVAL=3

LOCK_FILE="$SLOT_DIR/deploy.lock"
SNAP_DIR="$SLOT_DIR"
LAST_GOOD_FILE="$SNAP_DIR/last-good"

# DEPLOY_HISTORY is set inside preflight() after _ft_load_env()
DEPLOY_HISTORY=""

Expand Down
53 changes: 27 additions & 26 deletions scripts/vps-readiness-check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#
# HARD FAILURES (exit 1):
# - Docker daemon not running
# - Ports 80 or 443 occupied by ANY non-docker-proxy, non-nginx process
# - Ports 80 or 443 occupied by processes other than docker-proxy / nginx
# - Any container has host port bindings (violates production architecture)
# - Required containers not attached to api_network
# - Required .env file missing
Expand Down Expand Up @@ -87,40 +87,34 @@ if ! docker network ls --format '{{.Name}}' | grep -Eq "^${NETWORK}$"; then
else
ok "Network '$NETWORK' exists."
fi
# ── CHECK 4: Ports 80 and 443 — no non-docker processes ──────────────────────
# ── CHECK 4: Ports 80 and 443 — expected listeners only ─────────────────────
#
# Design: we do NOT auto-kill unknown processes. If port 80 or 443 is held by
# a non-docker process (e.g., system nginx, apache, lighttpd), that is a VPS
# configuration error that requires operator action. Silently killing unknown
# processes risks breaking the system in unpredictable ways.
# Design: we do NOT auto-kill unknown processes. Published container ports show
# up as docker-proxy (full name in `ss -tlnp`; lsof often truncates COMMAND to
# 8 chars e.g. "docker-pr", which broke older allowlists).
#
# Allowed occupants (hard-coded safe list):
# - docker-proxy (managed by Docker / our nginx container)
# - nginx (running as Docker container — docker exec nginx)
# Use ss (same as elsewhere in this script) and allow:
# - docker-proxy / docker-pr (truncated) — Docker publishing nginx :80/:443
# - nginx — system or container worker name in ss output
#
# Everything else → hard fail with diagnostics.
# Everything else on these ports → hard fail (e.g. apache bind-mount).
echo ""
echo "--- CHECK 4: Port 80/443 β€” no non-docker processes ---"
echo "--- CHECK 4: Port 80/443 β€” docker-proxy / nginx only ---"
_check_port() {
local port="$1"
local listeners
listeners=$(sudo ss -tlnp "sport = :${port}" 2>/dev/null || ss -tlnp "sport = :${port}" 2>/dev/null || true)

# Check if anything is listening on the port at all
if ! ss -tlnp "sport = :${port}" 2>/dev/null | grep -q 'LISTEN'; then
if ! echo "$listeners" | grep -q 'LISTEN'; then
ok "Port $port is free."
return 0
fi

# Check for non-docker-proxy, non-nginx processes via lsof
# lsof -i :PORT lists ALL processes holding the port.
# We exclude docker-proxy and nginx (expected Docker-managed processes).
NON_DOCKER=$(sudo lsof -i ":${port}" -sTCP:LISTEN -P -n 2>/dev/null \
| awk 'NR>1 {print $1, $2}' \
| grep -vE '^(docker-pro|nginx)' || true)

if [ -n "$NON_DOCKER" ]; then
record_failure "Port $port is occupied by a non-docker process."
echo " Offending process(es):"
sudo lsof -i ":${port}" -sTCP:LISTEN -P -n 2>/dev/null | awk 'NR>1' | sed 's/^/ /'
# Any LISTEN line that does not reference an allowed process is a failure.
if echo "$listeners" | grep 'LISTEN' | grep -Ev 'docker-proxy|docker-pr|nginx' | grep -q .; then
record_failure "Port $port is occupied by an unexpected process (not docker-proxy/nginx)."
echo " Listeners (ss -tlnp sport = :${port}):"
echo "$listeners" | sed 's/^/ /'
echo " This is a VPS configuration error. Stop the conflicting service before deploying."
echo " Example: sudo systemctl stop nginx OR sudo systemctl stop apache2"
return 1
Expand Down Expand Up @@ -198,9 +192,16 @@ for dir in "$RUNTIME_DIR" "$LOG_DIR"; do
if [ ! -d "$dir" ]; then
warn "Runtime directory missing: $dir β€” creating it."
install -d -m 750 "$dir" 2>/dev/null || sudo install -d -m 750 "$dir"
ok "Created: $dir"
fi
if [ ! -w "$dir" ]; then
warn "Runtime directory not writable by deploy user: $dir β€” fixing ownership."
sudo chown "$(id -un):$(id -gn)" "$dir" 2>/dev/null || true
sudo chmod u+rwx "$dir" 2>/dev/null || true
fi
if [ ! -w "$dir" ]; then
record_failure "Cannot write to $dir β€” run: sudo chown -R $(id -un):$(id -gn) $dir"
else
ok "Directory exists: $dir"
ok "Directory ready (writable): $dir"
fi
done

Expand Down
Loading