Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ task platform:destroy # Uninstall platform bootstrap components
task argo:sync # Refresh every Argo CD application
task argo:sync app=plex # Refresh one Argo CD application

# Backup / DR
task backup:snapshots # List local restic appdata snapshots
task backup:check # Check the local restic repository
task restore:app app=paperless snapshot=latest confirm=RESTORE
task restore:all-appdata snapshot=latest confirm=RESTORE_ALL
task dr:drill app=atuin snapshot=latest

# Vault
task vault:edit-talos # Edit encrypted Talos bootstrap secrets

Expand Down
64 changes: 64 additions & 0 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,70 @@ tasks:
cmds:
- '"{{.ANSIBLE_PLAYBOOK}}" ansible/playbooks/argo-refresh.yml {{if .app}}-e "app={{.app}}"{{end}}'

# Backup and disaster recovery operations
# Runs the backup-snapshots playbook from the Taskfile directory after the
# ansible:check dependency has completed.
backup:snapshots:
  desc: List restic appdata backup snapshots
  deps:
    - task: ansible:check
  dir: "{{.TASKFILE_DIR}}"
  cmds:
    - >-
      "{{.ANSIBLE_PLAYBOOK}}" ansible/playbooks/backup-snapshots.yml

# Runs the backup-check playbook from the Taskfile directory after the
# ansible:check dependency has completed.
backup:check:
  desc: Check the local restic backup repository
  deps:
    - task: ansible:check
  dir: "{{.TASKFILE_DIR}}"
  cmds:
    - >-
      "{{.ANSIBLE_PLAYBOOK}}" ansible/playbooks/backup-check.yml

# Restores a single restic-backed app from a snapshot by running the
# restore-app playbook.
#
# Usage: task restore:app app=<name> [snapshot=<id>] confirm=<token>
#
# `app` and `confirm` deliberately have NO task-level default. Task's
# `requires` check passes for variables that are set to an empty string, so a
# `default ""` entry under `vars` would turn the guard below into a no-op and
# let the playbook run with blank values.
restore:app:
  desc: Restore one restic-backed app from a snapshot
  interactive: true
  dir: "{{.TASKFILE_DIR}}"
  deps:
    - task: ansible:check
  vars:
    # Optional: falls back to "latest" when no snapshot is given.
    snapshot: '{{.snapshot | default "latest"}}'
  requires:
    vars:
      - app
      - confirm
  cmds:
    # Folded scalar: the two lines are joined with a single space.
    - >-
      "{{.ANSIBLE_PLAYBOOK}}" ansible/playbooks/restore-app.yml
      -e "backup_app={{.app}} backup_snapshot={{.snapshot}} backup_confirm={{.confirm}}"

# Restores every restic-backed appdata app from a snapshot by running the
# restore-all-appdata playbook.
#
# Usage: task restore:all-appdata [snapshot=<id>] confirm=RESTORE_ALL
#
# `confirm` deliberately has NO task-level default. Task's `requires` check
# passes for variables that are set to an empty string, so a `default ""`
# entry under `vars` would turn the guard below into a no-op.
restore:all-appdata:
  desc: Restore all restic-backed appdata apps from a snapshot; requires confirm=RESTORE_ALL
  interactive: true
  dir: "{{.TASKFILE_DIR}}"
  deps:
    - task: ansible:check
  vars:
    # Optional: falls back to "latest" when no snapshot is given.
    snapshot: '{{.snapshot | default "latest"}}'
  requires:
    vars:
      - confirm
  cmds:
    # Folded scalar: the two lines are joined with a single space.
    - >-
      "{{.ANSIBLE_PLAYBOOK}}" ansible/playbooks/restore-all-appdata.yml
      -e "backup_snapshot={{.snapshot}} backup_confirm={{.confirm}}"

# Runs the dr-drill playbook for one app.
#
# Usage: task dr:drill app=<name> [snapshot=<id>]
#
# `app` deliberately has NO task-level default. Task's `requires` check passes
# for variables that are set to an empty string, so a `default ""` entry under
# `vars` would turn the guard below into a no-op.
dr:drill:
  desc: Restore one app into a temporary drill path and verify restored content
  interactive: true
  dir: "{{.TASKFILE_DIR}}"
  deps:
    - task: ansible:check
  vars:
    # Optional: falls back to "latest" when no snapshot is given.
    snapshot: '{{.snapshot | default "latest"}}'
  requires:
    vars:
      - app
  cmds:
    # Folded scalar: the two lines are joined with a single space.
    - >-
      "{{.ANSIBLE_PLAYBOOK}}" ansible/playbooks/dr-drill.yml
      -e "backup_app={{.app}} backup_snapshot={{.snapshot}}"

# Terraform operations
tf:plan:
desc: Terraform plan
Expand Down
9 changes: 9 additions & 0 deletions ansible/playbooks/backup-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Entry-point playbook: includes the `check` task file of the `backup` role
# against localhost, without gathering facts.
- name: Check restic backup repository
  gather_facts: false
  hosts: localhost
  tasks:
    - name: Check restic repository
      ansible.builtin.include_role:
        name: backup
        tasks_from: check
9 changes: 9 additions & 0 deletions ansible/playbooks/backup-snapshots.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Entry-point playbook: includes the `snapshots` task file of the `backup`
# role against localhost, without gathering facts.
- name: List restic backup snapshots
  gather_facts: false
  hosts: localhost
  tasks:
    - name: List restic snapshots
      ansible.builtin.include_role:
        name: backup
        tasks_from: snapshots
9 changes: 9 additions & 0 deletions ansible/playbooks/dr-drill.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Entry-point playbook: includes the `drill` task file of the `backup` role
# against localhost, without gathering facts.
- name: Run app restore drill
  gather_facts: false
  hosts: localhost
  tasks:
    - name: Run restore drill
      ansible.builtin.include_role:
        name: backup
        tasks_from: drill
9 changes: 9 additions & 0 deletions ansible/playbooks/restore-all-appdata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Entry-point playbook: includes the `restore_all_appdata` task file of the
# `backup` role against localhost, without gathering facts.
- name: Restore all restic appdata from restic
  gather_facts: false
  hosts: localhost
  tasks:
    - name: Restore all appdata
      ansible.builtin.include_role:
        name: backup
        tasks_from: restore_all_appdata
9 changes: 9 additions & 0 deletions ansible/playbooks/restore-app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Entry-point playbook: includes the `restore_app` task file of the `backup`
# role against localhost, without gathering facts.
- name: Restore app data from restic
  gather_facts: false
  hosts: localhost
  tasks:
    - name: Restore app data
      ansible.builtin.include_role:
        name: backup
        tasks_from: restore_app
20 changes: 20 additions & 0 deletions ansible/roles/backup/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
# Default variables for the backup role.

# Two directories above the playbook directory.
backup_repo_root: "{{ playbook_dir }}/../.."

# --- Kubernetes placement ---------------------------------------------------
# Namespace used for the generated Job's metadata.
backup_namespace: selfhosted
backup_argocd_namespace: argocd
# PVC mounted by the drill Job as its `appdata` volume.
backup_appdata_claim: restic-appdata

# --- restic container and repository ----------------------------------------
backup_restic_image: 'restic/restic:0.19.0'
# Secret name/key referenced by the Job's RESTIC_PASSWORD env entry.
backup_restic_password_secret: restic-credentials
backup_restic_password_key: RESTIC_PASSWORD
# Passed verbatim as the Job's RESTIC_REPOSITORY env value.
# NOTE(review): `$(RESTIC_PASSWORD)` is presumably expanded by Kubernetes from
# the RESTIC_PASSWORD env entry declared before it - confirm.
backup_restic_repository: 'rest:http://restic:$(RESTIC_PASSWORD)@restic.selfhosted.svc.cluster.local:8000/'
# Values handed to restic's --host / --tag / --retry-lock options.
backup_restic_host: homelab
backup_restic_tag: appdata
backup_retry_lock: '30m'

# --- Job lifecycle ----------------------------------------------------------
# Used for the Job's ttlSecondsAfterFinished.
backup_job_ttl_seconds: 3600
backup_job_wait_sleep: 10
backup_job_wait_timeout: 3600

# --- Restore locations ------------------------------------------------------
# Mount path of the appdata volume inside the drill Job.
backup_restore_root: /restore-root/data/appdata
backup_restore_staging: /restore-staging

# --- Per-invocation inputs --------------------------------------------------
backup_snapshot: latest
backup_confirm: ""
# When true, Job definitions are built and recorded but not run.
backup_test_mode: false
47 changes: 47 additions & 0 deletions ansible/roles/backup/tasks/build_job.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
# Assembles a batch/v1 Job manifest for a one-shot restic container and stores
# it in the `backup_job_definition` fact.
#
# Inputs read here: backup_job_name, backup_job_component, backup_job_script,
# and the optional backup_job_env_extra, backup_job_volume_mounts and
# backup_job_volumes (each falls back to an empty list).
- name: Build restic job definition
  ansible.builtin.set_fact:
    backup_job_definition:
      apiVersion: batch/v1
      kind: Job
      metadata:
        namespace: "{{ backup_namespace }}"
        name: "{{ backup_job_name }}"
      spec:
        # No retries: a failed pod fails the Job.
        backoffLimit: 0
        ttlSecondsAfterFinished: "{{ backup_job_ttl_seconds }}"
        template:
          metadata:
            labels:
              app.kubernetes.io/name: restic
              app.kubernetes.io/component: "{{ backup_job_component }}"
          spec:
            restartPolicy: Never
            volumes: "{{ backup_job_volumes | default([]) }}"
            containers:
              - name: restic
                image: "{{ backup_restic_image }}"
                imagePullPolicy: IfNotPresent
                # The caller-supplied script is run through `/bin/sh -c`.
                command:
                  - /bin/sh
                  - -c
                  - "{{ backup_job_script }}"
                volumeMounts: "{{ backup_job_volume_mounts | default([]) }}"
                # RESTIC_PASSWORD is listed before RESTIC_REPOSITORY; any
                # caller-supplied entries are appended after both.
                env: >-
                  {{
                    [
                      {
                        'name': 'RESTIC_PASSWORD',
                        'valueFrom': {
                          'secretKeyRef': {
                            'name': backup_restic_password_secret,
                            'key': backup_restic_password_key
                          }
                        }
                      },
                      {
                        'name': 'RESTIC_REPOSITORY',
                        'value': backup_restic_repository
                      }
                    ] + (backup_job_env_extra | default([]))
                  }}
17 changes: 17 additions & 0 deletions ansible/roles/backup/tasks/check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
# Builds a Job that runs `restic check`, records the definition in
# `backup_check_job`, and runs it unless backup_test_mode is true.
- name: Build check job
  vars:
    backup_job_component: check
    backup_job_name: "restic-check-{{ lookup('ansible.builtin.pipe', 'date -u +%Y%m%d%H%M%S') }}"
    backup_job_script: |
      set -eu
      restic check --retry-lock {{ backup_retry_lock }}
  ansible.builtin.include_tasks: build_job.yml

# Keep a copy of the definition under a check-specific fact name.
- name: Record check job
  ansible.builtin.set_fact:
    backup_check_job: "{{ backup_job_definition }}"

- name: Run check job
  ansible.builtin.include_tasks: run_job.yml
  when: not (backup_test_mode | bool)
45 changes: 45 additions & 0 deletions ansible/roles/backup/tasks/collect_restore_all_app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
# Evaluates one candidate file (`backup_restore_all_candidate`) and, when its
# metadata declares dr.restore.mode "restic-appdata", validates it and appends
# an entry to `backup_restore_all_apps`.
#
# The app name and namespace are taken from the candidate's parent and
# grandparent directory names.

# The candidate file itself is loaded as the app's metadata.
- name: Load candidate app metadata
  ansible.builtin.include_vars:
    name: backup_restore_all_candidate_metadata
    file: "{{ backup_restore_all_candidate }}"

# values.yaml is read from the same directory as the candidate file.
- name: Load candidate app values
  ansible.builtin.include_vars:
    name: backup_restore_all_candidate_values
    file: "{{ backup_restore_all_candidate | dirname }}/values.yaml"

# Start from an empty list so names from a previous candidate do not leak in.
- name: Reset candidate local PVC inventory
  ansible.builtin.set_fact:
    backup_restore_all_candidate_local_pvc_names: []

# A persistence entry counts as a local PVC when its type is (or defaults to)
# persistentVolumeClaim and it does not set existingClaim.
- name: Collect candidate local PVC persistence entries
  ansible.builtin.set_fact:
    backup_restore_all_candidate_local_pvc_names: "{{ backup_restore_all_candidate_local_pvc_names + [item.key] }}"
  when:
    - (item.value.type | default('persistentVolumeClaim')) == 'persistentVolumeClaim'
    - (item.value.existingClaim | default('')) == ''
  loop: "{{ backup_restore_all_candidate_values.persistence | default({}) | dict2items }}"
  loop_control:
    label: "{{ item.key }}"

- name: Validate restic appdata candidate
  ansible.builtin.assert:
    fail_msg: "App {{ backup_restore_all_candidate | dirname | basename }} must define exactly one local PVC and avoid nameOverride/fullnameOverride to use restore:all-appdata."
    that:
      - backup_restore_all_candidate_local_pvc_names | length == 1
      - backup_restore_all_candidate_values.nameOverride is not defined
      - backup_restore_all_candidate_values.fullnameOverride is not defined
  when: backup_restore_all_candidate_metadata.dr.restore.mode | default('') == "restic-appdata"

- name: Add restic appdata candidate
  vars:
    backup_restore_all_candidate_app:
      app_name: "{{ backup_restore_all_candidate | dirname | basename }}"
      application_name: "{{ backup_restore_all_candidate | dirname | basename }}"
      namespace: "{{ backup_restore_all_candidate | dirname | dirname | basename }}"
      # <namespace>/<app>, built from the same two directory names.
      restore_path: "{{ backup_restore_all_candidate | dirname | dirname | basename }}/{{ backup_restore_all_candidate | dirname | basename }}"
      local_pvc_name: "{{ backup_restore_all_candidate_local_pvc_names[0] }}"
  ansible.builtin.set_fact:
    backup_restore_all_apps: "{{ backup_restore_all_apps + [backup_restore_all_candidate_app] }}"
  when: backup_restore_all_candidate_metadata.dr.restore.mode | default('') == "restic-appdata"
63 changes: 63 additions & 0 deletions ansible/roles/backup/tasks/drill.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
---
# Restore drill for one app: builds a Job that restores each of the app's
# restore paths from restic into a throw-away directory under
# `<backup_restore_root>/.drill/`, checks that every restored directory exists
# and is non-empty, and removes the drill directory on exit.
#
# NOTE(review): the backup_app_* variables used below are presumably set by
# load_app.yml - confirm against that file.

- name: Load app drill context
  ansible.builtin.include_tasks: load_app.yml

- name: Assert app uses restic appdata restore
  ansible.builtin.assert:
    that:
      - backup_app_restore_mode == "restic-appdata"
      - backup_app_local_pvc_names | length == 1
      - backup_app_restore_paths | length > 0
      - not (backup_app_has_name_override | bool)
      - not (backup_app_has_fullname_override | bool)
    fail_msg: "App {{ backup_app }} must use dr.restore.mode restic-appdata, define exactly one local PVC, and avoid nameOverride/fullnameOverride to use dr:drill."

# Stored as a fact so the drill path in the script uses one fixed timestamp.
- name: Set drill timestamp
  ansible.builtin.set_fact:
    backup_restore_timestamp: "{{ lookup('ansible.builtin.pipe', 'date -u +%Y%m%dT%H%M%SZ') }}"

- name: Build drill job
  ansible.builtin.include_tasks: build_job.yml
  vars:
    backup_job_name: "restic-drill-{{ backup_app_name }}-{{ lookup('ansible.builtin.pipe', 'date -u +%Y%m%d%H%M%S') }}"
    backup_job_component: drill
    # The appdata PVC is mounted at backup_restore_root; the drill directory
    # is created beneath that mount.
    backup_job_volumes:
      - name: appdata
        persistentVolumeClaim:
          claimName: "{{ backup_appdata_claim }}"
    backup_job_volume_mounts:
      - name: appdata
        mountPath: "{{ backup_restore_root }}"
    # Script notes:
    #   * the EXIT trap removes the drill directory whether the drill passes
    #     or fails;
    #   * an empty restore path is refused explicitly: it would otherwise slip
    #     past the absolute/".." check, restore the whole /data/appdata/ tree
    #     and let the drill pass without checking the app;
    #   * each restored directory must exist and contain at least one entry.
    backup_job_script: |
      set -eu
      DRILL_PATH="{{ backup_restore_root }}/.drill/{{ backup_app_name }}-{{ backup_restore_timestamp }}"
      trap 'rm -rf "$DRILL_PATH"' EXIT
      mkdir -p "$DRILL_PATH"
      for RESTORE_INCLUDE in {% for restore_path in backup_app_restore_paths %}"/data/appdata/{{ restore_path }}" {% endfor %}; do
        RESTORE_PATH="${RESTORE_INCLUDE#/data/appdata/}"
        case "$RESTORE_PATH" in
          "") echo "Refusing empty drill path"; exit 1 ;;
          /*|*..*) echo "Refusing unsafe drill path: $RESTORE_PATH"; exit 1 ;;
        esac
        restic restore "$RESTIC_SNAPSHOT" \
          --host {{ backup_restic_host }} \
          --tag {{ backup_restic_tag }} \
          --exclude-xattr '*' \
          --include "$RESTORE_INCLUDE" \
          --target "$DRILL_PATH"
        RESTORED="$DRILL_PATH/data/appdata/${RESTORE_PATH}"
        test -d "$RESTORED"
        find "$RESTORED" -mindepth 1 -print -quit | grep -q .
      done
    # The snapshot id reaches the script through the container environment.
    backup_job_env_extra:
      - name: RESTIC_SNAPSHOT
        value: "{{ backup_snapshot }}"

- name: Record drill job
  ansible.builtin.set_fact:
    backup_drill_job: "{{ backup_job_definition }}"

- name: Run drill job
  when: not (backup_test_mode | bool)
  ansible.builtin.include_tasks: run_job.yml
  vars:
    backup_job_definition: "{{ backup_drill_job }}"
Loading
Loading