Skip to content
Open
38 changes: 19 additions & 19 deletions shared/Makefile.shared
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ generate: _bootstrap _guard-workspace-target ## Run generate_abac.py in the sele
echo " then use 'make generate SPACE=\"$(SPACE)\"' to add or update individual spaces."; \
exit 1; \
fi; \
cd "$(ENV_DIR)" && python "$(SHARED_ROOT)/generate_abac.py" $(GENERATE_ARGS) $(if $(MODE),--mode $(MODE),) $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),) --space "$(SPACE)"; \
cd "$(ENV_DIR)" && python3 "$(SHARED_ROOT)/generate_abac.py" $(GENERATE_ARGS) $(if $(MODE),--mode $(MODE),) $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),) --space "$(SPACE)"; \
else \
cd "$(ENV_DIR)" && python "$(SHARED_ROOT)/generate_abac.py" $(GENERATE_ARGS) $(if $(MODE),--mode $(MODE),) $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),); \
cd "$(ENV_DIR)" && python3 "$(SHARED_ROOT)/generate_abac.py" $(GENERATE_ARGS) $(if $(MODE),--mode $(MODE),) $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),); \
fi

audit-schema: _bootstrap _guard-workspace-target ## Report untagged sensitive columns and stale tag assignments in managed tables
Expand All @@ -207,18 +207,18 @@ generate-delta: _bootstrap _guard-workspace-target ## Detect schema drift and ge
# Validate the generated/ ABAC config of the selected workspace environment.
# The validator always receives generated/abac.auto.tfvars; it additionally
# receives generated/masking_functions.sql when that file exists.
# COUNTRY / INDUSTRY, when set, are forwarded as --country / --industry.
# The interpreter is invoked as python3 (not the unversioned `python`) so every
# branch runs the validator exactly once with the same interpreter.
validate-generated: _bootstrap _guard-workspace-target ## Validate generated/ files in the selected workspace environment
	@echo "=== Validate (generated/) [$(ENV)] ==="
	@cd "$(ENV_DIR)" && if [ -f generated/masking_functions.sql ]; then \
		python3 "$(SHARED_ROOT)/validate_abac.py" generated/abac.auto.tfvars generated/masking_functions.sql $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),); \
	else \
		python3 "$(SHARED_ROOT)/validate_abac.py" generated/abac.auto.tfvars $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),); \
	fi

# Validate the split (per-layer) ABAC config of the selected environment.
#   * ENV == ACCOUNT_ENV: validate abac.auto.tfvars in ACCOUNT_ENV_DIR, if present.
#   * otherwise: validate the data-access layer (abac.auto.tfvars together with
#     masking_functions.sql) if its tfvars file is present, then validate the
#     workspace-level abac.auto.tfvars in ENV_DIR.
# COUNTRY / INDUSTRY, when set, are forwarded as --country / --industry.
# In the workspace branch each layer runs in its own subshell so the `cd` of
# one layer cannot affect the next, and the first non-zero validator status is
# remembered in `rc` instead of being discarded: both layers are still checked,
# but the target fails if either one does.
validate: _bootstrap ## Validate split config in the selected environment
	@echo "=== Validate ($(ENV)) ==="
	@if [ "$(ENV)" = "$(ACCOUNT_ENV)" ]; then \
		cd "$(ACCOUNT_ENV_DIR)" && if [ -f abac.auto.tfvars ]; then python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),); fi; \
	else \
		rc=0; \
		if [ -f "$(ENV_DIR)/$(DATA_ACCESS_SUBDIR)/abac.auto.tfvars" ]; then \
			( cd "$(ENV_DIR)/$(DATA_ACCESS_SUBDIR)" && python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars masking_functions.sql $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),) ) || rc=$$?; \
		fi; \
		( cd "$(ENV_DIR)" && python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars $(if $(COUNTRY),--country $(COUNTRY),) $(if $(INDUSTRY),--industry $(INDUSTRY),) ) || { s=$$?; [ "$$rc" -ne 0 ] || rc=$$s; }; \
		exit $$rc; \
	fi

promote: _bootstrap _guard-workspace-target ## Split generated/ into layers (same-env), or cross-env: make promote SOURCE_ENV=dev DEST_ENV=prod DEST_CATALOG_MAP="dev_cat=prod_cat"
Expand Down Expand Up @@ -264,9 +264,9 @@ print(', '.join(cats) if cats else '(none detected)') \
$$map_flags; \
cd "$$dest_env_dir" && python3 "$(SHARED_ROOT)/scripts/split_abac_config.py" generated/abac.auto.tfvars ../$(ACCOUNT_ENV)/abac.auto.tfvars $(DATA_ACCESS_SUBDIR)/abac.auto.tfvars abac.auto.tfvars; \
cp "$$dest_env_dir/generated/masking_functions.sql" "$$dest_env_dir/$(DATA_ACCESS_SUBDIR)/masking_functions.sql"; \
cd "$(ACCOUNT_ENV_DIR)" && if [ -f abac.auto.tfvars ]; then python "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; fi; \
cd "$$dest_env_dir/$(DATA_ACCESS_SUBDIR)" && if [ -f abac.auto.tfvars ]; then python "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars masking_functions.sql; fi; \
cd "$$dest_env_dir" && python "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; \
cd "$(ACCOUNT_ENV_DIR)" && if [ -f abac.auto.tfvars ]; then python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; fi; \
cd "$$dest_env_dir/$(DATA_ACCESS_SUBDIR)" && if [ -f abac.auto.tfvars ]; then python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars masking_functions.sql; fi; \
cd "$$dest_env_dir" && python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; \
echo ""; \
echo "=== Promote complete: $(SOURCE_ENV) -> $(DEST_ENV) ==="; \
echo " Next: edit $$dest_env_dir/auth.auto.tfvars ($(DEST_ENV) workspace credentials)"; \
Expand All @@ -281,17 +281,17 @@ print(', '.join(cats) if cats else '(none detected)') \
fi; \
echo "=== Split generated/ to account + data_access + workspace ($(ENV)) ==="; \
if [ -f "$(ENV_DIR)/generated/masking_functions.sql" ]; then \
cd "$(ENV_DIR)" && python "$(SHARED_ROOT)/validate_abac.py" generated/abac.auto.tfvars generated/masking_functions.sql; \
cd "$(ENV_DIR)" && python3 "$(SHARED_ROOT)/validate_abac.py" generated/abac.auto.tfvars generated/masking_functions.sql; \
else \
cd "$(ENV_DIR)" && python "$(SHARED_ROOT)/validate_abac.py" generated/abac.auto.tfvars; \
cd "$(ENV_DIR)" && python3 "$(SHARED_ROOT)/validate_abac.py" generated/abac.auto.tfvars; \
fi; \
cd "$(ENV_DIR)" && python3 "$(SHARED_ROOT)/scripts/split_abac_config.py" generated/abac.auto.tfvars ../$(ACCOUNT_ENV)/abac.auto.tfvars $(DATA_ACCESS_SUBDIR)/abac.auto.tfvars abac.auto.tfvars; \
if [ -f "$(ENV_DIR)/generated/masking_functions.sql" ]; then \
cp "$(ENV_DIR)/generated/masking_functions.sql" "$(ENV_DIR)/$(DATA_ACCESS_SUBDIR)/masking_functions.sql"; \
fi; \
cd "$(ACCOUNT_ENV_DIR)" && if [ -f abac.auto.tfvars ]; then python "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; fi; \
cd "$(ENV_DIR)/$(DATA_ACCESS_SUBDIR)" && if [ -f abac.auto.tfvars ]; then python "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars masking_functions.sql; fi; \
cd "$(ENV_DIR)" && python "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; \
cd "$(ACCOUNT_ENV_DIR)" && if [ -f abac.auto.tfvars ]; then python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; fi; \
cd "$(ENV_DIR)/$(DATA_ACCESS_SUBDIR)" && if [ -f abac.auto.tfvars ]; then python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars masking_functions.sql; fi; \
cd "$(ENV_DIR)" && python3 "$(SHARED_ROOT)/validate_abac.py" abac.auto.tfvars; \
fi

_plan-layer:
Expand Down Expand Up @@ -722,7 +722,7 @@ integration-test: ## Run the full end-to-end integration test (setup → generat
@echo "========================================================"
@echo ""
@echo "── Step 1: Create test catalogs (dev + prod) ──────────"
cd "$(CLOUD_ROOT)" && python $(SHARED_ROOT)/scripts/setup_test_data.py \
cd "$(CLOUD_ROOT)" && python3 $(SHARED_ROOT)/scripts/setup_test_data.py \
--auth-file "$(ITEST_AUTH)" $(ITEST_WH_FLAG) --prod
@echo ""
@echo "── Step 2: Env scaffolding ─────────────────────────────"
Expand All @@ -738,7 +738,7 @@ integration-test: ## Run the full end-to-end integration test (setup → generat
$(MAKE) --no-print-directory apply ENV="$(ENV)"
@echo ""
@echo "── Step 6: Verify dev (row counts + ABAC governance) ───"
cd "$(CLOUD_ROOT)" && python $(SHARED_ROOT)/scripts/setup_test_data.py \
cd "$(CLOUD_ROOT)" && python3 $(SHARED_ROOT)/scripts/setup_test_data.py \
--auth-file "$(ITEST_AUTH)" $(ITEST_WH_FLAG) --verify
@echo ""
@echo "── Step 7: Per-space generation test (Finance only) ────"
Expand All @@ -753,12 +753,12 @@ integration-test: ## Run the full end-to-end integration test (setup → generat
$(MAKE) --no-print-directory apply ENV="$(ITEST_DEST_ENV)"
@echo ""
@echo "── Step 10: Verify prod (row counts + ABAC governance) ─"
cd "$(CLOUD_ROOT)" && python $(SHARED_ROOT)/scripts/setup_test_data.py \
cd "$(CLOUD_ROOT)" && python3 $(SHARED_ROOT)/scripts/setup_test_data.py \
--auth-file "$(ITEST_AUTH)" $(ITEST_WH_FLAG) --verify-prod
@echo ""
@if [ -z "$(KEEP_DATA)" ]; then \
echo "── Step 11: Teardown ────────────────────────────────────"; \
cd "$(CLOUD_ROOT)" && python $(SHARED_ROOT)/scripts/setup_test_data.py \
cd "$(CLOUD_ROOT)" && python3 $(SHARED_ROOT)/scripts/setup_test_data.py \
--auth-file "$(ITEST_AUTH)" $(ITEST_WH_FLAG) --teardown --teardown-prod; \
$(MAKE) --no-print-directory destroy ENV="$(ITEST_DEST_ENV)"; \
$(MAKE) --no-print-directory destroy ENV="$(ENV)"; \
Expand Down
109 changes: 89 additions & 20 deletions shared/examples/aus_bank_demo/setup_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,10 +447,12 @@ def _create_tables_via_sdk(dev_state: dict) -> str:
wh_id = wh.id
break
if not wh_id:
from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType
wh = w.warehouses.create(
name="Demo Warehouse",
cluster_size="2X-Small",
warehouse_type="PRO",
warehouse_type=CreateWarehouseRequestWarehouseType.PRO,
max_num_clusters=1,
auto_stop_mins=15,
enable_serverless_compute=True,
).result()
Expand Down Expand Up @@ -544,31 +546,98 @@ def _create_prod_workspace(cfg: dict, cloud: str, metastore_id: str, dev_state:
else:
ws_kwargs = {
"location": region,
"managed_resource_group_id": (
f"/subscriptions/{cfg.get('AZURE_SUBSCRIPTION_ID', '')}"
f"/resourceGroups/{prod_ws_name}-managed"
),
}

print(f" Creating workspace: {prod_ws_name} in {region}...")
try:
from databricks.sdk.service.provisioning import (
CustomerFacingComputeMode,
PricingTier,
if cloud == "azure":
# Azure workspaces must be created via ARM REST API, not the account SDK
import json
import urllib.request
import urllib.error
from azure.identity import ClientSecretCredential

subscription_id = cfg.get("AZURE_SUBSCRIPTION_ID", "")
resource_group = cfg.get("AZURE_RESOURCE_GROUP", "")
arm_cred = ClientSecretCredential(
tenant_id=cfg.get("AZURE_TENANT_ID", ""),
client_id=cfg.get("AZURE_CLIENT_ID", ""),
client_secret=cfg.get("AZURE_CLIENT_SECRET", ""),
)
ws = a.workspaces.create_and_wait(
workspace_name=prod_ws_name,
pricing_tier=PricingTier.ENTERPRISE,
compute_mode=CustomerFacingComputeMode.SERVERLESS,
**ws_kwargs,
arm_token = arm_cred.get_token("https://management.azure.com/.default").token
arm_api_version = "2025-10-01-preview"

arm_url = (
f"https://management.azure.com/subscriptions/{subscription_id}"
f"/resourceGroups/{resource_group}"
f"/providers/Microsoft.Databricks/workspaces/{prod_ws_name}"
f"?api-version={arm_api_version}"
)
except (ImportError, TypeError):
# Fallback for older SDK versions without compute_mode
ws = a.workspaces.create(workspace_name=prod_ws_name, **ws_kwargs).result()
owner = cfg.get("AZURE_CLIENT_ID", cfg.get("DATABRICKS_CLIENT_ID", "unknown"))
arm_body = json.dumps({
"location": region,
"sku": {"name": "premium"},
"properties": {"computeMode": "Serverless"},
"tags": {"ManagedBy": "setup_demo", "owner": owner},
}).encode()

req = urllib.request.Request(arm_url, data=arm_body, method="PUT", headers={
"Authorization": f"Bearer {arm_token}",
"Content-Type": "application/json",
})
try:
with urllib.request.urlopen(req) as resp:
data = json.loads(resp.read())
except urllib.error.HTTPError as e:
detail = e.read().decode(errors="replace")
raise RuntimeError(f"ARM PUT {e.code}: {detail}") from e

# Poll until Succeeded
import time as _time
deadline = _time.time() + 600
while _time.time() < deadline:
_time.sleep(15)
get_req = urllib.request.Request(arm_url, headers={"Authorization": f"Bearer {arm_token}"})
try:
with urllib.request.urlopen(get_req) as resp:
data = json.loads(resp.read())
except urllib.error.HTTPError as e:
if e.code >= 500:
print(f" Transient error ({e.code}), retrying...")
continue
raise
props = data.get("properties", {})
prov_state = props.get("provisioningState", "Unknown")
elapsed = int(_time.time() - (deadline - 600))
print(f" [{elapsed}s] {prov_state}")
if prov_state == "Succeeded":
break
if prov_state in ("Failed", "Canceled"):
raise RuntimeError(f"Workspace creation {prov_state}")
else:
raise TimeoutError("Workspace did not reach Succeeded within 10 minutes")

props = data.get("properties", {})
prod_ws_id = str(props["workspaceId"])
ws_url = props["workspaceUrl"]
prod_host = f"https://{ws_url}" if not ws_url.startswith("https://") else ws_url
else:
try:
from databricks.sdk.service.provisioning import (
CustomerFacingComputeMode,
PricingTier,
)
ws = a.workspaces.create_and_wait(
workspace_name=prod_ws_name,
pricing_tier=PricingTier.ENTERPRISE,
compute_mode=CustomerFacingComputeMode.SERVERLESS,
**ws_kwargs,
)
except (ImportError, TypeError):
# Fallback for older SDK versions without compute_mode
ws = a.workspaces.create(workspace_name=prod_ws_name, **ws_kwargs).result()

prod_host = (f"https://{ws.deployment_name}.cloud.databricks.com"
if cloud == "aws" else (ws.workspace_url or ""))
prod_ws_id = str(ws.workspace_id)
prod_host = f"https://{ws.deployment_name}.cloud.databricks.com"
prod_ws_id = str(ws.workspace_id)
print(f" {_green('✓')} Prod workspace created: {prod_host}")

# Assign shared metastore
Expand Down
Loading