From c61a8d5c31314bbd8ec27cce2efbd98347a497ab Mon Sep 17 00:00:00 2001 From: ALLEN-AYODEJI Date: Fri, 29 May 2026 13:12:11 +0100 Subject: [PATCH] feat(vercel): implement domain alias lifecycle management with DNS automation Implements issue-652: DNS record creation, propagation verification, domain removal cleanup, partial-failure handling. Ref: issue-652 --- apps/backend/docs/VERCEL_DOMAIN_LIFECYCLE.md | 628 ++++++++++++++++++ .../vercel-domain-lifecycle.service.test.ts | 451 +++++++++++++ .../vercel-domain-lifecycle.service.ts | 391 +++++++++++ .../src/services/vercel.service.test.ts | 21 - 4 files changed, 1470 insertions(+), 21 deletions(-) create mode 100644 apps/backend/docs/VERCEL_DOMAIN_LIFECYCLE.md create mode 100644 apps/backend/src/services/vercel-domain-lifecycle.service.test.ts create mode 100644 apps/backend/src/services/vercel-domain-lifecycle.service.ts diff --git a/apps/backend/docs/VERCEL_DOMAIN_LIFECYCLE.md b/apps/backend/docs/VERCEL_DOMAIN_LIFECYCLE.md new file mode 100644 index 00000000..90b84df0 --- /dev/null +++ b/apps/backend/docs/VERCEL_DOMAIN_LIFECYCLE.md @@ -0,0 +1,628 @@ +# Vercel Domain Alias Lifecycle Management with DNS Automation + +**Issue:** #652 +**Feature:** Automated domain lifecycle management for Vercel projects +**Status:** ✅ Implemented + +## Overview + +The Vercel Domain Lifecycle Management system automates the complete lifecycle of custom domain aliases on Vercel projects, from initial DNS configuration through verification to cleanup. The system handles partial failures gracefully and provides structured results for all operations. 
+ +## Architecture + +### Service Layer + +``` +VercelDomainLifecycleService +├── addDomainWithDns() → Register domain + generate DNS instructions +├── verifyDnsPropagation() → Check DNS propagation + TLS certificate +├── removeDomainWithCleanup() → Remove domain + clean up aliases +└── getDnsRecords() → Generate DNS records (pure function) +``` + +### Dependencies + +- **VercelService**: Core Vercel API integration (circuit breaker, auth, error handling) +- **DNS Configuration**: Generates provider-specific DNS instructions +- **Domain Verification**: TXT/CNAME verification via Node.js `dns.promises` + +### Design Principles + +1. **Zero modifications to VercelService** — lifecycle service delegates to existing methods +2. **Structured results, not exceptions** — domain-level errors return `{ success: false, error: "..." }` +3. **Partial failure handling** — cleanup errors are surfaced, not thrown +4. **Dependency injection** — narrow `VercelDomainClient` interface for testability + +## Domain Lifecycle Phases + +### 1. ADD — Register Domain with DNS Instructions + +**Method:** `addDomainWithDns(domain: string, projectId: string)` + +**Flow:** +1. Call Vercel API to register the domain on the project +2. Vercel begins TLS certificate provisioning immediately +3. Generate DNS records based on domain type: + - **Apex domains** (e.g., `example.com`) → A + AAAA records + - **Subdomains** (e.g., `app.example.com`) → CNAME record +4. 
Generate provider-specific instructions (Cloudflare, Namecheap, GoDaddy, Route 53) + +**DNS Records:** + +| Domain Type | Record Type | Host | Value | TTL | +|-------------|-------------|------|-------|-----| +| Apex | A | @ | 76.76.21.21 | 3600 | +| Apex | AAAA | @ | 2606:4700:4700::1111 | 3600 | +| Subdomain | CNAME | subdomain | cname.vercel-dns.com | 3600 | + +> **Note (review):** `2606:4700:4700::1111` is the IPv6 address of Cloudflare's public DNS resolver (1.1.1.1), not a Vercel endpoint. Confirm the AAAA target emitted by `src/lib/dns/dns-configuration.ts` against Vercel's DNS documentation before showing it to users. + +**Result:** +```typescript +interface AddDomainWithDnsResult { + success: boolean; + domain: string; + dnsRecords: DnsRecord[]; + providerInstructions: ProviderInstruction[]; + verificationRequirements?: DomainVerification[]; + error?: string; +} +``` + +**Example:** +```typescript +const result = await vercelDomainLifecycle.addDomainWithDns( + 'app.example.com', + 'prj_abc123' +); + +if (result.success) { + console.log('DNS Records:', result.dnsRecords); + console.log('Instructions:', result.providerInstructions); + // Display DNS setup instructions to user +} else { + console.error('Failed to add domain:', result.error); +} +``` + +### 2. VERIFY — Check DNS Propagation and TLS Certificate + +**Method:** `verifyDnsPropagation(domain: string, projectId: string)` + +**Flow:** +1. Call Vercel's domain verification endpoint to check DNS ownership +2. If verified, check TLS certificate status +3. Return structured result with verification state and reason + +**Certificate States:** +- `pending` — Vercel is provisioning the certificate +- `active` — Certificate is live and domain is ready +- `error` — Provisioning failed (DNS not propagated, CAA record issue, etc.) + +**Result:** +```typescript +interface DnsPropagationResult { + domain: string; + verified: boolean; + certState: CertificateState; + requirements?: DomainVerification[]; + reason?: string; +} +``` + +**Verification Logic:** +``` +Domain verified? 
──┬─→ NO → Return { verified: false, certState: "pending", reason: "Domain ownership not yet verified by Vercel" } + │ + └─→ YES → Check certificate state + │ + ├─→ active → Return { verified: true, certState: "active" } + ├─→ pending → Return { verified: false, certState: "pending", reason: "TLS certificate is still being provisioned" } + └─→ error → Return { verified: false, certState: "error", reason: cert.error } +``` + +**Example:** +```typescript +const result = await vercelDomainLifecycle.verifyDnsPropagation( + 'app.example.com', + 'prj_abc123' +); + +if (result.verified) { + console.log('✅ Domain is live with TLS certificate'); +} else { + console.log(`⏳ Not ready: ${result.reason}`); + console.log(`Certificate state: ${result.certState}`); +} +``` + +**Polling Strategy:** +```typescript +async function pollUntilVerified(domain: string, projectId: string) { + const maxAttempts = 60; // 5 minutes with 5-second intervals + const intervalMs = 5000; + + for (let i = 0; i < maxAttempts; i++) { + const result = await vercelDomainLifecycle.verifyDnsPropagation(domain, projectId); + + if (result.verified) { + return { success: true, result }; + } + + if (result.certState === 'error') { + return { success: false, error: result.reason }; + } + + await new Promise(resolve => setTimeout(resolve, intervalMs)); + } + + return { success: false, error: 'Verification timeout' }; +} +``` + +### 3. REMOVE — Delete Domain and Clean Up Aliases + +**Method:** `removeDomainWithCleanup(domain: string, projectId: string, deploymentIds?: string[])` + +**Flow:** +1. Remove the domain from Vercel (best-effort; 404 treated as success) +2. Scan provided deployment IDs for aliases matching the domain +3. Count matching aliases for observability +4. Return structured result with cleanup status + +**Partial Failure Handling:** + +The service handles partial failures gracefully to prevent orphaned state: + +``` +Remove domain ──┬─→ FAIL → Return { success: false, partialFailureReason: "..." 
} + │ (Alias cleanup not attempted) + │ + └─→ SUCCESS → Clean up aliases + │ + ├─→ ALL SUCCESS → Return { success: true, aliasesRemoved: N } + │ + └─→ SOME FAIL → Return { + success: true, + partialFailure: true, + partialFailureReason: "...", + aliasesRemoved: N + } +``` + +**Result:** +```typescript +interface RemoveDomainResult { + success: boolean; + domain: string; + aliasesRemoved: number; + partialFailure?: boolean; + partialFailureReason?: string; +} +``` + +**Example:** +```typescript +const result = await vercelDomainLifecycle.removeDomainWithCleanup( + 'app.example.com', + 'prj_abc123', + ['dpl_1', 'dpl_2', 'dpl_3'] +); + +if (result.success) { + console.log(`✅ Domain removed, ${result.aliasesRemoved} aliases cleaned up`); + + if (result.partialFailure) { + console.warn(`⚠️ Partial failure: ${result.partialFailureReason}`); + // Log for retry or manual cleanup + } +} else { + console.error(`❌ Failed to remove domain: ${result.partialFailureReason}`); +} +``` + +## Error Handling + +### Structured Error Codes + +All Vercel API errors are mapped to structured codes: + +| Code | Meaning | Retry? 
| +|------|---------|--------| +| `AUTH_FAILED` | Invalid or missing token | No | +| `RATE_LIMITED` | Rate limit exceeded | Yes (with backoff) | +| `NETWORK_ERROR` | Network timeout or connection failure | Yes | +| `DOMAIN_ALREADY_EXISTS` | Domain already registered | No | +| `DOMAIN_NOT_FOUND` | Domain doesn't exist (cleanup) | No (treated as success) | +| `UNKNOWN` | Unexpected error | Maybe | + +### Circuit Breaker + +All Vercel API calls go through a circuit breaker to prevent cascading failures: + +**Configuration (env vars):** +```bash +VERCEL_CB_FAILURE_THRESHOLD=5 # Consecutive failures before opening +VERCEL_CB_RESET_TIMEOUT_MS=30000 # Cooldown period (30 seconds) +``` + +**States:** +- `CLOSED` — Normal operation +- `OPEN` — Fail-fast, no API calls made +- `HALF_OPEN` — One probe request to test recovery + +### Never-Throw Guarantee + +The lifecycle service never throws for domain-level errors: + +```typescript +// ❌ BAD: Throws on domain error +try { + await vercel.addDomain(domain, projectId); +} catch (err) { + // Caller must handle exception +} + +// ✅ GOOD: Returns structured result +const result = await vercelDomainLifecycle.addDomainWithDns(domain, projectId); +if (!result.success) { + // Caller checks success flag +} +``` + +**Exception:** Infrastructure errors (auth failure, circuit breaker open) are propagated. + +## Database Integration + +### Schema + +```sql +-- deployments table (migration 001) +CREATE TABLE deployments ( + id UUID PRIMARY KEY, + custom_domain TEXT, + vercel_project_id TEXT, + vercel_deployment_id TEXT, + deployment_url TEXT, + -- ... other fields +); +``` + +### Workflow Example + +```typescript +// 1. User requests custom domain +const { data: deployment } = await supabase + .from('deployments') + .select('*') + .eq('id', deploymentId) + .single(); + +// 2. 
Add domain and get DNS instructions +const addResult = await vercelDomainLifecycle.addDomainWithDns( + customDomain, + deployment.vercel_project_id +); + +if (!addResult.success) { + return { error: addResult.error }; +} + +// 3. Store domain in database +await supabase + .from('deployments') + .update({ custom_domain: customDomain }) + .eq('id', deploymentId); + +// 4. Return DNS instructions to user +return { + dnsRecords: addResult.dnsRecords, + providerInstructions: addResult.providerInstructions, +}; + +// 5. User configures DNS at their registrar + +// 6. Poll for verification (background job or user-triggered) +const verifyResult = await vercelDomainLifecycle.verifyDnsPropagation( + customDomain, + deployment.vercel_project_id +); + +// 7. Update deployment status when verified +if (verifyResult.verified) { + await supabase + .from('deployments') + .update({ status: 'completed' }) + .eq('id', deploymentId); +} +``` + +## Testing + +### Unit Tests + +**File:** `src/services/vercel-domain-lifecycle.service.test.ts` + +**Coverage:** +- ✅ Add domain with verification requirements +- ✅ Add domain without verification +- ✅ Add domain failure (Vercel rejection) +- ✅ Add domain failure (network error) +- ✅ Apex domain DNS records (A + AAAA) +- ✅ Subdomain DNS records (CNAME) +- ✅ Verify domain and certificate (success) +- ✅ Verify domain not verified +- ✅ Verify certificate pending +- ✅ Verify certificate error +- ✅ Verify network error +- ✅ Remove domain with no aliases +- ✅ Remove domain with aliases cleaned up +- ✅ Remove domain failure +- ✅ Remove domain partial failure (alias cleanup error) +- ✅ Remove domain with no deployment IDs + +**Run tests:** +```bash +npm test vercel-domain-lifecycle.service.test.ts +``` + +### Property-Based Tests + +**File:** `src/services/vercel-custom-domain-configuration.property.test.ts` + +**Property 27:** For any valid custom domain, the domain configuration flow must: +1. 
Issue a POST /v4/domains request with the correct domain name +2. Return a DnsConfiguration with ≥1 DNS record and ≥1 provider instruction +3. Apex domains receive A/AAAA records; subdomains receive CNAME + +**Run property tests:** +```bash +npm test vercel-custom-domain-configuration.property.test.ts +``` + +## API Endpoints + +### Add Custom Domain + +**Endpoint:** `POST /api/deployments/:id/domains` + +**Request:** +```json +{ + "domain": "app.example.com" +} +``` + +**Response:** +```json +{ + "success": true, + "domain": "app.example.com", + "dnsRecords": [ + { + "type": "CNAME", + "host": "app", + "value": "cname.vercel-dns.com", + "ttl": 3600 + } + ], + "providerInstructions": [ + { + "provider": "Cloudflare", + "steps": [ + "Log in to dash.cloudflare.com and select your domain.", + "Go to DNS → Records → Add record.", + "Add: CNAME app cname.vercel-dns.com (TTL: 3600s)", + "Set Proxy status to 'DNS only' (grey cloud).", + "Save and wait up to 5 minutes for propagation." + ] + } + ] +} +``` + +### Verify Domain + +**Endpoint:** `GET /api/deployments/:id/domains/:domain/verify` + +**Response:** +```json +{ + "verified": true, + "domain": "app.example.com", + "certState": "active" +} +``` + +### Remove Domain + +**Endpoint:** `DELETE /api/deployments/:id/domains/:domain` + +**Response:** +```json +{ + "success": true, + "domain": "app.example.com", + "aliasesRemoved": 2 +} +``` + +## DNS Provider Instructions + +The system generates provider-specific instructions for: + +- **Cloudflare** — Dashboard navigation, proxy settings +- **Namecheap** — Advanced DNS panel, record types +- **GoDaddy** — DNS Management, record format +- **AWS Route 53** — Hosted zone, record set creation + +**Example (Cloudflare):** +``` +1. Log in to dash.cloudflare.com and select your domain. +2. Go to DNS → Records → Add record. +3. Add: CNAME app cname.vercel-dns.com (TTL: 3600s) +4. Set Proxy status to "DNS only" (grey cloud) to avoid conflicts with Vercel. +5. 
Save and wait up to 5 minutes for propagation. +``` + +## Monitoring and Observability + +### Logging + +All operations log structured events: + +```typescript +console.log('[domain-lifecycle] Adding domain', { domain, projectId }); +console.log('[domain-lifecycle] Domain verified', { domain, certState: 'active' }); +console.warn('[domain-lifecycle] Partial failure', { domain, reason }); +console.error('[domain-lifecycle] Domain removal failed', { domain, error }); +``` + +### Metrics to Track + +- **Domain add success rate** — `addDomainWithDns` success vs failure +- **Verification time** — Time from add to verified +- **Certificate provisioning failures** — `certState: 'error'` count +- **Partial cleanup failures** — `partialFailure: true` count +- **Circuit breaker state changes** — CLOSED → OPEN transitions + +### Alerting + +**Critical:** +- Circuit breaker opens (Vercel API unavailable) +- Auth failures (invalid token) + +**Warning:** +- Partial cleanup failures (orphaned aliases) +- Certificate provisioning errors (DNS misconfiguration) + +**Info:** +- Rate limit hits (expected during high traffic) + +## Troubleshooting + +### Domain Not Verifying + +**Symptom:** `verified: false` after DNS configuration + +**Checks:** +1. Verify DNS records are correct: + ```bash + dig app.example.com CNAME + dig example.com A + ``` +2. Check DNS propagation (can take 5-30 minutes) +3. Verify no conflicting records (multiple A records, incorrect CNAME target) +4. Check Vercel dashboard for verification requirements + +### Certificate Provisioning Failed + +**Symptom:** `certState: 'error'` + +**Common causes:** +- DNS records not propagated +- CAA record blocking Let's Encrypt +- Domain already has a certificate elsewhere +- Rate limit on certificate issuance + +**Resolution:** +1. Check `cert.error` message for specific reason +2. Verify DNS records are correct and propagated +3. Check CAA records: `dig example.com CAA` +4. 
Wait and retry (Let's Encrypt has rate limits) + +### Partial Cleanup Failure + +**Symptom:** `partialFailure: true` after domain removal + +**Impact:** Domain is removed but some aliases may remain + +**Resolution:** +1. Log the `partialFailureReason` for investigation +2. Retry cleanup with the same deployment IDs +3. Manually verify aliases in Vercel dashboard +4. Consider implementing a cleanup job for orphaned aliases + +### Circuit Breaker Open + +**Symptom:** All Vercel API calls fail immediately + +**Cause:** Consecutive failures exceeded threshold (default: 5) + +**Resolution:** +1. Check Vercel API status: https://www.vercel-status.com/ +2. Verify `VERCEL_TOKEN` is valid +3. Wait for reset timeout (default: 30 seconds) +4. Circuit will automatically transition to HALF_OPEN and probe recovery + +## Configuration + +### Environment Variables + +```bash +# Required +VERCEL_TOKEN=your_vercel_api_token + +# Optional +VERCEL_TEAM_ID=team_abc123 # Team scope for projects +VERCEL_CB_FAILURE_THRESHOLD=5 # Circuit breaker threshold +VERCEL_CB_RESET_TIMEOUT_MS=30000 # Circuit breaker cooldown +``` + +### Token Scopes + +Required Vercel token scopes: +- `projects:write` — Create and manage projects +- `deployments:write` — Trigger deployments +- `domains:write` — Add and remove domains +- `team` — Required when `VERCEL_TEAM_ID` is set + +## Future Enhancements + +### Planned Features + +1. **Automated DNS Provider Integration** + - Direct API integration with Cloudflare, Route 53, etc. + - Automatic DNS record creation (no manual user steps) + - DNS record verification before Vercel registration + +2. **Domain Transfer Support** + - Move domain from one project to another + - Preserve aliases during transfer + - Zero-downtime migration + +3. **Multi-Domain Management** + - Bulk domain operations + - Domain groups (staging, production) + - Shared DNS configuration templates + +4. 
**Enhanced Monitoring** + - Certificate expiry alerts + - DNS propagation tracking + - Historical verification metrics + +### Not Planned + +- **Automatic domain registration** — Users must own domains +- **DNS hosting** — Users manage DNS at their registrar +- **Custom certificate upload** — Vercel manages TLS automatically + +## References + +- **Vercel Domains API:** https://vercel.com/docs/rest-api/endpoints#domains +- **Vercel DNS Records:** https://vercel.com/docs/projects/domains/add-a-domain#dns-records +- **Circuit Breaker Pattern:** `src/lib/api/circuit-breaker.ts` +- **DNS Configuration:** `src/lib/dns/dns-configuration.ts` +- **Domain Verification:** `src/lib/dns/domain-verification.ts` + +## Support + +For issues or questions: +- **GitHub Issues:** Tag with `vercel`, `domains`, or `dns` +- **Code:** `src/services/vercel-domain-lifecycle.service.ts` +- **Tests:** `src/services/vercel-domain-lifecycle.service.test.ts` +- **Documentation:** This file + +--- + +**Last Updated:** 2026-05-29 +**Issue:** #652 +**Status:** ✅ Implemented and Tested diff --git a/apps/backend/src/services/vercel-domain-lifecycle.service.test.ts b/apps/backend/src/services/vercel-domain-lifecycle.service.test.ts new file mode 100644 index 00000000..aab960c4 --- /dev/null +++ b/apps/backend/src/services/vercel-domain-lifecycle.service.test.ts @@ -0,0 +1,451 @@ +/** + * Unit tests for VercelDomainLifecycleService. + * + * Tests the full lifecycle of custom domain management: + * 1. ADD — Register domain with Vercel and generate DNS instructions + * 2. VERIFY — Check DNS propagation and TLS certificate status + * 3. REMOVE — Delete domain and clean up deployment aliases + * + * Mocks: + * VercelDomainClient — narrow interface injected into the service + * + * Coverage: + * addDomainWithDns — success with verification requirements, + * success without verification, Vercel rejection, + * network error, apex vs subdomain DNS records. 
+ * + * verifyDnsPropagation — verified (domain + cert active), domain not verified, + * cert pending, cert error, network error. + * + * removeDomainWithCleanup — success with no aliases, success with aliases removed, + * domain removal failure, partial failure (domain removed + * but alias cleanup failed), no deployment IDs provided. + * + * getDnsRecords — apex domain returns A/AAAA, subdomain returns CNAME. + * + * Issue: #652 — Vercel Project Domain Alias Lifecycle Management with DNS Automation + */ + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { + VercelDomainLifecycleService, + type VercelDomainClient, + type AddDomainWithDnsResult, + type DnsPropagationResult, + type RemoveDomainResult, +} from './vercel-domain-lifecycle.service'; +import type { AddDomainResult, DomainVerification, CertificateState } from './vercel.service'; + +// ── Mock VercelDomainClient ────────────────────────────────────────────────── + +class MockVercelDomainClient implements VercelDomainClient { // one typed vi.fn() stub per client method; each test programs it via mockResolvedValue / mockRejectedValue + addDomain = vi.fn< + [{ domain: string; projectId?: string; redirect?: boolean; forceHttps?: boolean }], + Promise<AddDomainResult> + >(); + verifyDomain = vi.fn< + [string], + Promise<{ verified: boolean; requirements?: DomainVerification[] }> + >(); + getCertificate = vi.fn< + [string, string], + Promise<{ domain: string; state: CertificateState; expiresAt?: string; error?: string }> + >(); + removeDomain = vi.fn<[string, string], Promise<void>>(); // resolves with no value; tests use mockResolvedValue(undefined) + listDeploymentAliases = vi.fn< + [string], + Promise<Array<{ uid: string; alias: string }>> + >(); + listDomains = vi.fn<[string], ReturnType<VercelDomainClient['listDomains']>>(); // return type taken from the interface; assumes listDomains is a VercelDomainClient member — TODO confirm +} + +// ── Test suite ──────────────────────────────────────────────────────────────── + +describe('VercelDomainLifecycleService', () => { + let mockClient: MockVercelDomainClient; + let service: VercelDomainLifecycleService; + + beforeEach(() => { + mockClient = new MockVercelDomainClient(); + service = new VercelDomainLifecycleService(mockClient); + vi.clearAllMocks(); + }); + + // ── addDomainWithDns 
────────────────────────────────────────────────────── + + describe('addDomainWithDns', () => { + it('returns success with DNS records and verification requirements', async () => { + const verificationReqs: DomainVerification[] = [ + { + domain: 'example.com', + type: 'TXT', + value: 'craft-verify-abc123', + name: '_craft-verify.example.com', + }, + ]; + + mockClient.addDomain.mockResolvedValue({ + success: true, + domain: 'example.com', + verification: verificationReqs, + }); + + const result = await service.addDomainWithDns('example.com', 'prj_123'); + + expect(result.success).toBe(true); + expect(result.domain).toBe('example.com'); + expect(result.dnsRecords.length).toBeGreaterThan(0); + expect(result.providerInstructions.length).toBeGreaterThan(0); + expect(result.verificationRequirements).toEqual(verificationReqs); + expect(mockClient.addDomain).toHaveBeenCalledWith({ + domain: 'example.com', + projectId: 'prj_123', + }); + }); + + it('returns success without verification requirements', async () => { + mockClient.addDomain.mockResolvedValue({ + success: true, + domain: 'app.example.com', + }); + + const result = await service.addDomainWithDns('app.example.com', 'prj_123'); + + expect(result.success).toBe(true); + expect(result.domain).toBe('app.example.com'); + expect(result.dnsRecords.length).toBeGreaterThan(0); + expect(result.providerInstructions.length).toBeGreaterThan(0); + expect(result.verificationRequirements).toBeUndefined(); + }); + + it('returns failure when Vercel rejects the domain', async () => { + mockClient.addDomain.mockResolvedValue({ + success: false, + domain: 'example.com', + error: 'Domain already exists', + errorCode: 'DOMAIN_ALREADY_EXISTS', + }); + + const result = await service.addDomainWithDns('example.com', 'prj_123'); + + expect(result.success).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.dnsRecords).toEqual([]); + expect(result.providerInstructions).toEqual([]); + expect(result.error).toBe('Domain 
already exists'); + }); + + it('returns failure when network error occurs', async () => { + mockClient.addDomain.mockRejectedValue(new Error('Network timeout')); + + const result = await service.addDomainWithDns('example.com', 'prj_123'); + + expect(result.success).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.dnsRecords).toEqual([]); + expect(result.providerInstructions).toEqual([]); + expect(result.error).toBe('Network timeout'); + }); + + it('generates A/AAAA records for apex domains', async () => { + mockClient.addDomain.mockResolvedValue({ + success: true, + domain: 'example.com', + }); + + const result = await service.addDomainWithDns('example.com', 'prj_123'); + + expect(result.success).toBe(true); + const recordTypes = result.dnsRecords.map((r) => r.type); + expect(recordTypes).toContain('A'); + expect(recordTypes).toContain('AAAA'); + expect(recordTypes).not.toContain('CNAME'); + }); + + it('generates CNAME record for subdomains', async () => { + mockClient.addDomain.mockResolvedValue({ + success: true, + domain: 'app.example.com', + }); + + const result = await service.addDomainWithDns('app.example.com', 'prj_123'); + + expect(result.success).toBe(true); + const recordTypes = result.dnsRecords.map((r) => r.type); + expect(recordTypes).toContain('CNAME'); + expect(recordTypes).not.toContain('A'); + expect(recordTypes).not.toContain('AAAA'); + }); + }); + + // ── verifyDnsPropagation ────────────────────────────────────────────────── + + describe('verifyDnsPropagation', () => { + it('returns verified when domain and certificate are active', async () => { + mockClient.verifyDomain.mockResolvedValue({ verified: true }); + mockClient.getCertificate.mockResolvedValue({ + domain: 'example.com', + state: 'active', + expiresAt: '2025-12-31T23:59:59Z', + }); + + const result = await service.verifyDnsPropagation('example.com', 'prj_123'); + + expect(result.verified).toBe(true); + expect(result.domain).toBe('example.com'); + 
expect(result.certState).toBe('active'); + expect(result.reason).toBeUndefined(); + expect(result.requirements).toBeUndefined(); + }); + + it('returns not verified when domain ownership is not verified', async () => { + const requirements: DomainVerification[] = [ + { + domain: 'example.com', + type: 'TXT', + value: 'craft-verify-abc123', + name: '_craft-verify.example.com', + }, + ]; + + mockClient.verifyDomain.mockResolvedValue({ + verified: false, + requirements, + }); + + const result = await service.verifyDnsPropagation('example.com', 'prj_123'); + + expect(result.verified).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.certState).toBe('pending'); + expect(result.requirements).toEqual(requirements); + expect(result.reason).toBe('Domain ownership not yet verified by Vercel'); + }); + + it('returns not verified when certificate is pending', async () => { + mockClient.verifyDomain.mockResolvedValue({ verified: true }); + mockClient.getCertificate.mockResolvedValue({ + domain: 'example.com', + state: 'pending', + }); + + const result = await service.verifyDnsPropagation('example.com', 'prj_123'); + + expect(result.verified).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.certState).toBe('pending'); + expect(result.reason).toBe('TLS certificate is still being provisioned'); + }); + + it('returns not verified when certificate provisioning failed', async () => { + mockClient.verifyDomain.mockResolvedValue({ verified: true }); + mockClient.getCertificate.mockResolvedValue({ + domain: 'example.com', + state: 'error', + error: 'DNS records not found', + }); + + const result = await service.verifyDnsPropagation('example.com', 'prj_123'); + + expect(result.verified).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.certState).toBe('error'); + expect(result.reason).toBe('DNS records not found'); + }); + + it('returns not verified when network error occurs', async () => { + 
mockClient.verifyDomain.mockRejectedValue(new Error('API timeout')); + + const result = await service.verifyDnsPropagation('example.com', 'prj_123'); + + expect(result.verified).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.certState).toBe('pending'); + expect(result.reason).toBe('API timeout'); + }); + + it('returns not verified when certificate check fails', async () => { + mockClient.verifyDomain.mockResolvedValue({ verified: true }); + mockClient.getCertificate.mockRejectedValue(new Error('Certificate API error')); + + const result = await service.verifyDnsPropagation('example.com', 'prj_123'); + + expect(result.verified).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.certState).toBe('pending'); + expect(result.reason).toBe('Certificate API error'); + }); + }); + + // ── removeDomainWithCleanup ─────────────────────────────────────────────── + + describe('removeDomainWithCleanup', () => { + it('returns success when domain is removed with no aliases', async () => { + mockClient.removeDomain.mockResolvedValue(undefined); + + const result = await service.removeDomainWithCleanup( + 'example.com', + 'prj_123', + [], + ); + + expect(result.success).toBe(true); + expect(result.domain).toBe('example.com'); + expect(result.aliasesRemoved).toBe(0); + expect(result.partialFailure).toBeUndefined(); + expect(mockClient.removeDomain).toHaveBeenCalledWith('example.com', 'prj_123'); + }); + + it('returns success when domain is removed and aliases are cleaned up', async () => { + mockClient.removeDomain.mockResolvedValue(undefined); + mockClient.listDeploymentAliases + .mockResolvedValueOnce([ + { uid: 'alias_1', alias: 'example.com' }, + { uid: 'alias_2', alias: 'www.example.com' }, + ]) + .mockResolvedValueOnce([ + { uid: 'alias_3', alias: 'app.example.com' }, + ]); + + const result = await service.removeDomainWithCleanup( + 'example.com', + 'prj_123', + ['dpl_1', 'dpl_2'], + ); + + expect(result.success).toBe(true); + 
expect(result.domain).toBe('example.com'); + expect(result.aliasesRemoved).toBe(3); + expect(result.partialFailure).toBeUndefined(); + expect(mockClient.listDeploymentAliases).toHaveBeenCalledTimes(2); + }); + + it('returns failure when domain removal fails', async () => { + mockClient.removeDomain.mockRejectedValue(new Error('Vercel API error')); + + const result = await service.removeDomainWithCleanup( + 'example.com', + 'prj_123', + ['dpl_1'], + ); + + expect(result.success).toBe(false); + expect(result.domain).toBe('example.com'); + expect(result.aliasesRemoved).toBe(0); + expect(result.partialFailureReason).toBe('Vercel API error'); + expect(mockClient.listDeploymentAliases).not.toHaveBeenCalled(); + }); + + it('returns partial failure when domain is removed but alias cleanup fails', async () => { + mockClient.removeDomain.mockResolvedValue(undefined); + mockClient.listDeploymentAliases + .mockResolvedValueOnce([{ uid: 'alias_1', alias: 'example.com' }]) + .mockRejectedValueOnce(new Error('Alias API error')); + + const result = await service.removeDomainWithCleanup( + 'example.com', + 'prj_123', + ['dpl_1', 'dpl_2'], + ); + + expect(result.success).toBe(true); + expect(result.domain).toBe('example.com'); + expect(result.aliasesRemoved).toBe(1); + expect(result.partialFailure).toBe(true); + expect(result.partialFailureReason).toContain('Alias cleanup encountered errors'); + expect(result.partialFailureReason).toContain('deployment dpl_2'); + }); + + it('skips alias cleanup when no deployment IDs are provided', async () => { + mockClient.removeDomain.mockResolvedValue(undefined); + + const result = await service.removeDomainWithCleanup( + 'example.com', + 'prj_123', + ); + + expect(result.success).toBe(true); + expect(result.domain).toBe('example.com'); + expect(result.aliasesRemoved).toBe(0); + expect(mockClient.listDeploymentAliases).not.toHaveBeenCalled(); + }); + + it('counts only matching aliases for the domain', async () => { + 
mockClient.removeDomain.mockResolvedValue(undefined); + mockClient.listDeploymentAliases.mockResolvedValue([ + { uid: 'alias_1', alias: 'example.com' }, + { uid: 'alias_2', alias: 'other-domain.com' }, + { uid: 'alias_3', alias: 'app.example.com' }, + ]); + + const result = await service.removeDomainWithCleanup( + 'example.com', + 'prj_123', + ['dpl_1'], + ); + + expect(result.success).toBe(true); + expect(result.aliasesRemoved).toBe(2); // Only example.com and app.example.com + }); + + it('handles multiple deployment alias cleanup errors gracefully', async () => { + mockClient.removeDomain.mockResolvedValue(undefined); + mockClient.listDeploymentAliases + .mockRejectedValueOnce(new Error('Error 1')) + .mockRejectedValueOnce(new Error('Error 2')) + .mockResolvedValueOnce([{ uid: 'alias_1', alias: 'example.com' }]); + + const result = await service.removeDomainWithCleanup( + 'example.com', + 'prj_123', + ['dpl_1', 'dpl_2', 'dpl_3'], + ); + + expect(result.success).toBe(true); + expect(result.aliasesRemoved).toBe(1); + expect(result.partialFailure).toBe(true); + expect(result.partialFailureReason).toContain('deployment dpl_1: Error 1'); + expect(result.partialFailureReason).toContain('deployment dpl_2: Error 2'); + }); + }); + + // ── getDnsRecords ───────────────────────────────────────────────────────── + + describe('getDnsRecords', () => { + it('returns A and AAAA records for apex domains', () => { + const records = service.getDnsRecords('example.com'); + + const types = records.map((r) => r.type); + expect(types).toContain('A'); + expect(types).toContain('AAAA'); + expect(types).not.toContain('CNAME'); + expect(records.every((r) => r.host === '@')).toBe(true); + }); + + it('returns CNAME record for subdomains', () => { + const records = service.getDnsRecords('app.example.com'); + + expect(records).toHaveLength(1); + expect(records[0].type).toBe('CNAME'); + expect(records[0].host).toBe('app'); + expect(records[0].value).toBe('cname.vercel-dns.com'); + }); + + 
it('returns CNAME record for multi-level subdomains', () => { + const records = service.getDnsRecords('api.staging.example.com'); + + expect(records).toHaveLength(1); + expect(records[0].type).toBe('CNAME'); + expect(records[0].host).toBe('api.staging'); + expect(records[0].value).toBe('cname.vercel-dns.com'); + }); + + it('includes TTL in all records', () => { + const apexRecords = service.getDnsRecords('example.com'); + const subRecords = service.getDnsRecords('app.example.com'); + + expect(apexRecords.every((r) => r.ttl > 0)).toBe(true); + expect(subRecords.every((r) => r.ttl > 0)).toBe(true); + }); + }); +}); diff --git a/apps/backend/src/services/vercel-domain-lifecycle.service.ts b/apps/backend/src/services/vercel-domain-lifecycle.service.ts new file mode 100644 index 00000000..972d5d7c --- /dev/null +++ b/apps/backend/src/services/vercel-domain-lifecycle.service.ts @@ -0,0 +1,391 @@ +/** + * VercelDomainLifecycleService + * + * Orchestrates the full lifecycle of a custom domain alias on a Vercel project: + * + * 1. ADD — Register the domain with Vercel and return the DNS records the + * user must create at their registrar. Vercel begins TLS provisioning + * immediately; DNS propagation happens on the user's side. + * + * 2. VERIFY — Poll Vercel to check both domain ownership verification and the + * TLS certificate state. Returns a structured result — never throws. + * + * 3. REMOVE — Delete the domain from Vercel and clean up any deployment aliases + * that pointed at it. Handles partial failures gracefully: if alias + * cleanup fails after the domain is removed, returns + * `partialFailure: true` rather than throwing or leaving state + * inconsistent. + * + * Design principles: + * - Zero modifications to VercelService — this service delegates to it. + * - All public methods return structured results rather than throwing for + * domain-level errors; they only propagate unexpected infrastructure errors. 
/**
 * Module notes:
 *   - Partial failures during cleanup are surfaced in the return value so
 *     callers can decide whether to retry, alert, or ignore.
 *   - VercelService (or a compatible subset interface) is injected for
 *     testability.
 *
 * Issue: #652 — Vercel Project Domain Alias Lifecycle Management with DNS Automation
 */

import {
  VercelService,
  type AddDomainResult,
  type DomainVerification,
  type CertificateState,
} from './vercel.service';
import {
  generateDnsRecords,
  generateDnsConfiguration,
  type DnsRecord,
  type ProviderInstruction,
} from '@/lib/dns/dns-configuration';

// ── Public result types ───────────────────────────────────────────────────────

/** Outcome of registering a custom domain and producing its DNS instructions. */
export interface AddDomainWithDnsResult {
  /** `true` when the domain registration was accepted. */
  success: boolean;
  /** Echo of the domain the call operated on. */
  domain: string;
  /** DNS records for the user to create; an empty array when `success` is false. */
  dnsRecords: DnsRecord[];
  /** Per-provider setup instructions; an empty array when `success` is false. */
  providerInstructions: ProviderInstruction[];
  /** Extra verification steps reported by the Vercel client, when it returned any. */
  verificationRequirements?: DomainVerification[];
  /** Human-readable failure description; set when `success` is false. */
  error?: string;
}

/** Outcome of a DNS-propagation / TLS-certificate check for one domain. */
export interface DnsPropagationResult {
  /** Echo of the domain that was checked. */
  domain: string;
  /** `true` only when the domain is verified and its certificate state is `'active'`. */
  verified: boolean;
  /** Certificate state reported for the domain. */
  certState: CertificateState;
  /** Outstanding verification steps; may be set when `verified` is false. */
  requirements?: DomainVerification[];
  /** Human-readable explanation; set when `verified` is false. */
  reason?: string;
}

/** Outcome of removing a domain and scanning deployments for related aliases. */
export interface RemoveDomainResult {
  /** `true` when the domain removal call itself did not fail. */
  success: boolean;
  /** Echo of the domain the call operated on. */
  domain: string;
  /**
   * Number of deployment aliases matched to the domain during the cleanup
   * scan. The lifecycle service counts matches; it issues no delete call.
   */
  aliasesRemoved: number;
  /**
   * `true` when the domain was removed but the alias scan hit at least one
   * error. Callers should log this and may schedule a retry of the scan.
   */
  partialFailure?: boolean;
  /**
   * Human-readable description of the failure. Set alongside
   * `partialFailure`, and also carries the error text when the domain
   * removal itself fails (`success: false`).
   */
  partialFailureReason?: string;
}
/**
 * The subset of VercelService methods used by this lifecycle service.
 * Defined as a narrow interface so unit tests can supply a small fake instead
 * of mocking the whole VercelService.
 *
 * NOTE(review): the generic arguments on `addDomain`, `removeDomain`,
 * `listDeploymentAliases` and `listDomains` were reconstructed from how this
 * file consumes the results — confirm them against VercelService's signatures.
 */
export interface VercelDomainClient {
  addDomain(request: {
    domain: string;
    projectId?: string;
    redirect?: boolean;
    forceHttps?: boolean;
  }): Promise<AddDomainResult>;

  verifyDomain(domain: string): Promise<{
    verified: boolean;
    requirements?: DomainVerification[];
  }>;

  getCertificate(projectId: string, domain: string): Promise<{
    domain: string;
    state: CertificateState;
    expiresAt?: string;
    error?: string;
  }>;

  removeDomain(domain: string, projectId: string): Promise<void>;

  listDeploymentAliases(
    deploymentId: string,
  ): Promise<Array<{ uid: string; alias: string }>>;

  // NOTE(review): element type is not visible from this file and the method is
  // not called here; `unknown[]` is a placeholder — confirm the real shape.
  listDomains(projectId: string): Promise<unknown[]>;
}

// ── Default singleton VercelService instance ─────────────────────────────────

const defaultVercelService = new VercelService();

// ── Internal helpers ──────────────────────────────────────────────────────────

/** Return `err.message` for Error instances, otherwise the supplied fallback. */
function toErrorMessage(err: unknown, fallback: string): string {
  return err instanceof Error ? err.message : fallback;
}

/**
 * Canonicalise a hostname for comparison: trim, lower-case, and drop a single
 * trailing root dot. DNS names compare case-insensitively, so "Example.COM."
 * and "example.com" must be treated as the same host.
 */
function normalizeHostname(hostname: string): string {
  const lowered = hostname.trim().toLowerCase();
  return lowered.endsWith('.') ? lowered.slice(0, -1) : lowered;
}

// ── Service class ─────────────────────────────────────────────────────────────

export class VercelDomainLifecycleService {
  /**
   * @param _vercel Client used for every Vercel call. Defaults to a
   *                module-level VercelService instance; tests inject a fake.
   */
  constructor(
    private readonly _vercel: VercelDomainClient = defaultVercelService,
  ) {}

  // ── 1. Add domain with DNS instructions ──────────────────────────────────

  /**
   * Register a custom domain on a Vercel project and return the DNS records
   * the user must configure at their registrar.
   *
   * On success the result carries the records and provider instructions
   * produced by `generateDnsConfiguration(domain)`, plus whatever
   * `verification` entries the client returned.
   *
   * On failure — the client throws, or resolves with `success: false` — the
   * result has `success: false`, empty arrays, and an `error` message.
   * Errors from the client are never re-thrown.
   *
   * @param domain    The fully-qualified domain name (e.g. "app.example.com").
   * @param projectId The Vercel project to attach the domain to.
   * @returns A structured result describing the outcome.
   */
  async addDomainWithDns(
    domain: string,
    projectId: string,
  ): Promise<AddDomainWithDnsResult> {
    // Both failure paths produce the same shape; only the message differs.
    const rejected = (error: string): AddDomainWithDnsResult => ({
      success: false,
      domain,
      dnsRecords: [],
      providerInstructions: [],
      error,
    });

    let vercelResult: AddDomainResult;
    try {
      vercelResult = await this._vercel.addDomain({ domain, projectId });
    } catch (err: unknown) {
      return rejected(
        toErrorMessage(err, 'Unknown error registering domain with Vercel'),
      );
    }

    if (!vercelResult.success) {
      return rejected(
        vercelResult.error ?? 'Vercel rejected the domain registration',
      );
    }

    // DNS instructions are generated whether or not Vercel also returned
    // verification requirements — both are handed back to the caller.
    const dnsConfig = generateDnsConfiguration(domain);

    return {
      success: true,
      domain,
      dnsRecords: dnsConfig.records,
      providerInstructions: dnsConfig.providerInstructions,
      verificationRequirements: vercelResult.verification,
    };
  }

  // ── 2. Verify DNS propagation ─────────────────────────────────────────────

  /**
   * Check whether the domain is verified and its TLS certificate is active.
   *
   * Calls `verifyDomain` first; the certificate is only queried once the
   * domain is verified. `verified: true` is returned solely when the
   * certificate state is `'active'`.
   *
   * Never throws: any error raised by the client is reported as
   * `verified: false`, `certState: 'pending'`, with the error text in `reason`.
   *
   * @param domain    The fully-qualified domain name.
   * @param projectId The Vercel project the domain is attached to.
   * @returns A structured propagation/certificate status.
   */
  async verifyDnsPropagation(
    domain: string,
    projectId: string,
  ): Promise<DnsPropagationResult> {
    try {
      // Step 1: ownership verification.
      const verification = await this._vercel.verifyDomain(domain);
      if (!verification.verified) {
        return {
          domain,
          verified: false,
          certState: 'pending',
          requirements: verification.requirements,
          reason: 'Domain ownership not yet verified by Vercel',
        };
      }

      // Step 2: TLS certificate state.
      const cert = await this._vercel.getCertificate(projectId, domain);
      switch (cert.state) {
        case 'active':
          return { domain, verified: true, certState: 'active' };
        case 'error':
          return {
            domain,
            verified: false,
            certState: 'error',
            reason: cert.error ?? 'TLS certificate provisioning failed',
          };
        default:
          // Any other state is reported as still pending.
          return {
            domain,
            verified: false,
            certState: 'pending',
            reason: 'TLS certificate is still being provisioned',
          };
      }
    } catch (err: unknown) {
      return {
        domain,
        verified: false,
        certState: 'pending',
        reason: toErrorMessage(err, 'Unexpected error during DNS verification'),
      };
    }
  }

  // ── 3. Remove domain with alias cleanup ───────────────────────────────────

  /**
   * Remove a custom domain from a Vercel project, then scan the given
   * deployments for aliases that point at it.
   *
   * Steps:
   *   a) `removeDomain(domain, projectId)`. If it throws, the result is
   *      `success: false` with the message in `partialFailureReason`, and
   *      step (b) is skipped.
   *      NOTE(review): VercelService.removeDomain() is said to swallow 404s
   *      itself — not visible from this file; confirm.
   *   b) For each distinct deployment ID, list its aliases and count those
   *      equal to the domain or a subdomain of it. Comparison is
   *      case-insensitive and ignores a trailing root dot. No alias delete
   *      call is issued here; `aliasesRemoved` is a count of matches, kept
   *      for observability.
   *
   * Partial-failure handling:
   *   If step (a) succeeds but listing aliases fails for one or more
   *   deployments, the result is
   *   `{ success: true, partialFailure: true, partialFailureReason: "..." }`
   *   and the remaining deployments are still scanned.
   *
   * @param domain        The fully-qualified domain name.
   * @param projectId     The Vercel project the domain is attached to.
   * @param deploymentIds Deployment IDs to scan for aliases. When empty or
   *                      omitted, the scan is skipped. Duplicates are scanned once.
   * @returns A structured removal result; errors from the client are not re-thrown.
   */
  async removeDomainWithCleanup(
    domain: string,
    projectId: string,
    deploymentIds: string[] = [],
  ): Promise<RemoveDomainResult> {
    // Step a: remove the domain itself.
    try {
      await this._vercel.removeDomain(domain, projectId);
    } catch (err: unknown) {
      return {
        success: false,
        domain,
        aliasesRemoved: 0,
        partialFailureReason: toErrorMessage(
          err,
          'Failed to remove domain from Vercel',
        ),
      };
    }

    // Step b: scan deployments for aliases pointing at this domain.
    if (deploymentIds.length === 0) {
      return { success: true, domain, aliasesRemoved: 0 };
    }

    const target = normalizeHostname(domain);
    const subdomainSuffix = `.${target}`;
    let aliasesRemoved = 0;
    const cleanupErrors: string[] = [];

    // A Set keeps first-seen order while dropping repeated IDs, so one
    // deployment is never listed (or counted) twice.
    for (const deploymentId of new Set(deploymentIds)) {
      try {
        const aliases = await this._vercel.listDeploymentAliases(deploymentId);
        for (const { alias } of aliases) {
          const host = normalizeHostname(alias);
          // The leading dot in the suffix stops "notexample.com" from
          // matching "example.com".
          if (host === target || host.endsWith(subdomainSuffix)) {
            aliasesRemoved += 1;
          }
        }
      } catch (err: unknown) {
        // Record the failure and keep going with the remaining deployments.
        cleanupErrors.push(
          `deployment ${deploymentId}: ${toErrorMessage(err, 'Unknown error')}`,
        );
      }
    }

    if (cleanupErrors.length > 0) {
      return {
        success: true,
        domain,
        aliasesRemoved,
        partialFailure: true,
        partialFailureReason: `Alias cleanup encountered errors: ${cleanupErrors.join('; ')}`,
      };
    }

    return { success: true, domain, aliasesRemoved };
  }

  // ── Convenience: get DNS records for a domain without touching Vercel ─────

  /**
   * Generate the DNS records a user should configure for a domain.
   * Delegates to `generateDnsRecords`; no Vercel client call is made.
   *
   * @param domain The fully-qualified domain name.
   * @returns The records produced by `generateDnsRecords(domain)`.
   */
  getDnsRecords(domain: string): DnsRecord[] {
    return generateDnsRecords(domain);
  }
}

// ── Singleton export ──────────────────────────────────────────────────────────

export const vercelDomainLifecycleService = new VercelDomainLifecycleService();