diff --git a/CHANGELOG.md b/CHANGELOG.md index 66bcc8f6..ab8788cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **Force-kill a stuck session.** `POST /sessions/:id/force-kill` (OPERATOR) recovers a session whose engine + is wedged and won't respond to a normal stop/delete: the whatsapp-web.js engine **SIGKILLs its own Chromium + process directly** (never a process-wide kill that could take down other sessions), then best-effort tears + the client down; the Baileys engine ends its socket. The teardown is time-bounded and isolated, the session + is left `DISCONNECTED`, and it can be started again. Backs the dashboard's "Kill Stuck" button. + ## [0.4.2] - 2026-06-19 Bug-fix and hardening release: access-control tightening, session-lifecycle resilience, data-migration diff --git a/src/engine/adapters/baileys.adapter.ts b/src/engine/adapters/baileys.adapter.ts index 452c7c8a..c18ad3d8 100644 --- a/src/engine/adapters/baileys.adapter.ts +++ b/src/engine/adapters/baileys.adapter.ts @@ -293,6 +293,12 @@ export class BaileysAdapter implements IWhatsAppEngine { return Promise.resolve(); } + // Baileys has no separate Chromium process to SIGKILL (destroy() already ends the socket + // synchronously), so a force-destroy is just a destroy. 
+ forceDestroy(): Promise { + return this.destroy(); + } + // ----- Status ----- getStatus(): EngineStatus { diff --git a/src/engine/adapters/whatsapp-web-js.adapter.spec.ts b/src/engine/adapters/whatsapp-web-js.adapter.spec.ts index 3519d676..319110a1 100644 --- a/src/engine/adapters/whatsapp-web-js.adapter.spec.ts +++ b/src/engine/adapters/whatsapp-web-js.adapter.spec.ts @@ -128,6 +128,46 @@ describe('WhatsAppWebJsAdapter readiness guard (#100)', () => { }); }); +describe('WhatsAppWebJsAdapter.forceDestroy (recover a wedged session, #351)', () => { + const newAdapter = (): WhatsAppWebJsAdapter => + new WhatsAppWebJsAdapter({ sessionId: 'sess-1', sessionDataPath: './data/sessions', puppeteer: {} }); + const setClient = (adapter: WhatsAppWebJsAdapter, client: unknown): void => { + (adapter as unknown as { client: unknown }).client = client; + }; + const getClient = (adapter: WhatsAppWebJsAdapter): unknown => (adapter as unknown as { client: unknown }).client; + + it('SIGKILLs only its own browser process, then best-effort destroys the client', async () => { + const kill = jest.fn(); + const destroy = jest.fn().mockResolvedValue(undefined); + const adapter = newAdapter(); + setClient(adapter, { pupBrowser: { process: () => ({ kill }) }, destroy }); + + await adapter.forceDestroy(); + + expect(kill).toHaveBeenCalledWith('SIGKILL'); + expect(destroy).toHaveBeenCalledTimes(1); + expect(getClient(adapter)).toBeNull(); + expect(adapter.getStatus()).toBe(EngineStatus.DISCONNECTED); + }); + + it('still completes when the process handle is gone and destroy() rejects (best-effort)', async () => { + const adapter = newAdapter(); + setClient(adapter, { + pupBrowser: { process: () => null }, + destroy: jest.fn().mockRejectedValue(new Error('wedged')), + }); + + await expect(adapter.forceDestroy()).resolves.toBeUndefined(); + expect(getClient(adapter)).toBeNull(); + expect(adapter.getStatus()).toBe(EngineStatus.DISCONNECTED); + }); + + it('is a no-op when there is no client', 
async () => { + const adapter = newAdapter(); + await expect(adapter.forceDestroy()).resolves.toBeUndefined(); + }); +}); + describe('WhatsAppWebJsAdapter.resolveContactPhone (@lid -> phone, #263)', () => { // Stub a "ready" adapter with a fake client so we exercise the mapping without a real browser. const readyAdapter = (getContactLidAndPhone: jest.Mock): WhatsAppWebJsAdapter => { diff --git a/src/engine/adapters/whatsapp-web-js.adapter.ts b/src/engine/adapters/whatsapp-web-js.adapter.ts index 4c940c3d..5bfcdd7e 100644 --- a/src/engine/adapters/whatsapp-web-js.adapter.ts +++ b/src/engine/adapters/whatsapp-web-js.adapter.ts @@ -443,6 +443,36 @@ export class WhatsAppWebJsAdapter extends EventEmitter implements IWhatsAppEngin } } + /** + * Force-recover a wedged session: SIGKILL THIS client's own Chromium process directly (not a + * process-wide `pkill`, which would also kill other sessions), then best-effort `client.destroy()` + * for the rest of the cleanup. Each step has its own try/catch, so a missing process handle, a throwing kill, or a rejecting destroy() + * can't prevent the client reference being dropped and the status reset to DISCONNECTED. A destroy() that never settles is NOT caught here, so the caller must time-bound this call. No-op when there is no client. + */ + async forceDestroy(): Promise { + const client = this.client; + if (!client) return; + + try { + // pupBrowser is the Puppeteer Browser; .process() is the Chromium ChildProcess (null if already gone). 
+ const proc = ( + client as unknown as { pupBrowser?: { process?: () => { kill?: (sig: string) => void } | null } } + ).pupBrowser?.process?.(); + proc?.kill?.('SIGKILL'); + } catch (err) { + this.logger.warn('forceDestroy: failed to kill the browser process', { error: String(err) }); + } + + try { + await client.destroy(); + } catch (err) { + this.logger.warn('forceDestroy: client.destroy() failed after the kill (continuing)', { error: String(err) }); + } + + this.client = null; + this.setStatus(EngineStatus.DISCONNECTED); + } + getStatus(): EngineStatus { return this.status; } diff --git a/src/engine/interfaces/whatsapp-engine.interface.ts b/src/engine/interfaces/whatsapp-engine.interface.ts index aecf4c50..9c539f57 100644 --- a/src/engine/interfaces/whatsapp-engine.interface.ts +++ b/src/engine/interfaces/whatsapp-engine.interface.ts @@ -336,6 +336,10 @@ export interface IWhatsAppEngine { disconnect(): Promise; // Closes browser but keeps session (can reconnect without QR) logout(): Promise; // Logs out and clears session data (requires QR scan again) destroy(): Promise; + // Force-kill THIS engine's own resources immediately (e.g. SIGKILL a wedged Chromium for a stuck + // session), then best-effort graceful teardown — used to recover a session that destroy() can't. + // Each adapter kills only its own resources (never a process-wide pkill). 
+ forceDestroy(): Promise; // Status getStatus(): EngineStatus; diff --git a/src/modules/audit/entities/audit-log.entity.ts b/src/modules/audit/entities/audit-log.entity.ts index 8eea14aa..6a10492e 100644 --- a/src/modules/audit/entities/audit-log.entity.ts +++ b/src/modules/audit/entities/audit-log.entity.ts @@ -12,6 +12,7 @@ export enum AuditAction { SESSION_CREATED = 'session_created', SESSION_STARTED = 'session_started', SESSION_STOPPED = 'session_stopped', + SESSION_FORCE_KILLED = 'session_force_killed', SESSION_DELETED = 'session_deleted', SESSION_QR_GENERATED = 'session_qr_generated', SESSION_CONNECTED = 'session_connected', diff --git a/src/modules/session/session.controller.ts b/src/modules/session/session.controller.ts index 822b0f0d..f72f9231 100644 --- a/src/modules/session/session.controller.ts +++ b/src/modules/session/session.controller.ts @@ -145,6 +145,25 @@ export class SessionController { return this.transformSession(session); } + @Post(':id/force-kill') + @RequireRole(ApiKeyRole.OPERATOR) + @ApiOperation({ summary: 'Force-kill a stuck session (SIGKILL its wedged engine, then tear it down)' }) + @ApiParam({ name: 'id', description: 'Session ID' }) + @ApiResponse({ + status: 200, + description: 'Session force-killed', + type: SessionResponseDto, + }) + @ApiResponse({ status: 404, description: 'Session not found' }) + async forceKill(@Param('id') id: string): Promise { + const session = await this.sessionService.forceKill(id); + await this.auditService.logInfo(AuditAction.SESSION_FORCE_KILLED, { + sessionId: session.id, + sessionName: session.name, + }); + return this.transformSession(session); + } + @Get(':id/qr') @RequireRole(ApiKeyRole.OPERATOR) @ApiOperation({ summary: 'Get QR code for session authentication' }) diff --git a/src/modules/session/session.service.spec.ts b/src/modules/session/session.service.spec.ts index 938b9b49..18d75391 100644 --- a/src/modules/session/session.service.spec.ts +++ b/src/modules/session/session.service.spec.ts 
@@ -185,6 +185,37 @@ describe('SessionService', () => { await expect(service.delete('sess-uuid-1')).rejects.toThrow('db down'); expect(stoppingOf().has('sess-uuid-1')).toBe(false); // mark still cleared on failure }); + + it('forceKill() force-destroys the engine, reconciles the map, and marks the session stopping', async () => { + (repository.findOne as jest.Mock).mockResolvedValue(createMockSession()); + (repository.update as jest.Mock).mockResolvedValue({ affected: 1 }); + const engine = { forceDestroy: jest.fn().mockResolvedValue(undefined) }; + enginesOf().set('sess-uuid-1', engine); + + const result = await service.forceKill('sess-uuid-1'); + + expect(engine.forceDestroy).toHaveBeenCalledTimes(1); + expect(enginesOf().has('sess-uuid-1')).toBe(false); // map reconciled + // Stop-mark stays set (like stop()): it blocks an in-flight reconnect from resurrecting the + // session we just killed; a later start() clears it. + expect(stoppingOf().has('sess-uuid-1')).toBe(true); + expect(result).toBeDefined(); + }); + + it('forceKill() completes even when forceDestroy() rejects (best-effort recovery)', async () => { + (repository.findOne as jest.Mock).mockResolvedValue(createMockSession()); + (repository.update as jest.Mock).mockResolvedValue({ affected: 1 }); + const engine = { forceDestroy: jest.fn().mockRejectedValue(new Error('still wedged')) }; + enginesOf().set('sess-uuid-1', engine); + + await expect(service.forceKill('sess-uuid-1')).resolves.toBeDefined(); + expect(enginesOf().has('sess-uuid-1')).toBe(false); // map reconciled despite the failure + }); + + it('forceKill() throws NotFoundException for an unknown session', async () => { + (repository.findOne as jest.Mock).mockResolvedValue(null); + await expect(service.forceKill('nope')).rejects.toThrow(NotFoundException); + }); }); // ── create ──────────────────────────────────────────────────────── diff --git a/src/modules/session/session.service.ts b/src/modules/session/session.service.ts index 
b5e2b1b2..55a0c27e 100644 --- a/src/modules/session/session.service.ts +++ b/src/modules/session/session.service.ts @@ -209,7 +209,7 @@ export class SessionService implements OnModuleDestroy, OnModuleInit, OnApplicat sessionId: string, engine: IWhatsAppEngine, teardown: (e: IWhatsAppEngine) => Promise, - label: 'destroy' | 'disconnect', + label: 'destroy' | 'disconnect' | 'force-destroy', ): Promise { let timer: ReturnType | undefined; try { @@ -862,6 +862,33 @@ export class SessionService implements OnModuleDestroy, OnModuleInit, OnApplicat return this.findOne(id); } + /** + * Force-recover a stuck session: SIGKILL its engine's own resources (a wedged Chromium for the + * whatsapp-web.js engine) and tear it down, even when a normal stop()/delete() can't because the + * engine is hung. Mirrors stop()'s lifecycle (stop-mark + cancel-reconnect + bounded, isolated + * teardown + Map reconciliation) but uses the engine's forceDestroy(). The stop-mark is NOT cleared here (it is still set on return). The persisted status is set to DISCONNECTED even when no engine is registered for the id. A failing findOne() lookup propagates before anything is marked. + */ + async forceKill(id: string): Promise { + const session = await this.findOne(id); + + // Mark as tearing down BEFORE cleanup so an in-flight reconnect can't resurrect it. + this.stoppingSessions.add(id); + this.cancelReconnect(id); + + const engine = this.engines.get(id); + if (engine) { + await this.teardownEngineSafely(id, engine, e => e.forceDestroy(), 'force-destroy'); + this.engines.delete(id); + } + + this.logger.warn(`Session force-killed: ${session.name}`, { + sessionId: id, + action: 'force_kill', + }); + await this.updateStatus(id, SessionStatus.DISCONNECTED); + return this.findOne(id); + } + async getQRCode(id: string): Promise<{ qrCode: string; status: SessionStatus }> { const session = await this.findOne(id); const engine = this.engines.get(id);