Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- **Force-kill a stuck session.** `POST /sessions/:id/force-kill` (OPERATOR) recovers a session whose engine
is wedged and won't respond to a normal stop/delete: the whatsapp-web.js engine **SIGKILLs its own Chromium
process directly** (never a process-wide kill that could take down other sessions), then best-effort tears
the client down; the Baileys engine ends its socket. The teardown is time-bounded and isolated, the session
is left `DISCONNECTED`, and it can be started again. Backs the dashboard's "Kill Stuck" button.

## [0.4.2] - 2026-06-19

Bug-fix and hardening release: access-control tightening, session-lifecycle resilience, data-migration
Expand Down
6 changes: 6 additions & 0 deletions src/engine/adapters/baileys.adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,12 @@ export class BaileysAdapter implements IWhatsAppEngine {
return Promise.resolve();
}

/**
 * Force-destroy for the Baileys engine.
 *
 * Baileys has no separate Chromium process to SIGKILL (destroy() already ends the socket
 * synchronously), so a force-destroy is just a destroy.
 *
 * @returns The promise returned by destroy().
 */
forceDestroy(): Promise<void> {
return this.destroy();
}

// ----- Status -----

getStatus(): EngineStatus {
Expand Down
40 changes: 40 additions & 0 deletions src/engine/adapters/whatsapp-web-js.adapter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,46 @@ describe('WhatsAppWebJsAdapter readiness guard (#100)', () => {
});
});

// forceDestroy() is the recovery path for a wedged session (#351). Each case drives the adapter
// with a hand-built fake client that exposes only what forceDestroy() touches:
// `pupBrowser.process()` and `destroy()`.
describe('WhatsAppWebJsAdapter.forceDestroy (recover a wedged session, #351)', () => {
  const newAdapter = (): WhatsAppWebJsAdapter =>
    new WhatsAppWebJsAdapter({ sessionId: 'sess-1', sessionDataPath: './data/sessions', puppeteer: {} });
  // The helpers below reach the adapter's internal `client` field through a structural cast.
  const setClient = (adapter: WhatsAppWebJsAdapter, client: unknown): void => {
    (adapter as unknown as { client: unknown }).client = client;
  };
  const getClient = (adapter: WhatsAppWebJsAdapter): unknown => (adapter as unknown as { client: unknown }).client;

  it('SIGKILLs only its own browser process, then best-effort destroys the client', async () => {
    const kill = jest.fn();
    const destroy = jest.fn().mockResolvedValue(undefined);
    const adapter = newAdapter();
    setClient(adapter, { pupBrowser: { process: () => ({ kill }) }, destroy });

    await adapter.forceDestroy();

    expect(kill).toHaveBeenCalledTimes(1);
    expect(kill).toHaveBeenCalledWith('SIGKILL');
    expect(destroy).toHaveBeenCalledTimes(1);
    // The title promises "kill, THEN destroy": pin the order so a refactor that awaits the
    // (possibly wedged) destroy() before killing the browser is caught.
    expect(Math.max(...kill.mock.invocationCallOrder)).toBeLessThan(
      Math.min(...destroy.mock.invocationCallOrder),
    );
    expect(getClient(adapter)).toBeNull();
    expect(adapter.getStatus()).toBe(EngineStatus.DISCONNECTED);
  });

  it('still completes when the process handle is gone and destroy() rejects (best-effort)', async () => {
    const adapter = newAdapter();
    setClient(adapter, {
      // process() returning null models a Chromium process that has already exited.
      pupBrowser: { process: () => null },
      destroy: jest.fn().mockRejectedValue(new Error('wedged')),
    });

    await expect(adapter.forceDestroy()).resolves.toBeUndefined();
    expect(getClient(adapter)).toBeNull();
    expect(adapter.getStatus()).toBe(EngineStatus.DISCONNECTED);
  });

  it('is a no-op when there is no client', async () => {
    const adapter = newAdapter();
    await expect(adapter.forceDestroy()).resolves.toBeUndefined();
  });
});

describe('WhatsAppWebJsAdapter.resolveContactPhone (@lid -> phone, #263)', () => {
// Stub a "ready" adapter with a fake client so we exercise the mapping without a real browser.
const readyAdapter = (getContactLidAndPhone: jest.Mock): WhatsAppWebJsAdapter => {
Expand Down
30 changes: 30 additions & 0 deletions src/engine/adapters/whatsapp-web-js.adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,36 @@ export class WhatsAppWebJsAdapter extends EventEmitter implements IWhatsAppEngin
}
}

/**
 * Force-recover a wedged session: SIGKILL THIS client's own Chromium process directly (not a
 * process-wide `pkill`, which would also kill other sessions), then best-effort `client.destroy()`
 * for the rest of the cleanup.
 *
 * Neither step can prevent the engine from being torn down and the status reset:
 * - a missing process handle or a throwing `kill()` is logged and ignored;
 * - a `destroy()` that rejects is logged and ignored;
 * - a `destroy()` that never settles is abandoned after a fixed timeout (a try/catch alone only
 *   covers rejection, not a hang).
 *
 * No-op when there is no client.
 *
 * @returns Resolves once the client reference is cleared and the status is DISCONNECTED.
 */
async forceDestroy(): Promise<void> {
  const client = this.client;
  if (!client) return;

  try {
    // pupBrowser is the Puppeteer Browser; .process() is the Chromium ChildProcess (null if already gone).
    const proc = (
      client as unknown as { pupBrowser?: { process?: () => { kill?: (sig: string) => void } | null } }
    ).pupBrowser?.process?.();
    proc?.kill?.('SIGKILL');
  } catch (err) {
    this.logger.warn('forceDestroy: failed to kill the browser process', { error: String(err) });
  }

  // Upper bound on how long the graceful cleanup may hold up the state reset below.
  const destroyTimeoutMs = 5000;
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    await Promise.race([
      client.destroy(),
      new Promise<never>((_resolve, reject) => {
        timer = setTimeout(
          () => reject(new Error(`client.destroy() did not settle within ${destroyTimeoutMs}ms`)),
          destroyTimeoutMs,
        );
      }),
    ]);
  } catch (err) {
    this.logger.warn('forceDestroy: client.destroy() failed after the kill (continuing)', { error: String(err) });
  } finally {
    // Don't leave a pending timer behind when destroy() settled first.
    if (timer !== undefined) clearTimeout(timer);
  }

  this.client = null;
  this.setStatus(EngineStatus.DISCONNECTED);
}

/** Returns the engine status currently stored on this adapter. */
getStatus(): EngineStatus {
return this.status;
}
Expand Down
4 changes: 4 additions & 0 deletions src/engine/interfaces/whatsapp-engine.interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ export interface IWhatsAppEngine {
disconnect(): Promise<void>; // Closes browser but keeps session (can reconnect without QR)
logout(): Promise<void>; // Logs out and clears session data (requires QR scan again)
destroy(): Promise<void>;
/**
 * Force-kill THIS engine's own resources immediately (e.g. SIGKILL a wedged Chromium for a stuck
 * session), then best-effort graceful teardown — used to recover a session that destroy() can't.
 * Each adapter kills only its own resources (never a process-wide pkill).
 */
forceDestroy(): Promise<void>;

// Status
getStatus(): EngineStatus;
Expand Down
1 change: 1 addition & 0 deletions src/modules/audit/entities/audit-log.entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export enum AuditAction {
SESSION_CREATED = 'session_created',
SESSION_STARTED = 'session_started',
SESSION_STOPPED = 'session_stopped',
SESSION_FORCE_KILLED = 'session_force_killed',
SESSION_DELETED = 'session_deleted',
SESSION_QR_GENERATED = 'session_qr_generated',
SESSION_CONNECTED = 'session_connected',
Expand Down
19 changes: 19 additions & 0 deletions src/modules/session/session.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,25 @@ export class SessionController {
return this.transformSession(session);
}

/**
 * POST /sessions/:id/force-kill — recover a stuck session.
 *
 * Delegates the kill to the session service, records a SESSION_FORCE_KILLED audit entry for the
 * affected session, and responds with the transformed session.
 *
 * @param id Session ID taken from the route.
 * @returns The session as a response DTO.
 */
@Post(':id/force-kill')
@RequireRole(ApiKeyRole.OPERATOR)
@ApiOperation({ summary: 'Force-kill a stuck session (SIGKILL its wedged engine, then tear it down)' })
@ApiParam({ name: 'id', description: 'Session ID' })
@ApiResponse({ status: 200, description: 'Session force-killed', type: SessionResponseDto })
@ApiResponse({ status: 404, description: 'Session not found' })
async forceKill(@Param('id') id: string): Promise<SessionResponseDto> {
  const killed = await this.sessionService.forceKill(id);

  // Audit with the identifiers of the session the service returned.
  const { id: sessionId, name: sessionName } = killed;
  await this.auditService.logInfo(AuditAction.SESSION_FORCE_KILLED, { sessionId, sessionName });

  return this.transformSession(killed);
}

@Get(':id/qr')
@RequireRole(ApiKeyRole.OPERATOR)
@ApiOperation({ summary: 'Get QR code for session authentication' })
Expand Down
31 changes: 31 additions & 0 deletions src/modules/session/session.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,37 @@ describe('SessionService', () => {
await expect(service.delete('sess-uuid-1')).rejects.toThrow('db down');
expect(stoppingOf().has('sess-uuid-1')).toBe(false); // mark still cleared on failure
});

it('forceKill() force-destroys the engine, reconciles the map, and marks the session stopping', async () => {
  const sessionId = 'sess-uuid-1';
  (repository.findOne as jest.Mock).mockResolvedValue(createMockSession());
  (repository.update as jest.Mock).mockResolvedValue({ affected: 1 });
  const forceDestroy = jest.fn().mockResolvedValue(undefined);
  enginesOf().set(sessionId, { forceDestroy });

  const killed = await service.forceKill(sessionId);

  expect(forceDestroy).toHaveBeenCalledTimes(1);
  // The engine map no longer holds the killed engine.
  expect(enginesOf().has(sessionId)).toBe(false);
  // As with stop(), the stop-mark is left in place: it keeps an in-flight reconnect from
  // resurrecting the session that was just killed. A later start() clears it.
  expect(stoppingOf().has(sessionId)).toBe(true);
  expect(killed).toBeDefined();
});

it('forceKill() completes even when forceDestroy() rejects (best-effort recovery)', async () => {
  const sessionId = 'sess-uuid-1';
  (repository.findOne as jest.Mock).mockResolvedValue(createMockSession());
  (repository.update as jest.Mock).mockResolvedValue({ affected: 1 });
  const wedgedEngine = { forceDestroy: jest.fn().mockRejectedValue(new Error('still wedged')) };
  enginesOf().set(sessionId, wedgedEngine);

  const outcome = service.forceKill(sessionId);

  await expect(outcome).resolves.toBeDefined();
  // The failed teardown must not leave the engine behind in the map.
  expect(enginesOf().has(sessionId)).toBe(false);
});

it('forceKill() throws NotFoundException for an unknown session', async () => {
  // The repository finds nothing for this id.
  (repository.findOne as jest.Mock).mockResolvedValue(null);

  const attempt = service.forceKill('nope');

  await expect(attempt).rejects.toThrow(NotFoundException);
});
});

// ── create ────────────────────────────────────────────────────────
Expand Down
29 changes: 28 additions & 1 deletion src/modules/session/session.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ export class SessionService implements OnModuleDestroy, OnModuleInit, OnApplicat
sessionId: string,
engine: IWhatsAppEngine,
teardown: (e: IWhatsAppEngine) => Promise<void>,
label: 'destroy' | 'disconnect',
label: 'destroy' | 'disconnect' | 'force-destroy',
): Promise<void> {
let timer: ReturnType<typeof setTimeout> | undefined;
try {
Expand Down Expand Up @@ -862,6 +862,33 @@ export class SessionService implements OnModuleDestroy, OnModuleInit, OnApplicat
return this.findOne(id);
}

/**
 * Last-resort recovery for a stuck session whose engine is hung and cannot be stopped or deleted
 * normally.
 *
 * Follows the same lifecycle as stop() — stop-mark, reconnect cancellation, teardown through
 * teardownEngineSafely(), engine-map reconciliation — but tears the engine down with its
 * forceDestroy() (for the whatsapp-web.js engine that means SIGKILLing its own wedged Chromium).
 *
 * @param id Session ID.
 * @returns The session re-read after its status was set to DISCONNECTED.
 */
async forceKill(id: string): Promise<Session> {
  // Look the session up first, before any in-memory state is touched.
  const target = await this.findOne(id);

  // Flag the session as tearing down and drop any scheduled reconnect BEFORE cleanup,
  // so an in-flight reconnect can't resurrect it.
  this.stoppingSessions.add(id);
  this.cancelReconnect(id);

  const liveEngine = this.engines.get(id);
  if (liveEngine) {
    await this.teardownEngineSafely(id, liveEngine, current => current.forceDestroy(), 'force-destroy');
    this.engines.delete(id);
  }

  this.logger.warn(`Session force-killed: ${target.name}`, {
    sessionId: id,
    action: 'force_kill',
  });

  await this.updateStatus(id, SessionStatus.DISCONNECTED);
  const refreshed = await this.findOne(id);
  return refreshed;
}

async getQRCode(id: string): Promise<{ qrCode: string; status: SessionStatus }> {
const session = await this.findOne(id);
const engine = this.engines.get(id);
Expand Down
Loading