diff --git a/integration/data_versioning_test.ts b/integration/data_versioning_test.ts index 00f10e24..91c69a08 100644 --- a/integration/data_versioning_test.ts +++ b/integration/data_versioning_test.ts @@ -326,6 +326,51 @@ Deno.test("Data Versioning: garbage collection preserves minimum versions", asyn }); }); +Deno.test("Data Versioning: garbage collection dry-run reports counts without deleting", async () => { + await withTempDir(async (repoDir) => { + await setupRepoDir(repoDir); + const repo = new FileSystemUnifiedDataRepository( + repoDir, + undefined, + new CatalogStore(join(repoDir, "_catalog.db")), + ); + const type = ModelType.create("test/model"); + const modelId = crypto.randomUUID(); + const owner = createOwner("test/model:gc-dryrun"); + + const data = Data.create({ + name: "gc-dryrun-test", + contentType: "text/plain", + lifetime: "infinite", + garbageCollection: 3, + tags: { type: "test" }, + ownerDefinition: owner, + }); + + // Write 7 versions — 4 over the retention count + for (let i = 1; i <= 7; i++) { + await repo.save( + type, + modelId, + data, + new TextEncoder().encode(`v${i}`), + ); + } + + const before = await repo.listVersions(type, modelId, "gc-dryrun-test"); + assertEquals(before, [1, 2, 3, 4, 5, 6, 7]); + + // Dry-run should report what would be pruned, but remove nothing + const gcResult = await repo.collectGarbage(type, modelId, { dryRun: true }); + assertEquals(gcResult.versionsRemoved, 4); + // Bytes reclaimed should be nonzero since the files still exist to stat + assertEquals(gcResult.bytesReclaimed > 0, true); + + const after = await repo.listVersions(type, modelId, "gc-dryrun-test"); + assertEquals(after, [1, 2, 3, 4, 5, 6, 7]); + }); +}); + Deno.test("Data Versioning: multiple data items with different GC policies", async () => { await withTempDir(async (repoDir) => { await setupRepoDir(repoDir); diff --git a/src/cli/commands/data_gc.ts b/src/cli/commands/data_gc.ts index da29dc5c..d5e82802 100644 --- 
a/src/cli/commands/data_gc.ts +++ b/src/cli/commands/data_gc.ts @@ -72,8 +72,10 @@ export const dataGcCommand = new Command() // Phase 1: Preview + Prompt (only in interactive mode without --force and not dry-run) if (cliCtx.outputMode === "log" && !options.force && !options.dryRun) { const preview = await dataGcPreview(ctx, deps); - if (preview.items.length === 0) { - console.log("No expired data found. Nothing to clean up."); + if ( + preview.items.length === 0 && preview.versionGcItems.length === 0 + ) { + console.log("Nothing to clean up."); return; } diff --git a/src/domain/data/data_lifecycle_service.ts b/src/domain/data/data_lifecycle_service.ts index 731e203c..7c0b5f74 100644 --- a/src/domain/data/data_lifecycle_service.ts +++ b/src/domain/data/data_lifecycle_service.ts @@ -20,6 +20,7 @@ import { getLogger } from "@logtape/logtape"; import type { Data } from "./data.ts"; import type { Lifetime } from "./data_metadata.ts"; +import { parseDataDuration } from "./duration.ts"; import type { UnifiedDataRepository } from "../../infrastructure/persistence/unified_data_repository.ts"; import type { WorkflowRunRepository } from "../workflows/repositories.ts"; import type { ModelType } from "../models/model_type.ts"; @@ -41,6 +42,17 @@ export interface ExpiredDataInfo { reason: "duration-expired" | "workflow-deleted" | "job-deleted"; } +/** + * A (modelType, modelId) pair that would have versions pruned by version GC, + * with the counts computed via a dry-run against the repository. + */ +export interface VersionGcPreviewInfo { + type: ModelType; + modelId: string; + versionsWouldBeRemoved: number; + bytesWouldBeReclaimed: number; +} + /** * Result of garbage collection operation. */ @@ -72,6 +84,13 @@ export interface DataLifecycleService { */ findExpiredData(): Promise; + /** + * Previews version-based garbage collection across all unique models without + * deleting anything. Returns one entry per (modelType, modelId) that has + * versions to prune. 
+ */ + previewVersionGarbage(): Promise; + /** * Deletes expired data and applies version garbage collection. * @@ -127,7 +146,7 @@ export class DefaultDataLifecycleService implements DataLifecycleService { // Duration-based lifetime try { - const durationMs = this.parseDuration(lifetime); + const durationMs = parseDataDuration(lifetime); return new Date(createdAt.getTime() + durationMs); } catch (error) { logger.error("Failed to parse lifetime duration: {lifetime}", { @@ -223,6 +242,39 @@ export class DefaultDataLifecycleService implements DataLifecycleService { return expired; } + async previewVersionGarbage(): Promise { + const previews: VersionGcPreviewInfo[] = []; + const allData = await this.dataRepo.findAllGlobal(); + const seen = new Set(); + for (const { modelType, modelId } of allData) { + const key = `${modelType.toDirectoryPath()}/${modelId}`; + if (seen.has(key)) continue; + seen.add(key); + + try { + const result = await this.dataRepo.collectGarbage( + modelType, + modelId, + { dryRun: true }, + ); + if (result.versionsRemoved > 0) { + previews.push({ + type: modelType, + modelId, + versionsWouldBeRemoved: result.versionsRemoved, + bytesWouldBeReclaimed: result.bytesReclaimed, + }); + } + } catch (error) { + logger.error( + "Error previewing version GC on {path}", + { path: key, error }, + ); + } + } + return previews; + } + async deleteExpiredData(options?: { dryRun?: boolean; }): Promise { @@ -307,28 +359,29 @@ export class DefaultDataLifecycleService implements DataLifecycleService { }); } - // Phase 2: Version-based garbage collection on all unique models - // Reuses the allData result from the single findAllGlobal() call - if (!dryRun) { - const seen = new Set(); - for (const { modelType, modelId } of allData) { - const key = `${modelType.toDirectoryPath()}/${modelId}`; - if (seen.has(key)) continue; - seen.add(key); - - try { - const result = await this.dataRepo.collectGarbage( - modelType, - modelId, - ); - versionsDeleted += 
result.versionsRemoved; - bytesReclaimed += result.bytesReclaimed; - } catch (error) { - logger.error( - "Error running GC on {path}", - { path: `${modelType.toDirectoryPath()}/${modelId}`, error }, - ); - } + // Phase 2: Version-based garbage collection on all unique models. + // Reuses the allData result from the single findAllGlobal() call. Runs in + // both dry-run and real mode — the repository computes would-be counts + // without deleting when dryRun is true. + const seen = new Set(); + for (const { modelType, modelId } of allData) { + const key = `${modelType.toDirectoryPath()}/${modelId}`; + if (seen.has(key)) continue; + seen.add(key); + + try { + const result = await this.dataRepo.collectGarbage( + modelType, + modelId, + { dryRun }, + ); + versionsDeleted += result.versionsRemoved; + bytesReclaimed += result.bytesReclaimed; + } catch (error) { + logger.error( + "Error running GC on {path}", + { path: `${modelType.toDirectoryPath()}/${modelId}`, error }, + ); } } @@ -340,31 +393,4 @@ export class DefaultDataLifecycleService implements DataLifecycleService { expiredEntries, }; } - - private parseDuration(duration: string): number { - const match = duration.match(/^(\d+)(mo|y|h|m|d|w)$/); - if (!match) { - throw new Error(`Invalid duration format: ${duration}`); - } - - const value = parseInt(match[1], 10); - const unit = match[2]; - - switch (unit) { - case "mo": - return value * 30 * 24 * 60 * 60 * 1000; - case "y": - return value * 365 * 24 * 60 * 60 * 1000; - case "h": - return value * 60 * 60 * 1000; - case "m": - return value * 60 * 1000; - case "d": - return value * 24 * 60 * 60 * 1000; - case "w": - return value * 7 * 24 * 60 * 60 * 1000; - default: - throw new Error(`Unknown duration unit: ${unit}`); - } - } } diff --git a/src/domain/data/data_lifecycle_service_test.ts b/src/domain/data/data_lifecycle_service_test.ts index a23e03b1..0e99aa39 100644 --- a/src/domain/data/data_lifecycle_service_test.ts +++ 
b/src/domain/data/data_lifecycle_service_test.ts @@ -30,8 +30,23 @@ class MockDataRepository { > = () => Promise.resolve([]); listVersions = (): Promise => Promise.resolve([]); removeLatestMarker = () => Promise.resolve(); - collectGarbage = () => - Promise.resolve({ versionsRemoved: 0, bytesReclaimed: 0 }); + collectGarbageCalls: Array<{ + type: ModelType; + modelId: string; + dryRun: boolean; + }> = []; + collectGarbage = ( + type: ModelType, + modelId: string, + options?: { dryRun?: boolean }, + ) => { + this.collectGarbageCalls.push({ + type, + modelId, + dryRun: options?.dryRun ?? false, + }); + return Promise.resolve({ versionsRemoved: 0, bytesReclaimed: 0 }); + }; deleteCalls: Array<{ type: ModelType; modelId: string; @@ -508,7 +523,140 @@ Deno.test("deleteExpiredData - dry run does not call delete()", async () => { assertEquals(mockRepo.deleteCalls.length, 0); assertEquals(result.dryRun, true); assertEquals(result.dataEntriesExpired, 1); - // No versions deleted or bytes reclaimed in dry run + // Expired-data byte stat is skipped in dry run, but Phase 2 runs + // against collectGarbage with dryRun=true (the mock returns 0/0). 
assertEquals(result.versionsDeleted, 0); assertEquals(result.bytesReclaimed, 0); }); + +Deno.test("deleteExpiredData - dry run passes dryRun=true to collectGarbage", async () => { + const mockRepo = new MockDataRepository(); + const modelType = ModelType.create("test/model"); + + mockRepo.findAllGlobal = () => + Promise.resolve([ + { + data: createMockData({ name: "d1", lifetime: "infinite" }), + modelType, + modelId: "m1", + }, + ]); + + const service = new DefaultDataLifecycleService( + mockRepo as never, + new MockWorkflowRunRepository() as never, + ); + + await service.deleteExpiredData({ dryRun: true }); + + // Phase 2 should have invoked collectGarbage with dryRun=true for the unique model + assertEquals(mockRepo.collectGarbageCalls.length, 1); + assertEquals(mockRepo.collectGarbageCalls[0].modelId, "m1"); + assertEquals(mockRepo.collectGarbageCalls[0].dryRun, true); +}); + +Deno.test("deleteExpiredData - real run passes dryRun=false to collectGarbage", async () => { + const mockRepo = new MockDataRepository(); + const modelType = ModelType.create("test/model"); + + mockRepo.findAllGlobal = () => + Promise.resolve([ + { + data: createMockData({ name: "d1", lifetime: "infinite" }), + modelType, + modelId: "m1", + }, + ]); + + const service = new DefaultDataLifecycleService( + mockRepo as never, + new MockWorkflowRunRepository() as never, + ); + + await service.deleteExpiredData(); + + assertEquals(mockRepo.collectGarbageCalls.length, 1); + assertEquals(mockRepo.collectGarbageCalls[0].dryRun, false); +}); + +Deno.test("previewVersionGarbage - returns one entry per unique model with pending prunes", async () => { + const mockRepo = new MockDataRepository(); + const type1 = ModelType.create("test/model-a"); + const type2 = ModelType.create("test/model-b"); + + mockRepo.findAllGlobal = () => + Promise.resolve([ + { + data: createMockData({ name: "d1", lifetime: "infinite" }), + modelType: type1, + modelId: "m1", + }, + { + data: createMockData({ name: "d2", 
lifetime: "infinite" }), + modelType: type1, + modelId: "m1", + }, + { + data: createMockData({ name: "d3", lifetime: "infinite" }), + modelType: type2, + modelId: "m2", + }, + ]); + mockRepo.collectGarbage = (( + type: ModelType, + modelId: string, + options?: { dryRun?: boolean }, + ) => { + mockRepo.collectGarbageCalls.push({ + type, + modelId, + dryRun: options?.dryRun ?? false, + }); + if (modelId === "m1") { + return Promise.resolve({ versionsRemoved: 5, bytesReclaimed: 1024 }); + } + return Promise.resolve({ versionsRemoved: 0, bytesReclaimed: 0 }); + }) as MockDataRepository["collectGarbage"]; + + const service = new DefaultDataLifecycleService( + mockRepo as never, + new MockWorkflowRunRepository() as never, + ); + + const previews = await service.previewVersionGarbage(); + + // Unique models iterated once each + assertEquals(mockRepo.collectGarbageCalls.length, 2); + // All calls must be dry-run + for (const call of mockRepo.collectGarbageCalls) { + assertEquals(call.dryRun, true); + } + // Only m1 has versions to prune + assertEquals(previews.length, 1); + assertEquals(previews[0].modelId, "m1"); + assertEquals(previews[0].versionsWouldBeRemoved, 5); + assertEquals(previews[0].bytesWouldBeReclaimed, 1024); +}); + +Deno.test("previewVersionGarbage - returns empty when no prunes pending", async () => { + const mockRepo = new MockDataRepository(); + const modelType = ModelType.create("test/model"); + + mockRepo.findAllGlobal = () => + Promise.resolve([ + { + data: createMockData({ name: "d1", lifetime: "infinite" }), + modelType, + modelId: "m1", + }, + ]); + + const service = new DefaultDataLifecycleService( + mockRepo as never, + new MockWorkflowRunRepository() as never, + ); + + const previews = await service.previewVersionGarbage(); + + assertEquals(previews.length, 0); +}); diff --git a/src/domain/data/duration.ts b/src/domain/data/duration.ts new file mode 100644 index 00000000..6443c66c --- /dev/null +++ b/src/domain/data/duration.ts @@ -0,0 +1,53 @@ 
+// Swamp, an Automation Framework +// Copyright (C) 2026 System Initiative, Inc. +// +// This file is part of Swamp. +// +// Swamp is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation, with the Swamp +// Extension and Definition Exception (found in the "COPYING-EXCEPTION" +// file). +// +// Swamp is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with Swamp. If not, see <https://www.gnu.org/licenses/>. +/** + * Parses the swamp data duration grammar (e.g. "7d", "30mo", "1y") into + * milliseconds. The grammar is shared between lifetime values and + * garbageCollection policies. + * + * Valid units: `m` (minutes), `h` (hours), `d` (days), `w` (weeks), + * `mo` (months = 30d), `y` (years = 365d). 
+ */ +export function parseDataDuration(duration: string): number { + const match = duration.match(/^(\d+)(mo|y|h|m|d|w)$/); + if (!match) { + throw new Error(`Invalid duration format: ${duration}`); + } + + const value = parseInt(match[1], 10); + const unit = match[2]; + + switch (unit) { + case "mo": + return value * 30 * 24 * 60 * 60 * 1000; + case "y": + return value * 365 * 24 * 60 * 60 * 1000; + case "h": + return value * 60 * 60 * 1000; + case "m": + return value * 60 * 1000; + case "d": + return value * 24 * 60 * 60 * 1000; + case "w": + return value * 7 * 24 * 60 * 60 * 1000; + default: + throw new Error(`Unknown duration unit: ${unit}`); + } +} diff --git a/src/domain/data/duration_test.ts b/src/domain/data/duration_test.ts new file mode 100644 index 00000000..66810d9c --- /dev/null +++ b/src/domain/data/duration_test.ts @@ -0,0 +1,89 @@ +// Swamp, an Automation Framework +// Copyright (C) 2026 System Initiative, Inc. +// +// This file is part of Swamp. +// +// Swamp is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation, with the Swamp +// Extension and Definition Exception (found in the "COPYING-EXCEPTION" +// file). +// +// Swamp is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with Swamp. If not, see . 
+ +import { assertEquals, assertThrows } from "@std/assert"; +import { parseDataDuration } from "./duration.ts"; + +Deno.test("parseDataDuration: parses minutes", () => { + assertEquals(parseDataDuration("5m"), 5 * 60 * 1000); +}); + +Deno.test("parseDataDuration: parses hours", () => { + assertEquals(parseDataDuration("2h"), 2 * 60 * 60 * 1000); +}); + +Deno.test("parseDataDuration: parses days", () => { + assertEquals(parseDataDuration("7d"), 7 * 24 * 60 * 60 * 1000); +}); + +Deno.test("parseDataDuration: parses weeks", () => { + assertEquals(parseDataDuration("3w"), 3 * 7 * 24 * 60 * 60 * 1000); +}); + +Deno.test("parseDataDuration: parses months as 30 days", () => { + assertEquals(parseDataDuration("1mo"), 30 * 24 * 60 * 60 * 1000); +}); + +Deno.test("parseDataDuration: parses years as 365 days", () => { + assertEquals(parseDataDuration("1y"), 365 * 24 * 60 * 60 * 1000); +}); + +Deno.test("parseDataDuration: multi-digit values", () => { + assertEquals(parseDataDuration("100d"), 100 * 24 * 60 * 60 * 1000); +}); + +Deno.test("parseDataDuration: rejects missing unit", () => { + assertThrows( + () => parseDataDuration("5"), + Error, + "Invalid duration format", + ); +}); + +Deno.test("parseDataDuration: rejects unknown unit", () => { + assertThrows( + () => parseDataDuration("5s"), + Error, + "Invalid duration format", + ); +}); + +Deno.test("parseDataDuration: rejects negative values", () => { + assertThrows( + () => parseDataDuration("-5d"), + Error, + "Invalid duration format", + ); +}); + +Deno.test("parseDataDuration: rejects empty string", () => { + assertThrows( + () => parseDataDuration(""), + Error, + "Invalid duration format", + ); +}); + +Deno.test("parseDataDuration: rejects whitespace-padded input", () => { + assertThrows( + () => parseDataDuration(" 5d "), + Error, + "Invalid duration format", + ); +}); diff --git a/src/domain/data/mod.ts b/src/domain/data/mod.ts index 0fca8719..8515355f 100644 --- a/src/domain/data/mod.ts +++ b/src/domain/data/mod.ts 
@@ -19,6 +19,8 @@ export { createDataId, type DataId, generateDataId } from "./data_id.ts"; +export { parseDataDuration } from "./duration.ts"; + export { type DataLifecycle, DataLifecycleSchema, diff --git a/src/infrastructure/persistence/unified_data_repository.ts b/src/infrastructure/persistence/unified_data_repository.ts index 6ee40921..de9a7ebf 100644 --- a/src/infrastructure/persistence/unified_data_repository.ts +++ b/src/infrastructure/persistence/unified_data_repository.ts @@ -30,6 +30,7 @@ import { generateDataId, isReservedDataName, type OwnerDefinition, + parseDataDuration, } from "../../domain/data/mod.ts"; import { ModelType } from "../../domain/models/model_type.ts"; import type { CatalogStore } from "./catalog_store.ts"; @@ -297,13 +298,18 @@ export interface UnifiedDataRepository { /** * Collects garbage according to each data's garbage collection policy. * + * When `options.dryRun` is true, no versions are deleted — the returned + * `versionsRemoved` and `bytesReclaimed` reflect what would be removed. + * * @param type - The model type * @param modelId - The model input ID + * @param options - Options for the operation * @returns The result of garbage collection */ collectGarbage( type: ModelType, modelId: string, + options?: { dryRun?: boolean }, ): Promise; /** @@ -1493,7 +1499,9 @@ export class FileSystemUnifiedDataRepository implements UnifiedDataRepository { async collectGarbage( type: ModelType, modelId: string, + options?: { dryRun?: boolean }, ): Promise { + const dryRun = options?.dryRun ?? 
false; let versionsRemoved = 0; let bytesReclaimed = 0; @@ -1514,7 +1522,7 @@ export class FileSystemUnifiedDataRepository implements UnifiedDataRepository { } } else { // Keep versions within duration - const duration = this.parseDuration(gc); + const duration = parseDataDuration(gc); const cutoff = Date.now() - duration; for (const version of versions) { @@ -1539,7 +1547,8 @@ export class FileSystemUnifiedDataRepository implements UnifiedDataRepository { versionDir: this.getPath(type, modelId, data.name, version), })); - // Execute in parallel batches + // Execute in parallel batches. For dry-run we still stat each path to + // accumulate bytesReclaimed but skip the actual remove. const GC_BATCH_CONCURRENCY = 20; for (let i = 0; i < removalTasks.length; i += GC_BATCH_CONCURRENCY) { const batch = removalTasks.slice(i, i + GC_BATCH_CONCURRENCY); @@ -1552,10 +1561,12 @@ export class FileSystemUnifiedDataRepository implements UnifiedDataRepository { } catch { // Ignore stat errors } - try { - await Deno.remove(versionDir, { recursive: true }); - } catch (error) { - if (!(error instanceof Deno.errors.NotFound)) throw error; + if (!dryRun) { + try { + await Deno.remove(versionDir, { recursive: true }); + } catch (error) { + if (!(error instanceof Deno.errors.NotFound)) throw error; + } } return bytes; }), @@ -1571,8 +1582,9 @@ export class FileSystemUnifiedDataRepository implements UnifiedDataRepository { } } - // Re-scan actual versions after parallel deletions to avoid stale marker - if (versionsToRemove.length > 0) { + // Re-scan actual versions after parallel deletions to avoid stale marker. + // Skip for dry-run — nothing was actually removed. 
+ if (!dryRun && versionsToRemove.length > 0) { const currentVersions = await this.listVersions( type, modelId, @@ -1758,33 +1770,6 @@ export class FileSystemUnifiedDataRepository implements UnifiedDataRepository { .map((b) => b.toString(16).padStart(2, "0")) .join(""); } - - private parseDuration(duration: string): number { - const match = duration.match(/^(\d+)(mo|y|h|m|d|w)$/); - if (!match) { - throw new Error(`Invalid duration format: ${duration}`); - } - - const value = parseInt(match[1], 10); - const unit = match[2]; - - switch (unit) { - case "mo": - return value * 30 * 24 * 60 * 60 * 1000; - case "y": - return value * 365 * 24 * 60 * 60 * 1000; - case "h": - return value * 60 * 60 * 1000; - case "m": - return value * 60 * 1000; - case "d": - return value * 24 * 60 * 60 * 1000; - case "w": - return value * 7 * 24 * 60 * 60 * 1000; - default: - throw new Error(`Unknown duration unit: ${unit}`); - } - } } /** diff --git a/src/libswamp/data/gc.ts b/src/libswamp/data/gc.ts index 4c91c1dc..6faeabee 100644 --- a/src/libswamp/data/gc.ts +++ b/src/libswamp/data/gc.ts @@ -21,6 +21,7 @@ import { DefaultDataLifecycleService, type ExpiredDataInfo, type LifecycleGCResult, + type VersionGcPreviewInfo, } from "../../domain/data/data_lifecycle_service.ts"; import { FileSystemUnifiedDataRepository } from "../../infrastructure/persistence/unified_data_repository.ts"; import { YamlWorkflowRunRepository } from "../../infrastructure/persistence/yaml_workflow_run_repository.ts"; @@ -39,9 +40,18 @@ export interface DataGcPreviewItem { reason: string; } +/** Preview item for a model that has versions to prune via version GC. */ +export interface VersionGcPreviewItem { + type: string; + modelId: string; + versionsWouldBeRemoved: number; + bytesWouldBeReclaimed: number; +} + /** Preview data returned before confirmation. */ export interface DataGcPreview { items: DataGcPreviewItem[]; + versionGcItems: VersionGcPreviewItem[]; } /** Data structure for the data gc completed event. 
*/ @@ -71,6 +81,7 @@ export interface DataGcInput { /** Dependencies for the data gc operation. */ export interface DataGcDeps { findExpiredData: () => Promise; + previewVersionGarbage: () => Promise; deleteExpiredData: (opts: { dryRun: boolean; }) => Promise; @@ -99,6 +110,7 @@ export function createDataGcDeps( ); return { findExpiredData: () => service.findExpiredData(), + previewVersionGarbage: () => service.previewVersionGarbage(), deleteExpiredData: (opts) => service.deleteExpiredData(opts), }; } @@ -108,8 +120,11 @@ export async function dataGcPreview( ctx: LibSwampContext, deps: DataGcDeps, ): Promise { - ctx.logger.debug`Finding expired data`; - const expired = await deps.findExpiredData(); + ctx.logger.debug`Finding expired data and previewing version GC`; + const [expired, versionGc] = await Promise.all([ + deps.findExpiredData(), + deps.previewVersionGarbage(), + ]); return { items: expired.map((item) => ({ type: item.type.toDirectoryPath(), @@ -117,6 +132,12 @@ export async function dataGcPreview( dataName: item.dataName, reason: item.reason, })), + versionGcItems: versionGc.map((item) => ({ + type: item.type.toDirectoryPath(), + modelId: item.modelId, + versionsWouldBeRemoved: item.versionsWouldBeRemoved, + bytesWouldBeReclaimed: item.bytesWouldBeReclaimed, + })), }; } diff --git a/src/libswamp/data/gc_test.ts b/src/libswamp/data/gc_test.ts index 2d39b87b..9258ba67 100644 --- a/src/libswamp/data/gc_test.ts +++ b/src/libswamp/data/gc_test.ts @@ -30,6 +30,7 @@ import { function makeDeps(overrides: Partial = {}): DataGcDeps { return { findExpiredData: () => Promise.resolve([]), + previewVersionGarbage: () => Promise.resolve([]), deleteExpiredData: () => Promise.resolve({ dataEntriesExpired: 0, @@ -48,6 +49,39 @@ Deno.test("dataGcPreview: returns empty preview when no expired data", async () const preview = await dataGcPreview(createLibSwampContext(), deps); assertEquals(preview.items.length, 0); + assertEquals(preview.versionGcItems.length, 0); +}); + 
+Deno.test("dataGcPreview: aggregates expired and version-gc items", async () => { + const deps = makeDeps({ + findExpiredData: () => + Promise.resolve([ + { + type: { toDirectoryPath: () => "aws/s3-bucket" }, + modelId: "m1", + dataName: "data1", + reason: "duration-expired", + }, + ] as unknown as import("../../domain/data/data_lifecycle_service.ts").ExpiredDataInfo[]), + previewVersionGarbage: () => + Promise.resolve([ + { + type: { toDirectoryPath: () => "command/shell" }, + modelId: "m2", + versionsWouldBeRemoved: 5, + bytesWouldBeReclaimed: 1024, + }, + ] as unknown as import("../../domain/data/data_lifecycle_service.ts").VersionGcPreviewInfo[]), + }); + + const preview = await dataGcPreview(createLibSwampContext(), deps); + + assertEquals(preview.items.length, 1); + assertEquals(preview.items[0].type, "aws/s3-bucket"); + assertEquals(preview.versionGcItems.length, 1); + assertEquals(preview.versionGcItems[0].type, "command/shell"); + assertEquals(preview.versionGcItems[0].versionsWouldBeRemoved, 5); + assertEquals(preview.versionGcItems[0].bytesWouldBeReclaimed, 1024); }); Deno.test("dataGcPreview: returns preview items for expired data", async () => { diff --git a/src/libswamp/mod.ts b/src/libswamp/mod.ts index 27041f62..19802b55 100644 --- a/src/libswamp/mod.ts +++ b/src/libswamp/mod.ts @@ -723,6 +723,7 @@ export { type DataGcPreview, dataGcPreview, type DataGcPreviewItem, + type VersionGcPreviewItem, } from "./data/gc.ts"; // Model create operations diff --git a/src/presentation/renderers/data_gc.ts b/src/presentation/renderers/data_gc.ts index 590340b0..13ab9194 100644 --- a/src/presentation/renderers/data_gc.ts +++ b/src/presentation/renderers/data_gc.ts @@ -84,10 +84,17 @@ export function renderDataGcPreview( mode: OutputMode, ): void { if (mode === "json") { + const totalVersions = preview.versionGcItems.reduce( + (sum, item) => sum + item.versionsWouldBeRemoved, + 0, + ); console.log(JSON.stringify( { expiredDataCount: preview.items.length, 
expiredData: preview.items, + versionGcModelCount: preview.versionGcItems.length, + versionGcVersionCount: totalVersions, + versionGcData: preview.versionGcItems, }, null, 2, @@ -95,6 +102,14 @@ export function renderDataGcPreview( } else { const logger = getSwampLogger(["data", "gc"]); logger.info`GC preview: ${preview.items.length} expired data items`; + if (preview.versionGcItems.length > 0) { + const totalVersions = preview.versionGcItems.reduce( + (sum, item) => sum + item.versionsWouldBeRemoved, + 0, + ); + logger + .info`version gc: ${preview.versionGcItems.length} models with ${totalVersions} excess versions`; + } } } diff --git a/src/presentation/renderers/data_gc_test.ts b/src/presentation/renderers/data_gc_test.ts new file mode 100644 index 00000000..16caece4 --- /dev/null +++ b/src/presentation/renderers/data_gc_test.ts @@ -0,0 +1,142 @@ +// Swamp, an Automation Framework +// Copyright (C) 2026 System Initiative, Inc. +// +// This file is part of Swamp. +// +// Swamp is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License version 3 +// as published by the Free Software Foundation, with the Swamp +// Extension and Definition Exception (found in the "COPYING-EXCEPTION" +// file). +// +// Swamp is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with Swamp. If not, see . 
+ +import { assertEquals, assertStringIncludes } from "@std/assert"; +import { initializeLogging } from "../../infrastructure/logging/logger.ts"; +import { renderDataGcPreview } from "./data_gc.ts"; + +await initializeLogging({}); + +function captureStdout(fn: () => void): string { + const originalLog = console.log; + const lines: string[] = []; + console.log = (...args: unknown[]) => { + lines.push( + args.map((a) => typeof a === "string" ? a : String(a)).join(" "), + ); + }; + try { + fn(); + } finally { + console.log = originalLog; + } + return lines.join("\n"); +} + +Deno.test("renderDataGcPreview: json mode emits both expired and version-gc fields", () => { + const out = captureStdout(() => + renderDataGcPreview({ + items: [ + { + type: "aws/s3-bucket", + modelId: "m1", + dataName: "d1", + reason: "duration-expired", + }, + ], + versionGcItems: [ + { + type: "command/shell", + modelId: "m2", + versionsWouldBeRemoved: 3, + bytesWouldBeReclaimed: 256, + }, + { + type: "command/shell", + modelId: "m3", + versionsWouldBeRemoved: 2, + bytesWouldBeReclaimed: 128, + }, + ], + }, "json") + ); + const parsed = JSON.parse(out); + assertEquals(parsed.expiredDataCount, 1); + assertEquals(parsed.expiredData.length, 1); + assertEquals(parsed.versionGcModelCount, 2); + assertEquals(parsed.versionGcVersionCount, 5); + assertEquals(parsed.versionGcData.length, 2); +}); + +Deno.test("renderDataGcPreview: json mode with no version-gc work reports zeroes", () => { + const out = captureStdout(() => + renderDataGcPreview({ + items: [], + versionGcItems: [], + }, "json") + ); + const parsed = JSON.parse(out); + assertEquals(parsed.expiredDataCount, 0); + assertEquals(parsed.versionGcModelCount, 0); + assertEquals(parsed.versionGcVersionCount, 0); +}); + +Deno.test("renderDataGcPreview: log mode omits version-gc line when empty", () => { + // Should not throw; the log-mode branch writes via the logger (not console.log), + // so we just verify nothing was written to console.log. 
+ const out = captureStdout(() => + renderDataGcPreview({ + items: [ + { + type: "aws/s3-bucket", + modelId: "m1", + dataName: "d1", + reason: "duration-expired", + }, + ], + versionGcItems: [], + }, "log") + ); + assertEquals(out, ""); +}); + +Deno.test("renderDataGcPreview: log mode includes version-gc data when present", () => { + // Log mode routes through the logger — this test just asserts the call doesn't + // throw with a populated versionGcItems array. + renderDataGcPreview({ + items: [], + versionGcItems: [ + { + type: "command/shell", + modelId: "m2", + versionsWouldBeRemoved: 3, + bytesWouldBeReclaimed: 256, + }, + ], + }, "log"); +}); + +Deno.test("renderDataGcPreview: json output is valid JSON", () => { + const out = captureStdout(() => + renderDataGcPreview({ + items: [], + versionGcItems: [ + { + type: "command/shell", + modelId: "m2", + versionsWouldBeRemoved: 7, + bytesWouldBeReclaimed: 4096, + }, + ], + }, "json") + ); + // Should parse; and the raw output should include the versionGc payload key. + JSON.parse(out); + assertStringIncludes(out, "versionGcData"); +});