diff --git a/ice-rest-catalog/src/test/resources/scenarios/basic-operations/run.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/basic-operations/run.sh.tmpl
index 13f640b..b4ffde0 100644
--- a/ice-rest-catalog/src/test/resources/scenarios/basic-operations/run.sh.tmpl
+++ b/ice-rest-catalog/src/test/resources/scenarios/basic-operations/run.sh.tmpl
@@ -108,6 +108,12 @@ if command -v aws &>/dev/null && [ -n "{{MINIO_ENDPOINT}}" ]; then
   fi
 fi
 
+# List the snapshot, manifests and files of each table into /tmp/basic_ops_files.txt (checked by verify.sh)
+{{ICE_CLI}} --config {{CLI_CONFIG}} files ${TABLE_IRIS} > /tmp/basic_ops_files.txt
+{{ICE_CLI}} --config {{CLI_CONFIG}} files ${TABLE_PARTITIONED} >> /tmp/basic_ops_files.txt
+{{ICE_CLI}} --config {{CLI_CONFIG}} files ${TABLE_SORTED} >> /tmp/basic_ops_files.txt
+echo "OK Listed files in tables"
+
 # Cleanup tables then namespace
 {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_IRIS}
 {{ICE_CLI}} --config {{CLI_CONFIG}} delete-table ${TABLE_PARTITIONED}
diff --git a/ice-rest-catalog/src/test/resources/scenarios/basic-operations/verify.sh.tmpl b/ice-rest-catalog/src/test/resources/scenarios/basic-operations/verify.sh.tmpl
index f99539f..59692b4 100644
--- a/ice-rest-catalog/src/test/resources/scenarios/basic-operations/verify.sh.tmpl
+++ b/ice-rest-catalog/src/test/resources/scenarios/basic-operations/verify.sh.tmpl
@@ -4,7 +4,7 @@ set -e
 # Verification script - checks that the test completed successfully
 # Exit code 0 = success, non-zero = failure
 # Expects run.sh to have written: basic_ops_describe.txt, basic_ops_scan_iris.txt,
-# basic_ops_scan_partitioned.txt, basic_ops_scan_sorted.txt under /tmp
+# basic_ops_scan_partitioned.txt, basic_ops_scan_sorted.txt, basic_ops_files.txt under /tmp
 
 echo "Verifying basic operations test..."
 
@@ -31,8 +31,43 @@ for f in /tmp/basic_ops_scan_iris.txt /tmp/basic_ops_scan_partitioned.txt /tmp/b
   fi
 done
 
+# Verify files output contains expected structure (Snapshots/Snapshot/Manifest/Datafile and S3 paths)
+F="/tmp/basic_ops_files.txt"
+if [ ! -f "$F" ] || [ ! -s "$F" ]; then
+  echo "FAIL $F not found or empty"
+  exit 1
+fi
+if ! grep -q "Snapshots:" "$F"; then
+  echo "FAIL $F does not contain expected 'Snapshots:' header"
+  exit 1
+fi
+if ! grep -qE "Snapshot [0-9]+" "$F"; then
+  echo "FAIL $F does not contain expected Snapshot entry with ID"
+  exit 1
+fi
+if ! grep -q "Manifest:" "$F"; then
+  echo "FAIL $F does not contain expected 'Manifest:' entry"
+  exit 1
+fi
+if ! grep -q "Datafile:" "$F"; then
+  echo "FAIL $F does not contain expected 'Datafile:' entry"
+  exit 1
+fi
+if ! grep -qE "s3://test-bucket/warehouse/test_ns/.*/metadata/snap-.*\.avro" "$F"; then
+  echo "FAIL $F does not contain expected snapshot metadata path pattern"
+  exit 1
+fi
+if ! grep -qE "s3://test-bucket/warehouse/test_ns/.*/metadata/.*-m0\.avro" "$F"; then
+  echo "FAIL $F does not contain expected manifest path pattern"
+  exit 1
+fi
+# Accept both s3:// and s3a:// for data files; the metadata checks above expect s3://.
+if ! grep -qE "s3a?://test-bucket/warehouse/test_ns/.*/data/.*\.parquet" "$F"; then
+  echo "FAIL $F does not contain expected datafile path pattern"
+  exit 1
+fi
 # Cleanup temp files
-rm -f /tmp/basic_ops_describe.txt /tmp/basic_ops_scan_iris.txt /tmp/basic_ops_scan_partitioned.txt /tmp/basic_ops_scan_sorted.txt
+rm -f /tmp/basic_ops_describe.txt /tmp/basic_ops_scan_iris.txt /tmp/basic_ops_scan_partitioned.txt /tmp/basic_ops_scan_sorted.txt /tmp/basic_ops_files.txt
 
 echo "OK Verification passed"
 exit 0
diff --git a/ice/src/main/java/com/altinity/ice/cli/Main.java b/ice/src/main/java/com/altinity/ice/cli/Main.java
index a10e1da..e80511c 100644
--- a/ice/src/main/java/com/altinity/ice/cli/Main.java
+++ b/ice/src/main/java/com/altinity/ice/cli/Main.java
@@ -19,6 +19,7 @@
 import com.altinity.ice.cli.internal.cmd.DeleteTable;
 import com.altinity.ice.cli.internal.cmd.Describe;
 import com.altinity.ice.cli.internal.cmd.DescribeParquet;
+import com.altinity.ice.cli.internal.cmd.Files;
 import com.altinity.ice.cli.internal.cmd.Insert;
 import com.altinity.ice.cli.internal.cmd.InsertWatch;
 import com.altinity.ice.cli.internal.cmd.Scan;
@@ -143,6 +144,19 @@ void describe(
     }
   }
 
+  @CommandLine.Command(name = "files", description = "List files in current snapshot.")
+  void files(
+      @CommandLine.Parameters(
+              arity = "1",
+              paramLabel = "<name>",
+              description = "Table name (e.g. ns1.table1)")
+          String name)
+      throws IOException {
+    try (RESTCatalog catalog = loadCatalog()) {
+      Files.run(catalog, TableIdentifier.parse(name));
+    }
+  }
+
   @CommandLine.Command(name = "describe-parquet", description = "Describe parquet file metadata.")
   void describeParquet(
       @CommandLine.Parameters(
diff --git a/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Files.java b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Files.java
new file mode 100644
index 0000000..03450a9
--- /dev/null
+++ b/ice/src/main/java/com/altinity/ice/cli/internal/cmd/Files.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2025 Altinity Inc and/or its affiliates. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+package com.altinity.ice.cli.internal.cmd;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.iceberg.ContentFile;
+import org.apache.iceberg.ManifestContent;
+import org.apache.iceberg.ManifestFile;
+import org.apache.iceberg.ManifestFiles;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.rest.RESTCatalog;
+
+/** Prints the current snapshot of a table as a tree of manifests and the files they list. */
+public final class Files {
+
+  private static final String BRANCH = "├── ";
+  private static final String LAST_BRANCH = "└── ";
+  private static final String INDENT = "    ";
+  private static final String PIPE_INDENT = "│   ";
+
+  private Files() {}
+
+  /**
+   * Writes the snapshot / manifest / file tree of {@code tableId} to standard output.
+   *
+   * <p>Failures while reading manifests are reported inline in the tree instead of being thrown,
+   * so one unreadable manifest does not hide the others.
+   *
+   * @param catalog catalog used to load the table
+   * @param tableId identifier of the table to list
+   * @throws IOException retained in the signature for existing callers
+   */
+  public static void run(RESTCatalog catalog, TableIdentifier tableId) throws IOException {
+    Table table = catalog.loadTable(tableId);
+    Snapshot snapshot = table.currentSnapshot();
+
+    System.out.println("Snapshots: " + tableId);
+    if (snapshot == null) {
+      System.out.println("(no snapshots)");
+      return;
+    }
+
+    // Fall back to schema 0 when the snapshot does not record a schema id.
+    Integer recordedSchemaId = snapshot.schemaId();
+    int schemaId = recordedSchemaId != null ? recordedSchemaId : 0;
+    String manifestListLocation = snapshot.manifestListLocation();
+    String locationStr = manifestListLocation != null ? manifestListLocation : "(embedded)";
+    System.out.println(
+        "└── Snapshot " + snapshot.snapshotId() + ", schema " + schemaId + ": " + locationStr);
+
+    FileIO tableIO = table.io();
+    List<ManifestFile> manifests;
+    try {
+      manifests = snapshot.allManifests(tableIO);
+    } catch (Exception e) {
+      System.out.println(INDENT + "(failed to read manifests: " + e.getMessage() + ")");
+      return;
+    }
+
+    for (int m = 0; m < manifests.size(); m++) {
+      ManifestFile manifest = manifests.get(m);
+      boolean isLastManifest = m == manifests.size() - 1;
+      // Delete manifests need their own reader; ManifestFiles.read only accepts data manifests.
+      boolean isDeleteManifest = manifest.content() == ManifestContent.DELETES;
+      List<String> locations = readLocations(manifest, isDeleteManifest, table, tableIO);
+
+      String manifestPrefix = isLastManifest ? LAST_BRANCH : BRANCH;
+      System.out.println(INDENT + manifestPrefix + "Manifest: " + manifest.path());
+
+      String fileIndent = INDENT + (isLastManifest ? INDENT : PIPE_INDENT);
+      String fileLabel = isDeleteManifest ? "Deletefile: " : "Datafile: ";
+      for (int f = 0; f < locations.size(); f++) {
+        String filePrefix = f == locations.size() - 1 ? LAST_BRANCH : BRANCH;
+        System.out.println(fileIndent + filePrefix + fileLabel + locations.get(f));
+      }
+    }
+  }
+
+  /** Collects file locations from {@code manifest}, appending a note if reading fails. */
+  private static List<String> readLocations(
+      ManifestFile manifest, boolean isDeleteManifest, Table table, FileIO io) {
+    List<String> locations = new ArrayList<>();
+    try (CloseableIterable<? extends ContentFile<?>> files =
+        isDeleteManifest
+            ? ManifestFiles.readDeleteManifest(manifest, io, table.specs())
+            : ManifestFiles.read(manifest, io)) {
+      for (ContentFile<?> file : files) {
+        locations.add(file.location());
+      }
+    } catch (Exception e) {
+      locations.add("(failed to read: " + e.getMessage() + ")");
+    }
+    return locations;
+  }
+}