From c0ff92a178a89dab9a7e8c9c517df33b45310d39 Mon Sep 17 00:00:00 2001 From: Odin Date: Thu, 25 Jun 2026 20:41:51 +0200 Subject: [PATCH] feat(input-mapping): AJDA-2714 add native description and schema to table manifest --- .../src/Configuration/Table/Manifest.php | 12 + .../src/Helper/ManifestCreator.php | 81 +++++ .../Table/Manifest/AdapterTest.php | 9 +- .../Table/TableManifestConfigurationTest.php | 1 + .../tests/Helper/ManifestCreatorTest.php | 276 +++++++++++++++++- 5 files changed, 375 insertions(+), 4 deletions(-) diff --git a/libs/input-mapping/src/Configuration/Table/Manifest.php b/libs/input-mapping/src/Configuration/Table/Manifest.php index 3ee3b9403..957b55ded 100644 --- a/libs/input-mapping/src/Configuration/Table/Manifest.php +++ b/libs/input-mapping/src/Configuration/Table/Manifest.php @@ -27,6 +27,7 @@ public static function configureNode(ArrayNodeDefinition $node): void ->scalarNode('id')->isRequired()->end() ->scalarNode('name')->end() ->scalarNode('uri')->end() + ->scalarNode('description')->end() ->arrayNode('primary_key')->prototype('scalar')->end()->end() ->arrayNode('distribution_key')->prototype('scalar')->end()->end() ->scalarNode('created')->end() @@ -80,6 +81,17 @@ public static function configureNode(ArrayNodeDefinition $node): void ->end() ->defaultValue([]) ->end() + ->arrayNode('schema') + ->prototype('array') + ->children() + ->scalarNode('name')->isRequired()->end() + ->variableNode('data_type')->end() + ->booleanNode('nullable')->end() + ->booleanNode('primary_key')->end() + ->scalarNode('description')->end() + ->end() + ->end() + ->end() ->end() ; // BEFORE MODIFICATION OF THIS CONFIGURATION, READ AND UNDERSTAND diff --git a/libs/input-mapping/src/Helper/ManifestCreator.php b/libs/input-mapping/src/Helper/ManifestCreator.php index 0510c1c33..6e7aba2ef 100644 --- a/libs/input-mapping/src/Helper/ManifestCreator.php +++ b/libs/input-mapping/src/Helper/ManifestCreator.php @@ -27,6 +27,9 @@ public function writeTableManifest( 'last_change_date' => $tableInfo['lastChangeDate'], 'last_import_date' => $tableInfo['lastImportDate'], ]; + if (isset($tableInfo['definition']['description']) && $tableInfo['definition']['description'] !== '') { + $manifest['description'] = $tableInfo['definition']['description']; + } if (isset($tableInfo['s3'])) { $manifest['s3'] = $tableInfo['s3']; } @@ -44,6 +47,11 @@ public function writeTableManifest( $manifest['column_metadata'][$column] = $columnMetadata; } + $schema = $this->buildSchema($tableInfo, $columns); + if ($schema !== null) { + $manifest['schema'] = $schema; + } + $adapter = new TableAdapter($format); try { $adapter->setConfig($manifest); @@ -61,6 +69,79 @@ public function writeTableManifest( } } + /** + * Builds the `schema` node describing each selected column's data type, nullability, primary-key flag and + * description from the table definition. Returns null when the table has no definition. + * + * @param string[] $columns + */ + private function buildSchema(array $tableInfo, array $columns): ?array + { + if (empty($tableInfo['definition']['columns'])) { + return null; + } + + $backend = $tableInfo['bucket']['backend'] ?? null; + $primaryKeysNames = $tableInfo['definition']['primaryKeysNames'] ?? []; + + $definitionColumns = []; + foreach ($tableInfo['definition']['columns'] as $definitionColumn) { + $definitionColumns[$definitionColumn['name']] = $definitionColumn; + } + + $schema = []; + foreach ($columns as $columnName) { + if (!isset($definitionColumns[$columnName])) { + continue; + } + $definitionColumn = $definitionColumns[$columnName]; + $definition = $definitionColumn['definition'] ?? []; + + $column = ['name' => $columnName]; + $dataType = $this->buildColumnDataType($definitionColumn, $backend); + if ($dataType !== []) { + $column['data_type'] = $dataType; + } + if (isset($definition['nullable'])) { + $column['nullable'] = $definition['nullable']; + } + $column['primary_key'] = in_array($columnName, $primaryKeysNames, true); + if (isset($definition['description']) && $definition['description'] !== '') { + $column['description'] = $definition['description']; + } + $schema[] = $column; + } + + return $schema; + } + + /** + * Builds the `data_type` node for a single column: the backend-agnostic `base` type plus the type as it exists + * on the table's actual backend (length and default are included only when present in the definition). Returns + * an empty array for non-typed columns, whose definition carries no type information. + */ + private function buildColumnDataType(array $definitionColumn, ?string $backend): array + { + $definition = $definitionColumn['definition'] ?? []; + + $dataType = []; + if (isset($definitionColumn['basetype'])) { + $dataType['base'] = ['type' => $definitionColumn['basetype']]; + } + if ($backend !== null && isset($definition['type'])) { + $backendType = ['type' => $definition['type']]; + if (isset($definition['length'])) { + $backendType['length'] = $definition['length']; + } + if (isset($definition['default'])) { + $backendType['default'] = $definition['default']; + } + $dataType[$backend] = $backendType; + } + + return $dataType; + } + public function createFileManifest(array $fileInfo): array { return [ diff --git a/libs/input-mapping/tests/Configuration/Table/Manifest/AdapterTest.php b/libs/input-mapping/tests/Configuration/Table/Manifest/AdapterTest.php index ca6581406..b982fb35a 100644 --- a/libs/input-mapping/tests/Configuration/Table/Manifest/AdapterTest.php +++ b/libs/input-mapping/tests/Configuration/Table/Manifest/AdapterTest.php @@ -45,7 +45,8 @@ public function setConfigAndSerializeData(): Generator "distribution_key": [], "columns": [], "metadata": [], - "column_metadata": [] + "column_metadata": [], + "schema": [] } EOF, ]; @@ -58,6 +59,7 @@ public function setConfigAndSerializeData(): Generator columns: { } metadata: { } column_metadata: { } +schema: { } EOF, ]; @@ -97,7 +99,8 @@ public function fileOperationsData(): Generator "distribution_key": [], "columns": [], "metadata": [], - "column_metadata": [] + "column_metadata": [], + "schema": [] } EOF, ]; @@ -110,6 +113,7 @@ public function fileOperationsData(): Generator columns: { } metadata: { } column_metadata: { } +schema: { } EOF, ]; @@ -139,6 +143,7 @@ public function testFileOperations( 'columns' => [], 'metadata' => [], 'column_metadata' => [], + 'schema' => [], ], $adapter->readFromFile($filePathname)); } diff --git a/libs/input-mapping/tests/Configuration/Table/TableManifestConfigurationTest.php b/libs/input-mapping/tests/Configuration/Table/TableManifestConfigurationTest.php index 8810547a3..826cc2be5 100644 --- a/libs/input-mapping/tests/Configuration/Table/TableManifestConfigurationTest.php +++ b/libs/input-mapping/tests/Configuration/Table/TableManifestConfigurationTest.php @@ -56,6 +56,7 @@ public function testConfiguration( 'column_metadata' => $columnsMetadata, ]; $expectedResponse = $config; + $expectedResponse['schema'] = []; $processedConfiguration = (new Manifest())->parse(['config' => $config]); self::assertEquals($expectedResponse, $processedConfiguration); diff --git a/libs/input-mapping/tests/Helper/ManifestCreatorTest.php b/libs/input-mapping/tests/Helper/ManifestCreatorTest.php index 5cc33014c..9964449f9 100644 --- a/libs/input-mapping/tests/Helper/ManifestCreatorTest.php +++ b/libs/input-mapping/tests/Helper/ManifestCreatorTest.php @@ -132,7 +132,8 @@ public function writeTableManifestData(): iterable ], "foo": [], "bar": [] - } + }, + "schema": [] } EOF, ]; @@ -175,7 +176,8 @@ public function writeTableManifestData(): iterable "timestamp": "2022-06-03T06:31:43+0200" } ] - } + }, + "schema": [] } EOF, ]; @@ -222,6 +224,7 @@ public function writeTableManifestData(): iterable timestamp: '2022-06-03T06:31:43+0200' foo: { } bar: { } +schema: { } EOF, ]; @@ -256,11 +259,280 @@ public function writeTableManifestData(): iterable value: TEXT provider: input-mapping timestamp: '2022-06-03T06:31:43+0200' +schema: { } EOF, ]; } + public function testWriteTableManifestBuildsSchemaFromDefinition(): void + { + $temp = new Temp('docker'); + $filePathname = (string) $temp->createTmpFile(); + + $tableInfo = $this->getTableInfoWithDefinition(); + + $manifestCreator = new ManifestCreator(); + $manifestCreator->writeTableManifest($tableInfo, $filePathname, [], FileFormat::Json); + + $manifest = (array) json_decode((string) file_get_contents($filePathname), true); + + // table description comes from the native definition field + self::assertSame('native table description', $manifest['description']); + + // schema is built from the native definition: data types reflect the table's backend, descriptions and + // defaults are native-only, columns without a native description simply omit the description key + self::assertEquals( + [ + [ + 'name' => 'id', + 'data_type' => [ + 'base' => ['type' => 'INTEGER'], + 'snowflake' => ['type' => 'NUMBER', 'length' => '38,0'], + ], + 'nullable' => false, + 'primary_key' => true, + 'description' => 'native id description', + ], + [ + 'name' => 'name', + 'data_type' => [ + 'base' => ['type' => 'STRING'], + 'snowflake' => ['type' => 'VARCHAR', 'length' => '16777216'], + ], + 'nullable' => true, + 'primary_key' => false, + ], + [ + 'name' => 'size', + 'data_type' => [ + 'base' => ['type' => 'INTEGER'], + 'snowflake' => ['type' => 'NUMBER', 'length' => '38,0'], + ], + 'nullable' => true, + 'primary_key' => false, + 'description' => 'native size description', + ], + [ + 'name' => 'flag', + 'data_type' => [ + 'base' => ['type' => 'INTEGER'], + 'snowflake' => ['type' => 'INT', 'default' => '12'], + ], + 'nullable' => true, + 'primary_key' => false, + ], + ], + $manifest['schema'], + ); + + // legacy metadata structures are left completely untouched (Connection backfills KBC.description there) + self::assertEquals($tableInfo['metadata'], $manifest['metadata']); + self::assertEquals($tableInfo['columnMetadata']['id'], $manifest['column_metadata']['id']); + } + + public function testWriteTableManifestEmptySchemaWhenNoDefinition(): void + { + $temp = new Temp('docker'); + $filePathname = (string) $temp->createTmpFile(); + + $tableInfo = $this->getTableInfoWithDefinition(); + unset($tableInfo['definition']); + + $manifestCreator = new ManifestCreator(); + $manifestCreator->writeTableManifest($tableInfo, $filePathname, [], FileFormat::Json); + + $manifest = (array) json_decode((string) file_get_contents($filePathname), true); + + self::assertSame([], $manifest['schema']); + self::assertArrayNotHasKey('description', $manifest); + self::assertEquals($tableInfo['metadata'], $manifest['metadata']); + } + + public function testWriteTableManifestEmptyTableDescriptionIsOmitted(): void + { + $temp = new Temp('docker'); + $filePathname = (string) $temp->createTmpFile(); + + $tableInfo = $this->getTableInfoWithDefinition(); + $tableInfo['definition']['description'] = ''; + $tableInfo['definition']['columns'][0]['definition']['description'] = ''; + + $manifestCreator = new ManifestCreator(); + $manifestCreator->writeTableManifest($tableInfo, $filePathname, [], FileFormat::Json); + + $manifest = (array) json_decode((string) file_get_contents($filePathname), true); + $schema = $manifest['schema']; + self::assertIsArray($schema); + self::assertIsArray($schema[0]); + self::assertIsArray($schema[2]); + + // empty native description is treated as absent + self::assertArrayNotHasKey('description', $manifest); + self::assertArrayNotHasKey('description', $schema[0]); + // a sibling column keeps its non-empty native description + self::assertSame('native size description', $schema[2]['description']); + } + + public function testWriteTableManifestSchemaRespectsColumnSelection(): void + { + $temp = new Temp('docker'); + $filePathname = (string) $temp->createTmpFile(); + + $tableInfo = $this->getTableInfoWithDefinition(); + + $manifestCreator = new ManifestCreator(); + $manifestCreator->writeTableManifest($tableInfo, $filePathname, ['name'], FileFormat::Json); + + $manifest = (array) json_decode((string) file_get_contents($filePathname), true); + $schema = $manifest['schema']; + self::assertIsArray($schema); + self::assertIsArray($schema[0]); + + // schema only contains the selected columns, in the selected order + self::assertCount(1, $schema); + self::assertSame('name', $schema[0]['name']); + } + + public function testWriteTableManifestBuildsSchemaForNonTypedTable(): void + { + $temp = new Temp('docker'); + $filePathname = (string) $temp->createTmpFile(); + + // a non-typed table still has a definition with column names and primary keys, but the column definitions + // carry no data types - only an optional native description + $tableInfo = [ + 'uri' => 'https://connection.keboola.com/v2/storage/tables/in.c-docker-test.test', + 'id' => 'in.c-docker-test.test', + 'name' => 'test', + 'primaryKey' => ['Id'], + 'distributionKey' => [], + 'created' => '2022-06-03T01:31:43+0200', + 'lastChangeDate' => '2022-06-03T02:31:43+0200', + 'lastImportDate' => '2022-06-03T03:31:43+0200', + 'columns' => ['Id', 'Name'], + 'bucket' => ['id' => 'in.c-docker-test', 'backend' => 'snowflake'], + 'metadata' => [], + 'columnMetadata' => ['Id' => [], 'Name' => []], + 'definition' => [ + 'primaryKeysNames' => ['Id'], + 'columns' => [ + ['name' => 'Id', 'definition' => ['description' => 'native id description']], + ['name' => 'Name', 'definition' => []], + ], + ], + ]; + + $manifestCreator = new ManifestCreator(); + $manifestCreator->writeTableManifest($tableInfo, $filePathname, [], FileFormat::Json); + + $manifest = (array) json_decode((string) file_get_contents($filePathname), true); + + // schema is emitted with no data types; native description still propagates, primary key still resolved + self::assertEquals( + [ + [ + 'name' => 'Id', + 'primary_key' => true, + 'description' => 'native id description', + ], + [ + 'name' => 'Name', + 'primary_key' => false, + ], + ], + $manifest['schema'], + ); + // no native table description -> top-level description omitted + self::assertArrayNotHasKey('description', $manifest); + } + + private function getTableInfoWithDefinition(): array + { + return [ + 'uri' => 'https://connection.keboola.com/v2/storage/tables/in.c-docker-test.test', + 'id' => 'in.c-docker-test.test', + 'name' => 'test', + 'primaryKey' => ['id'], + 'distributionKey' => [], + 'created' => '2022-06-03T01:31:43+0200', + 'lastChangeDate' => '2022-06-03T02:31:43+0200', + 'lastImportDate' => '2022-06-03T03:31:43+0200', + 'columns' => ['id', 'name', 'size', 'flag'], + 'bucket' => [ + 'id' => 'in.c-docker-test', + 'backend' => 'snowflake', + ], + 'metadata' => [ + [ + 'id' => '123', + 'key' => 'KBC.description', + 'value' => 'stale metadata description', + 'provider' => 'storage', + 'timestamp' => '2022-06-03T04:31:43+0200', + ], + ], + 'columnMetadata' => [ + 'id' => [ + [ + 'id' => '456', + 'key' => 'KBC.description', + 'value' => 'stale id metadata description', + 'provider' => 'storage', + 'timestamp' => '2022-06-03T05:31:43+0200', + ], + ], + 'name' => [], + 'size' => [], + 'flag' => [], + ], + 'definition' => [ + 'primaryKeysNames' => ['id'], + 'columns' => [ + [ + 'name' => 'id', + 'definition' => [ + 'type' => 'NUMBER', + 'nullable' => false, + 'length' => '38,0', + 'description' => 'native id description', + ], + 'basetype' => 'INTEGER', + ], + [ + 'name' => 'name', + 'definition' => [ + 'type' => 'VARCHAR', + 'nullable' => true, + 'length' => '16777216', + ], + 'basetype' => 'STRING', + ], + [ + 'name' => 'size', + 'definition' => [ + 'type' => 'NUMBER', + 'nullable' => true, + 'length' => '38,0', + 'description' => 'native size description', + ], + 'basetype' => 'INTEGER', + ], + [ + 'name' => 'flag', + 'definition' => [ + 'type' => 'INT', + 'nullable' => true, + 'default' => '12', + ], + 'basetype' => 'INTEGER', + ], + ], + 'description' => 'native table description', + ], + ]; + } + /** * @dataProvider writeTableManifestData */