diff --git a/.github/workflows/hive-postgres-tpcds-metastore.yml b/.github/workflows/hive-postgres-tpcds-metastore.yml new file mode 100644 index 000000000000..db4a62ad8092 --- /dev/null +++ b/.github/workflows/hive-postgres-tpcds-metastore.yml @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Build and Publish Postgres TPC-DS Metastore Image +permissions: + contents: read + +on: + workflow_dispatch: + inputs: + imageTag: + description: 'Docker image tag. Use "test" for experimentation purposes and proper semantic versioning (e.g., 1.4) when pushing to production. CAUTION: Choose the tag carefully to avoid overwriting existing images.' + required: false + default: 'test' + pushImage: + description: 'Push image to Docker Hub? 
(true/false)' + required: false + default: false + type: boolean + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd + + - name: Login to Docker Hub + if: ${{ github.event.inputs.pushImage == 'true' }} + uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 + with: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and optionally push Docker image + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 + with: + context: ./standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/ + file: ./standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile + push: ${{ github.event.inputs.pushImage == 'true' }} + tags: ${{ github.repository_owner }}/hive-postgres-tpcds-metastore:${{ github.event.inputs.imageTag }} diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 77e875c28360..575b1fa89318 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3868,6 +3868,10 @@ public static enum ConfVars { "SSL certificate keystore location for HiveServer2 WebUI."), HIVE_SERVER2_WEBUI_SSL_KEYSTORE_PASSWORD("hive.server2.webui.keystore.password", "", "SSL certificate keystore password for HiveServer2 WebUI."), + HIVE_SERVER2_WEBUI_SSL_KEYSTORE_RELOAD_INTERVAL("hive.server2.webui.keystore.reload.interval", "0", + new TimeValidator(TimeUnit.MILLISECONDS), + "Interval at which HiveServer2 WebUI checks the SSL keystore file for changes; " + + "set to 0 to disable auto-reload. 
The default is 0."), HIVE_SERVER2_WEBUI_SSL_KEYSTORE_TYPE("hive.server2.webui.keystore.type", "", "SSL certificate keystore type for HiveServer2 WebUI."), HIVE_SERVER2_WEBUI_SSL_INCLUDE_CIPHERSUITES("hive.server2.webui.include.ciphersuites", "", @@ -5115,6 +5119,11 @@ public static enum ConfVars { LLAP_ZK_REGISTRY_NAMESPACE("hive.llap.zk.registry.namespace", null, "In the LLAP ZooKeeper-based registry, overrides the ZK path namespace. Note that\n" + "using this makes the path management (e.g. setting correct ACLs) your responsibility."), + LLAP_CLUSTER_ROUTING_RULES("hive.llap.cluster.routing.rules", "", + "Comma-separated rules mapping users/groups to LLAP cluster names.\n" + + "Format: user:=,group:=,default=.\n" + + "Per-cluster configs are read from hive.llap.cluster..sessions.namespace\n" + + "and hive.llap.cluster..registry.namespace."), // Note: do not rename to ..service.acl; Hadoop generates .hosts setting name from this, // resulting in a collision with existing hive.llap.daemon.service.hosts and bizarre errors. // These are read by Hadoop IPC, so you should check the usage and naming conventions (e.g. 
@@ -5569,7 +5578,8 @@ public static enum ConfVars { "hive.iceberg.allow.datafiles.in.table.location.only," + "hive.hook.proto.base-directory," + "hive.rewrite.data.policy," + - "hive.query.history.enabled", // Query History service is initialized on HS2 startup (HIVE-29170) + "hive.query.history.enabled," + // Query History service is initialized on HS2 startup (HIVE-29170) + "hive.llap.cluster.routing.rules", "Comma separated list of configuration options which are immutable at runtime"), HIVE_CONF_HIDDEN_LIST("hive.conf.hidden.list", METASTORE_PWD.varname + "," + HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 57f7cadac181..156d7c7ebe76 100644 --- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -445,6 +445,7 @@ public enum ErrorMsg { @Deprecated // kept for backwards reference REPLACE_VIEW_WITH_MATERIALIZED(10400, "Attempt to replace view {0} with materialized view", true), REPLACE_MATERIALIZED_WITH_VIEW(10401, "Attempt to replace materialized view {0} with view", true), + VIEW_STORAGE_HANDLER_UNSUPPORTED(10448, "Storage handler {0} doesn't support external logical views", true), UPDATE_DELETE_VIEW(10402, "You cannot update or delete records in a view"), MATERIALIZED_VIEW_DEF_EMPTY(10403, "Query for the materialized view rebuild could not be retrieved"), MERGE_PREDIACTE_REQUIRED(10404, "MERGE statement with both UPDATE and DELETE clauses " + diff --git a/common/src/java/org/apache/hive/http/HttpServer.java b/common/src/java/org/apache/hive/http/HttpServer.java index dd9e66f92b6b..7169c0929eb4 100644 --- a/common/src/java/org/apache/hive/http/HttpServer.java +++ b/common/src/java/org/apache/hive/http/HttpServer.java @@ -35,6 +35,8 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.Timer; +import 
java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -51,6 +53,7 @@ import javax.servlet.http.HttpServletRequestWrapper; import javax.servlet.http.HttpServletResponse; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.lang3.StringUtils; @@ -66,6 +69,7 @@ import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.security.http.CrossOriginFilter; +import org.apache.hadoop.security.ssl.FileMonitoringTimerTask; import org.apache.hive.http.security.PamAuthenticator; import org.apache.hive.http.security.PamConstraint; import org.apache.hive.http.security.PamConstraintMapping; @@ -140,6 +144,8 @@ public class HttpServer { private Server webServer; private QueuedThreadPool threadPool; private PortHandlerWrapper portHandlerWrapper; + @VisibleForTesting + Timer keystoreChangeMonitor; /** * Create a status server on the given port. 
@@ -360,6 +366,10 @@ public void start() throws Exception { } public void stop() throws Exception { + if (this.keystoreChangeMonitor != null) { + this.keystoreChangeMonitor.cancel(); + this.keystoreChangeMonitor = null; + } webServer.stop(); } @@ -695,6 +705,11 @@ ServerConnector createAndAddChannelConnector(int queueSize, Builder b) { new String[excludedSSLProtocols.size()])); sslContextFactory.setKeyStorePassword(b.keyStorePassword); connector = new ServerConnector(webServer, sslContextFactory, http); + + long reloadInterval = b.conf.getTimeVar(ConfVars.HIVE_SERVER2_WEBUI_SSL_KEYSTORE_RELOAD_INTERVAL, TimeUnit.MILLISECONDS); + if (reloadInterval > 0) { + this.keystoreChangeMonitor = createKeystoreChangeMonitor(reloadInterval, b.keyStorePath, sslContextFactory); + } } connector.setAcceptQueueSize(queueSize); @@ -706,6 +721,36 @@ ServerConnector createAndAddChannelConnector(int queueSize, Builder b) { return connector; } + @VisibleForTesting + void setKeystoreChangeMonitor(Timer monitor) { + keystoreChangeMonitor = monitor; + } + + @VisibleForTesting + Timer createKeystoreChangeMonitor(long reloadInterval, String keyStorePath, + SslContextFactory sslContextFactory) { + LOG.info("Starting SSL Certificates Store Monitor. reload interval: {}ms, keyStorePath: {}", reloadInterval, keyStorePath); + Timer timer = new Timer("SSL Certificates Store Monitor", true); + // + // The Jetty SSLContextFactory provides a 'reload' method which will reload both + // truststore and keystore certificates. + // + timer.schedule(new FileMonitoringTimerTask( + Paths.get(keyStorePath), + path -> { + LOG.info("Reloading certificates from store keystore {}", keyStorePath); + try { + sslContextFactory.reload(factory -> { }); + } catch (Exception ex) { + LOG.error("Failed to reload SSL keystore certificates", ex); + } + },null), + reloadInterval, + reloadInterval + ); + return timer; + } + /** * Secure the web server with PAM. 
*/ diff --git a/common/src/test/org/apache/hive/http/TestHttpServer.java b/common/src/test/org/apache/hive/http/TestHttpServer.java new file mode 100644 index 000000000000..75e27f1e6429 --- /dev/null +++ b/common/src/test/org/apache/hive/http/TestHttpServer.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hive.http; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.eclipse.jetty.util.ssl.SslContextFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.FileTime; +import java.util.Timer; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.CALLS_REAL_METHODS; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.withSettings; + +/** + * Tests for the SSL keystore auto-reload feature wired in via + * {@code HttpServer#createKeystoreChangeMonitor} and the surrounding + * {@code keystoreChangeMonitor} field. See HiveConf + * {@code hive.server2.webui.keystore.reload.interval}. + */ +public class TestHttpServer { + + private Path keystore; + private Timer timer; + + @Before + public void setUp() throws Exception { + keystore = Files.createTempFile("test-keystore-", ".jks"); + Files.write(keystore, "initial-content".getBytes()); + } + + @After + public void tearDown() throws Exception { + if (timer != null) { + timer.cancel(); + } + if (keystore != null) { + Files.deleteIfExists(keystore); + } + } + + /** + * When the watched keystore file is modified, the scheduled + * {@code FileMonitoringTimerTask} must invoke + * {@code SslContextFactory#reload}. 
+ */ + @Test(timeout = 10_000) + public void testMonitorReloadsSslContextOnKeystoreModification() throws Exception { + SslContextFactory sslContextFactory = mock(SslContextFactory.class); + CountDownLatch reloadCalled = new CountDownLatch(1); + doAnswer(invocation -> { + reloadCalled.countDown(); + return null; + }).when(sslContextFactory).reload(any()); + + timer = invokeMakeMonitor(100L, keystore.toString(), sslContextFactory); + + // Bump mtime to guarantee a detected change (FileMonitoringTimerTask compares mtimes). + Files.setLastModifiedTime(keystore, FileTime.fromMillis(System.currentTimeMillis() + 5_000)); + + assertTrue("SslContextFactory#reload was not called within 5s of keystore mtime change", + reloadCalled.await(5, TimeUnit.SECONDS)); + verify(sslContextFactory, atLeastOnce()).reload(any()); + } + + /** + * Reload failures must be swallowed so a transient bad keystore can't take HS2 down; + * the next mtime change should still trigger another reload attempt. + */ + @Test(timeout = 10_000) + public void testMonitorSurvivesReloadException() throws Exception { + SslContextFactory sslContextFactory = mock(SslContextFactory.class); + CountDownLatch reloadCalled = new CountDownLatch(2); + doAnswer(invocation -> { + reloadCalled.countDown(); + throw new RuntimeException("simulated keystore reload failure"); + }).when(sslContextFactory).reload(any()); + + timer = invokeMakeMonitor(100L, keystore.toString(), sslContextFactory); + + Files.setLastModifiedTime(keystore, FileTime.fromMillis(System.currentTimeMillis() + 5_000)); + Thread.sleep(300); + Files.setLastModifiedTime(keystore, FileTime.fromMillis(System.currentTimeMillis() + 10_000)); + + assertTrue("Monitor should keep firing reload attempts even after exceptions", + reloadCalled.await(5, TimeUnit.SECONDS)); + } + + /** + * {@code stop()} must cancel the monitor Timer when one was installed, + * so the daemon thread does not outlive HS2. 
+ */ + @Test + public void testStopCancelsConfigurationChangeMonitor() throws Exception { + HttpServer server = mock(HttpServer.class, withSettings().defaultAnswer(CALLS_REAL_METHODS)); + + // Track whether cancel() was invoked on the installed timer. + boolean[] cancelled = {false}; + Timer installed = new Timer("test-monitor", true) { + @Override + public void cancel() { + cancelled[0] = true; + super.cancel(); + } + }; + server.setKeystoreChangeMonitor(installed); + + // stop() also calls webServer.stop(); webServer is null on a mock, so we expect + // a NullPointerException after the cancel path runs. + try { + server.stop(); + } catch (NullPointerException expected) { + // intentionally ignored — we only assert the monitor was cancelled + } + assertTrue("Timer#cancel should have been invoked from stop()", cancelled[0]); + } + + /** + * No monitor installed → stop() must not blow up trying to cancel a missing Timer. + * (Mockito skips field initializers, so we re-establish the production default + * {@code null} on the mock before exercising stop().) 
+ */ + @Test + public void testStopWithoutMonitorDoesNotThrowFromCancelPath() throws Exception { + HttpServer server = mock(HttpServer.class, withSettings().defaultAnswer(CALLS_REAL_METHODS)); + server.setKeystoreChangeMonitor(null); + assertNull("keystoreChangeMonitor should be empty for this case", server.keystoreChangeMonitor); + + try { + server.stop(); + } catch (NullPointerException expectedFromWebServerStop) { + // ok — the monitor branch must not have thrown before reaching webServer.stop() + } + } + + // ---- reflection helpers ------------------------------------------------ + + private static Timer invokeMakeMonitor(long intervalMs, String keystorePath, + SslContextFactory sslContextFactory) throws Exception { + HttpServer server = mock(HttpServer.class, withSettings().defaultAnswer(CALLS_REAL_METHODS)); + return server.createKeystoreChangeMonitor(intervalMs, keystorePath, sslContextFactory); + } +} diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index fed4a71a01fa..34825412fdb6 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -1410,10 +1410,13 @@ private void addNotificationLogBatch(List eventList, List eventList, List catalogProps = IcebergCatalogProperties.getCatalogProperties(conf, catalogName); + Catalog catalog = CatalogUtil.buildIcebergCatalog(catalogName, catalogProps, conf); + runWithCatalog(catalog, () -> loadAndApplyView(hmsTable, conf, catalog, catalogName, identifier)); + } + + private static void loadAndApplyView( + org.apache.hadoop.hive.metastore.api.Table hmsTable, + Configuration conf, + Catalog catalog, + String catalogName, + TableIdentifier identifier) { + ViewCatalog viewCatalog = 
asViewCatalog(catalog, catalogName); + MetastoreUtil.applyIcebergViewToHmsTable(hmsTable, viewCatalog.loadView(identifier), conf); + } + + public static void createOrReplaceView( + Configuration conf, + String databaseName, + String viewName, + List fieldSchemas, + String viewSql, + Map tblProperties, + String comment) { + + TableIdentifier identifier = TableIdentifier.of(databaseName, viewName); + String catalogName = IcebergCatalogProperties.getCatalogName(conf); + Map catalogProps = IcebergCatalogProperties.getCatalogProperties(conf, catalogName); + Catalog catalog = CatalogUtil.buildIcebergCatalog(catalogName, catalogProps, conf); + runWithCatalog( + catalog, + () -> commitView(catalog, catalogName, identifier, fieldSchemas, viewSql, tblProperties, comment)); + } + + /** + * Runs {@code action} with {@code catalog}, then closes it when the catalog implements + * {@link Closeable} (e.g. REST catalog clients). + */ + private static void runWithCatalog(Catalog catalog, Runnable action) { + if (catalog instanceof Closeable closeable) { + try (Closeable ignored = closeable) { + action.run(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to close Iceberg catalog", e); + } + } else { + action.run(); + } + } + + private static void commitView( + Catalog catalog, + String catalogName, + TableIdentifier identifier, + List fieldSchemas, + String viewSql, + Map tblProperties, + String comment) { + ViewCatalog viewCatalog = asViewCatalog(catalog, catalogName); + + ViewBuilder builder = + viewCatalog + .buildView(identifier) + .withSchema(HiveSchemaUtil.convert(fieldSchemas, Collections.emptyMap(), true)) + .withDefaultNamespace(Namespace.of(identifier.namespace().level(0))) + .withQuery("hive", viewSql); + + if (StringUtils.isNotBlank(comment)) { + builder = builder.withProperty("comment", comment); + } + + Map tblProps = + tblProperties == null ? 
Maps.newHashMap() : Maps.newHashMap(tblProperties); + + builder.withProperties(tblProps); + + builder.createOrReplace(); + } + + private static ViewCatalog asViewCatalog(Catalog catalog, String catalogName) { + if (catalog instanceof ViewCatalog viewCatalog) { + return viewCatalog; + } + throw new UnsupportedOperationException( + String.format( + "Iceberg catalog '%s' does not implement ViewCatalog.", + catalogName) + + " Iceberg views require a catalog that implements ViewCatalog (e.g. HiveCatalog or REST)."); + } +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index 95e1e5b36623..94b89cb8c0e3 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -36,8 +36,10 @@ import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.BaseTable; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.Schema; @@ -46,6 +48,11 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.iceberg.view.BaseView; +import org.apache.iceberg.view.SQLViewRepresentation; +import org.apache.iceberg.view.View; +import org.apache.iceberg.view.ViewMetadata; import org.apache.thrift.TException; public class MetastoreUtil { @@ -134,7 +141,11 
@@ public static Table toHiveTable(org.apache.iceberg.Table table, Configuration co result.setDbName(tableName.getDb()); result.setTableName(tableName.getTable()); result.setTableType(TableType.EXTERNAL_TABLE.toString()); - result.setPartitionKeys(getPartitionKeys(table, table.spec().specId())); + + // TODO: Revert after HIVE-29633 is fixed + // result.setPartitionKeys(getPartitionKeys(table, table.spec().specId())); + result.setPartitionKeys(Lists.newArrayList()); + TableMetadata metadata = ((BaseTable) table).operations().current(); long maxHiveTablePropertySize = conf.getLong(HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE, HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT); @@ -142,12 +153,89 @@ public static Table toHiveTable(org.apache.iceberg.Table table, Configuration co null, true, maxHiveTablePropertySize, null); String catalogType = IcebergCatalogProperties.getCatalogType(conf); if (!StringUtils.isEmpty(catalogType) && !IcebergCatalogProperties.NO_CATALOG_TYPE.equals(catalogType)) { - result.getParameters().put(CatalogUtil.ICEBERG_CATALOG_TYPE, IcebergCatalogProperties.getCatalogType(conf)); + result.getParameters().put(CatalogUtil.ICEBERG_CATALOG_TYPE, catalogType); } result.setSd(getHiveStorageDescriptor(table)); return result; } + /** + * Builds a minimal HMS {@link Table} shell for Iceberg view (identity, view type, + * and Iceberg storage-handler markers only). The storage handler {@code postGetTable} hook enriches + * this object via {@link IcebergViewSupport#enrichHmsTableFromIcebergView} (view SQL, + * schema, and Iceberg parameters). 
+ */ + public static Table buildMinimalHMSView(String catName, String dbName, String tableName) { + Table result = new Table(); + result.setCatName(catName); + result.setDbName(dbName); + result.setTableName(tableName); + result.setTableType(TableType.VIRTUAL_VIEW.toString()); + + Map parameters = Maps.newHashMap(); + parameters.put( + BaseMetastoreTableOperations.TABLE_TYPE_PROP, HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE); + parameters.put( + hive_metastoreConstants.META_TABLE_STORAGE, HMSTablePropertyHelper.HIVE_ICEBERG_STORAGE_HANDLER); + result.setParameters(parameters); + return result; + } + + /** + * Applies Iceberg view metadata (SQL, schema, params) onto an existing HMS {@link Table}. + */ + public static void applyIcebergViewToHmsTable(Table hmsTable, View view, Configuration conf) { + ViewMetadata metadata = ((BaseView) view).operations().current(); + String sqlText = viewSqlText(view, metadata); + + boolean hiveEngineEnabled = false; + hmsTable.setSd(HiveOperationsBase.storageDescriptor(metadata.schema(), metadata.location(), hiveEngineEnabled)); + StorageDescriptor sd = hmsTable.getSd(); + + if (sd.getBucketCols() == null) { + sd.setBucketCols(Lists.newArrayList()); + } + + if (sd.getSortCols() == null) { + sd.setSortCols(Lists.newArrayList()); + } + + long maxHiveTablePropertySize = + conf.getLong( + HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE, + HiveOperationsBase.HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT); + HMSTablePropertyHelper.updateHmsTableForIcebergView( + metadata.metadataFileLocation(), + hmsTable, + metadata, + Collections.emptySet(), + maxHiveTablePropertySize, + null); + + hmsTable.setCreateTime((int) (metadata.version(1).timestampMillis() / 1000)); + hmsTable.setLastAccessTime((int) (metadata.currentVersion().timestampMillis() / 1000)); + hmsTable.setOwner( + PropertyUtil.propertyAsString( + metadata.properties(), HiveCatalog.HMS_TABLE_OWNER, HiveHadoopUtil.currentUser())); + + // In-memory overlay for compile/describe: authoritative SQL 
comes from Iceberg metadata. + hmsTable.setViewOriginalText(sqlText); + hmsTable.setViewExpandedText(sqlText); + + String catalogType = IcebergCatalogProperties.getCatalogType(conf); + if (!StringUtils.isEmpty(catalogType) && !IcebergCatalogProperties.NO_CATALOG_TYPE.equals(catalogType)) { + hmsTable.getParameters().put(CatalogUtil.ICEBERG_CATALOG_TYPE, IcebergCatalogProperties.getCatalogType(conf)); + } + } + + private static String viewSqlText(View view, ViewMetadata metadata) { + SQLViewRepresentation hiveRepr = view.sqlFor("hive"); + if (hiveRepr != null) { + return hiveRepr.sql(); + } + return HiveViewOperations.sqlFor(metadata); + } + private static StorageDescriptor getHiveStorageDescriptor(org.apache.iceberg.Table table) { var result = new StorageDescriptor(); result.setCols(HiveSchemaUtil.convert(table.schema())); diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/client/HiveRESTCatalogClient.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/client/HiveRESTCatalogClient.java index 4390d5a0bca1..30f9e7dcb461 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/client/HiveRESTCatalogClient.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/client/HiveRESTCatalogClient.java @@ -21,12 +21,15 @@ import java.io.IOException; import java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Properties; +import java.util.Set; import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.CreateTableRequest; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.DropDatabaseRequest; @@ -38,16 +41,20 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.client.BaseMetaStoreClient; import 
org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.Schema; import org.apache.iceberg.SortOrder; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.catalog.ViewCatalog; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hive.HMSTablePropertyHelper; +import org.apache.iceberg.hive.HiveOperationsBase; import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.hive.IcebergCatalogProperties; import org.apache.iceberg.hive.IcebergTableProperties; +import org.apache.iceberg.hive.IcebergViewSupport; import org.apache.iceberg.hive.MetastoreUtil; import org.apache.iceberg.hive.RuntimeMetaException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -68,10 +75,6 @@ public class HiveRESTCatalogClient extends BaseMetaStoreClient { private RESTCatalog restCatalog; - public HiveRESTCatalogClient(Configuration conf, boolean allowEmbedded) { - this(conf); - } - public HiveRESTCatalogClient(Configuration conf) { super(conf); reconnect(); @@ -123,10 +126,20 @@ public List getTables(String catName, String dbName, String tablePattern Pattern pattern = Pattern.compile(regex); // List tables from the specific database (namespace) and filter them. 
- return restCatalog.listTables(Namespace.of(dbName)).stream() + Set names = new LinkedHashSet<>(); + restCatalog.listTables(Namespace.of(dbName)).stream() .map(TableIdentifier::name) .filter(pattern.asPredicate()) - .toList(); + .forEach(names::add); + + if (restCatalog instanceof ViewCatalog viewCatalog) { + viewCatalog + .listViews(Namespace.of(dbName)).stream() + .map(TableIdentifier::name) + .filter(pattern.asPredicate()) + .forEach(names::add); + } + return Lists.newArrayList(names); } @Override @@ -136,7 +149,12 @@ public List getAllTables(String catName, String dbName) { @Override public void dropTable(Table table, boolean deleteData, boolean ignoreUnknownTab, boolean ifPurge) throws TException { - restCatalog.dropTable(TableIdentifier.of(table.getDbName(), table.getTableName())); + TableIdentifier id = TableIdentifier.of(table.getDbName(), table.getTableName()); + if (restCatalog instanceof ViewCatalog viewCatalog && viewCatalog.viewExists(id)) { + viewCatalog.dropView(id); + } else { + restCatalog.dropTable(id); + } } private void validateCurrentCatalog(String catName) { @@ -149,7 +167,11 @@ private void validateCurrentCatalog(String catName) { @Override public boolean tableExists(String catName, String dbName, String tableName) { validateCurrentCatalog(catName); - return restCatalog.tableExists(TableIdentifier.of(dbName, tableName)); + TableIdentifier id = TableIdentifier.of(dbName, tableName); + if (restCatalog.tableExists(id)) { + return true; + } + return restCatalog instanceof ViewCatalog viewCatalog && viewCatalog.viewExists(id); } @Override @@ -178,25 +200,58 @@ public Database getDatabase(String catName, String dbName) throws NoSuchObjectEx @Override public Table getTable(GetTableRequest tableRequest) throws TException { validateCurrentCatalog(tableRequest.getCatName()); - org.apache.iceberg.Table icebergTable; + TableIdentifier id = + TableIdentifier.of(tableRequest.getDbName(), tableRequest.getTblName()); try { - icebergTable = 
restCatalog.loadTable(TableIdentifier.of(tableRequest.getDbName(), - tableRequest.getTblName())); - } catch (NoSuchTableException exception) { + org.apache.iceberg.Table icebergTable = restCatalog.loadTable(id); + return MetastoreUtil.toHiveTable(icebergTable, conf); + } catch (NoSuchTableException tableMissing) { + if (restCatalog instanceof ViewCatalog viewCatalog) { + if (!viewCatalog.viewExists(id)) { + throw new NoSuchObjectException(); + } + return MetastoreUtil.buildMinimalHMSView( + tableRequest.getCatName(), tableRequest.getDbName(), tableRequest.getTblName()); + } throw new NoSuchObjectException(); } - return MetastoreUtil.toHiveTable(icebergTable, conf); + } + + private static boolean hasIcebergViewTableType(Table table) { + if (!TableType.VIRTUAL_VIEW.toString().equals(table.getTableType())) { + return false; + } + Map params = table.getParameters(); + if (params == null) { + return false; + } + return HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE.equalsIgnoreCase( + params.get(BaseMetastoreTableOperations.TABLE_TYPE_PROP)); + } + + @Override + public void alter_table(String catName, String dbName, String tblName, Table newTable, + EnvironmentContext envContext, String validWriteIdList) { + validateCurrentCatalog(catName); + if (hasIcebergViewTableType(newTable) && restCatalog instanceof ViewCatalog) { + createOrReplaceIcebergView(newTable, dbName, tblName); + } } @Override public void createTable(CreateTableRequest request) throws TException { Table table = request.getTable(); - List cols = Lists.newArrayList(table.getSd().getCols()); - if (table.isSetPartitionKeys() && !table.getPartitionKeys().isEmpty()) { - cols.addAll(table.getPartitionKeys()); + if (hasIcebergViewTableType(table) && restCatalog instanceof ViewCatalog) { + createOrReplaceIcebergView(table, table.getDbName(), table.getTableName()); + } else { + createIcebergTable(request); } + } + + private void createIcebergTable(CreateTableRequest request) { + Table table = request.getTable(); 
Properties tableProperties = IcebergTableProperties.getTableProperties(table, conf); - Schema schema = HiveSchemaUtil.convert(cols, Collections.emptyMap(), true); + Schema schema = HiveSchemaUtil.convert(hmsTableColumns(table), Collections.emptyMap(), true); Map envCtxProps = Optional.ofNullable(request.getEnvContext()) .map(EnvironmentContext::getProperties) .orElse(Collections.emptyMap()); @@ -204,7 +259,8 @@ public void createTable(CreateTableRequest request) throws TException { HMSTablePropertyHelper.getPartitionSpec(envCtxProps, schema); SortOrder sortOrder = HMSTablePropertyHelper.getSortOrder(tableProperties, schema); - restCatalog.buildTable(TableIdentifier.of(table.getDbName(), table.getTableName()), schema) + restCatalog + .buildTable(TableIdentifier.of(table.getDbName(), table.getTableName()), schema) .withPartitionSpec(partitionSpec) .withLocation(tableProperties.getProperty(IcebergTableProperties.LOCATION)) .withSortOrder(sortOrder) @@ -212,6 +268,23 @@ public void createTable(CreateTableRequest request) throws TException { .create(); } + private void createOrReplaceIcebergView(Table table, String dbName, String tableName) { + Map tblProps = + table.getParameters() == null ? 
Maps.newHashMap() : Maps.newHashMap(table.getParameters()); + String comment = tblProps.get("comment"); + List cols = Lists.newArrayList(table.getSd().getCols()); + IcebergViewSupport.createOrReplaceView( + conf, dbName, tableName, cols, table.getViewExpandedText(), tblProps, comment); + } + + private static List hmsTableColumns(Table table) { + List cols = Lists.newArrayList(table.getSd().getCols()); + if (table.isSetPartitionKeys() && !table.getPartitionKeys().isEmpty()) { + cols.addAll(table.getPartitionKeys()); + } + return cols; + } + @Override public void createDatabase(Database db) { validateCurrentCatalog(db.getCatalogName()); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestIcebergViewSupport.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestIcebergViewSupport.java new file mode 100644 index 000000000000..f8691f18e250 --- /dev/null +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestIcebergViewSupport.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hive; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.view.BaseView; +import org.apache.iceberg.view.View; +import org.apache.iceberg.view.ViewMetadata; +import org.apache.thrift.TException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import static org.apache.iceberg.CatalogUtil.ICEBERG_CATALOG_TYPE; +import static org.apache.iceberg.CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE; +import static org.assertj.core.api.Assertions.assertThat; + +class TestIcebergViewSupport { + + private static final String DB = "native_vw_db"; + private static final String VIEW = "native_vw"; + + @RegisterExtension + private static final HiveMetastoreExtension HIVE_METASTORE_EXTENSION = + HiveMetastoreExtension.builder().withDatabase(DB).build(); + + @AfterEach + void dropView() { + HiveCatalog cat = loadCatalog(); + TableIdentifier id = TableIdentifier.of(DB, VIEW); + cat.dropView(id); + } + + private HiveConf nativeViewConf() { + HiveConf conf = new HiveConf(HIVE_METASTORE_EXTENSION.hiveConf()); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CATALOG_DEFAULT, "hive"); + conf.set( + IcebergCatalogProperties.catalogPropertyConfigKey("hive", ICEBERG_CATALOG_TYPE), + ICEBERG_CATALOG_TYPE_HIVE); + return conf; + } + + private HiveCatalog loadCatalog() { + return (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + ICEBERG_CATALOG_TYPE_HIVE, + 
ImmutableMap.of( + CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, + String.valueOf(TimeUnit.SECONDS.toMillis(10))), + HIVE_METASTORE_EXTENSION.hiveConf()); + } + + @Test + void testCreateCommitsNativeViewWithUserProperties() { + HiveConf conf = nativeViewConf(); + List cols = + Arrays.asList(new FieldSchema("id", "int", null), new FieldSchema("name", "string", null)); + String sql = String.format("select id, name from %s.src_tbl", DB); + Map props = Collections.singletonMap("k1", "v1"); + + IcebergViewSupport.createOrReplaceView( + conf, DB, VIEW, cols, sql, props, "hello-view"); + + HiveCatalog cat = loadCatalog(); + TableIdentifier id = TableIdentifier.of(DB, VIEW); + assertThat(cat.viewExists(id)).isTrue(); + View view = cat.loadView(id); + assertThat(view.properties()) + .containsEntry("comment", "hello-view") + .containsEntry("k1", "v1") + .doesNotContainKey("hive.storage.external.logical.view.handler"); + HiveViewOperations ops = (HiveViewOperations) ((BaseView) view).operations(); + assertThat(ops.current().currentVersion().representations()).isNotEmpty(); + } + + @Test + void testCreateOrReplaceViewReplacesExisting() { + HiveConf conf = nativeViewConf(); + List cols = Collections.singletonList(new FieldSchema("id", "int", null)); + TableIdentifier id = TableIdentifier.of(DB, VIEW); + + IcebergViewSupport.createOrReplaceView( + conf, DB, VIEW, cols, "select 1 as id", null, null); + View afterCreate = loadCatalog().loadView(id); + assertThat(afterCreate.sqlFor("hive").sql().trim()).isEqualTo("select 1 as id"); + + IcebergViewSupport.createOrReplaceView( + conf, DB, VIEW, cols, "select 2 as id", null, null); + + assertThat(loadCatalog().viewExists(id)).isTrue(); + View afterReplace = loadCatalog().loadView(id); + assertThat(afterReplace.sqlFor("hive").sql().trim()).isEqualTo("select 2 as id"); + } + + @Test + void testEnrichHmsTableFromIcebergViewOverridesStaleHmsSql() throws TException { + HiveConf conf = nativeViewConf(); + List cols = 
Collections.singletonList(new FieldSchema("id", "int", null)); + String sql = "select 42 as id"; + + IcebergViewSupport.createOrReplaceView( + conf, DB, VIEW, cols, sql, null, null); + + org.apache.hadoop.hive.metastore.api.Table hmsTable = + HIVE_METASTORE_EXTENSION.metastoreClient().getTable(DB, VIEW); + hmsTable.setViewOriginalText("select 0"); + hmsTable.setViewExpandedText("select 0"); + + IcebergViewSupport.enrichHmsTableFromIcebergView(hmsTable, conf); + assertThat(hmsTable.getViewExpandedText()).isEqualTo(sql); + assertThat(hmsTable.getViewOriginalText()).isEqualTo(sql); + + ViewMetadata metadata = ((BaseView) loadCatalog().loadView(TableIdentifier.of(DB, VIEW))).operations().current(); + assertThat(hmsTable.getCreateTime()).isEqualTo((int) (metadata.version(1).timestampMillis() / 1000)); + } +} diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/client/TestHiveRESTCatalogClient.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/client/TestHiveRESTCatalogClient.java index 1ae7e742774c..4f21f46b2e28 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/client/TestHiveRESTCatalogClient.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/client/TestHiveRESTCatalogClient.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.CreateTableRequest; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; @@ -32,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.BaseTable; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.PartitionSpec; @@ -43,7 
+45,10 @@ import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.catalog.ViewCatalog; +import org.apache.iceberg.hive.HiveOperationsBase; import org.apache.iceberg.hive.HiveSchemaUtil; +import org.apache.iceberg.hive.IcebergViewSupport; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.LocationProvider; import org.apache.iceberg.relocated.com.google.common.collect.Maps; @@ -59,8 +64,9 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; -public class TestHiveRESTCatalogClient { +class TestHiveRESTCatalogClient { private static HiveRESTCatalogClient spyHiveRESTCatalogClient; private static RESTCatalog mockRestCatalog; @@ -137,13 +143,13 @@ public void after() { } @Test - public void testGetTable() throws TException { + void testGetTable() throws TException { spyHiveRESTCatalogClient.getTable("default", "tableName"); Mockito.verify(mockRestCatalog).loadTable(TableIdentifier.of("default", "tableName")); } @Test - public void testCreateTable() throws TException { + void testCreateTable() throws TException { Table table = new Table(); table.setTableName("tableName"); table.setDbName("default"); @@ -155,7 +161,7 @@ public void testCreateTable() throws TException { } @Test - public void testCreatePartitionedTable() throws TException { + void testCreatePartitionedTable() throws TException { Table table = new Table(); table.setTableName("tableName"); table.setDbName("default"); @@ -192,9 +198,54 @@ public void testCreatePartitionedTable() throws TException { } @Test - public void testGetDatabase() throws TException { + void testGetDatabase() throws TException { Database aDefault = spyHiveRESTCatalogClient.getDatabase("default"); assertThat(aDefault.getName()).isEqualTo("default"); Mockito.verify(mockRestCatalog).listNamespaces(Namespace.empty()); 
} + + @Test + void testAlterIcebergView() { + RESTCatalog viewCapableCatalog = + Mockito.mock(RESTCatalog.class, Mockito.withSettings().extraInterfaces(ViewCatalog.class)); + Mockito.doReturn("hive").when(viewCapableCatalog).name(); + mockCatalogUtil.when(() -> CatalogUtil.buildIcebergCatalog(any(), any(), any())) + .thenReturn(viewCapableCatalog); + + Configuration configuration = new Configuration(); + configuration.set("iceberg.catalog", "ice01"); + configuration.set("iceberg.catalog.ice01.uri", "http://localhost"); + HiveRESTCatalogClient client = new HiveRESTCatalogClient(configuration); + + try (MockedStatic viewSupport = + Mockito.mockStatic(IcebergViewSupport.class)) { + client.alter_table("hive", "ice_db", "ice_v1", createIcebergView(), null, null); + viewSupport.verify( + () -> + IcebergViewSupport.createOrReplaceView( + any(), + eq("ice_db"), + eq("ice_v1"), + any(), + eq("select 1"), + any(), + eq(null))); + } + } + + private static Table createIcebergView() { + Table view = new Table(); + view.setTableName("ice_v1"); + view.setDbName("ice_db"); + view.setTableType(TableType.VIRTUAL_VIEW.toString()); + view.setViewExpandedText("select 1"); + view.setSd(new StorageDescriptor()); + view.getSd().setCols(Collections.singletonList(new FieldSchema("x", "int", ""))); + view.setParameters( + Maps.newHashMap( + Map.of( + BaseMetastoreTableOperations.TABLE_TYPE_PROP, + HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE))); + return view; + } } diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java index 69fbe5bf99c2..917829ac304d 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java @@ -34,6 +34,7 @@ import org.apache.commons.lang3.StringUtils; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.CreateTableRequest; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -60,9 +61,11 @@ import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.hive.HMSTablePropertyHelper; +import org.apache.iceberg.hive.HiveOperationsBase; import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.hive.IcebergCatalogProperties; import org.apache.iceberg.hive.IcebergTableProperties; +import org.apache.iceberg.hive.IcebergViewSupport; import org.apache.iceberg.mr.Catalogs; import org.apache.iceberg.mr.InputFormatConfig; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -89,7 +92,6 @@ public class BaseHiveIcebergMetaHook implements HiveMetaHook { ); private static final Set PARAMETERS_TO_REMOVE = ImmutableSet .of(InputFormatConfig.TABLE_SCHEMA, Catalogs.LOCATION, Catalogs.NAME, InputFormatConfig.PARTITION_SPEC); - static final String ORC_FILES_ONLY = "iceberg.orc.files.only"; private static final String ZORDER_FIELDS_JSON_KEY = "zorderFields"; protected final Configuration conf; @@ -115,6 +117,16 @@ public BaseHiveIcebergMetaHook(Configuration conf) { this.conf = conf; } + public static boolean isIcebergView(org.apache.hadoop.hive.metastore.api.Table hmsTable) { + if (hmsTable == null || + hmsTable.getParameters() == null || + !TableType.VIRTUAL_VIEW.toString().equals(hmsTable.getTableType())) { + return false; + } + String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE); + return HiveMetaHook.HIVE_ICEBERG_STORAGE_HANDLER.equals(storageHandler); + } + @Override public void preCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) { CreateTableRequest 
request = new CreateTableRequest(hmsTable); @@ -127,6 +139,10 @@ public void preCreateTable(CreateTableRequest request) { if (hmsTable.isTemporary()) { throw new UnsupportedOperationException("Creation of temporary iceberg tables is not supported."); } + if (isIcebergView(hmsTable)) { + preCreateIcebergView(request); + return; + } this.tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); // Set the table type even for non HiveCatalog based tables @@ -197,13 +213,23 @@ public void preCreateTable(CreateTableRequest request) { assertFileFormat(tableProperties.getProperty(TableProperties.DEFAULT_FILE_FORMAT)); - // Set whether the format is ORC, to be used during vectorization. - setOrcOnlyFilesParam(hmsTable); // Remove hive primary key columns from table request, as iceberg doesn't support hive primary key. request.setPrimaryKeys(null); setSortOrder(hmsTable, schema, tableProperties); } + private void preCreateIcebergView(CreateTableRequest request) { + + org.apache.hadoop.hive.metastore.api.Table hmsTable = request.getTable(); + tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); + + hmsTable + .getParameters() + .put( + BaseMetastoreTableOperations.TABLE_TYPE_PROP, + HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE); + } + /** * Method for verification that necessary catalog configs are defined in Session Conf. 
* @@ -456,14 +482,6 @@ protected static PartitionSpec spec(Configuration configuration, Schema schema, return HMSTablePropertyHelper.getPartitionSpec(hmsTable.getParameters(), schema); } - protected void setOrcOnlyFilesParam(org.apache.hadoop.hive.metastore.api.Table hmsTable) { - hmsTable.getParameters().put(ORC_FILES_ONLY, String.valueOf(isOrcOnlyFiles(hmsTable))); - } - - protected boolean isOrcOnlyFiles(org.apache.hadoop.hive.metastore.api.Table hmsTable) { - return !"FALSE".equalsIgnoreCase(hmsTable.getParameters().get(ORC_FILES_ONLY)) && isOrcFileFormat(hmsTable); - } - static boolean isOrcFileFormat(org.apache.hadoop.hive.metastore.api.Table hmsTable) { return hmsTable.getSd().getInputFormat() != null && hmsTable.getSd().getInputFormat().toUpperCase() .contains(org.apache.iceberg.FileFormat.ORC.name()) || org.apache.iceberg.FileFormat.ORC.name() @@ -504,6 +522,10 @@ protected void setWriteModeDefaults(Table icebergTbl, Map newPro public void postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) { if (hmsTable != null) { try { + if (isIcebergView(hmsTable)) { + IcebergViewSupport.enrichHmsTableFromIcebergView(hmsTable, conf); + return; + } Table tbl = IcebergTableUtil.getTable(conf, hmsTable); String formatVersion = String.valueOf(TableUtil.formatVersion(tbl)); hmsTable.getParameters().put(TableProperties.FORMAT_VERSION, formatVersion); @@ -531,4 +553,5 @@ private static boolean isHiveIcebergStorageHandler(String storageHandler) { throw new RuntimeException("Error checking storage handler class", e); } } + } diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java index 5c9781132a35..dbf693304d7a 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java @@ -46,7 
+46,6 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.TableProperties; import org.apache.iceberg.data.Record; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; @@ -69,7 +68,7 @@ public class HiveIcebergInputFormat extends MapredIcebergInputFormat LlapCacheOnlyInputFormatInterface.VectorizedOnly { private static final Logger LOG = LoggerFactory.getLogger(HiveIcebergInputFormat.class); - public static final String ICEBERG_DISABLE_VECTORIZATION_PREFIX = "iceberg.disable.vectorization."; + public static final String ICEBERG_DISABLE_DECIMAL64_PREFIX = "iceberg.disable.decimal64."; /** * Encapsulates planning-time and reader-time Iceberg filter expressions derived from Hive predicates. @@ -249,16 +248,17 @@ public VectorizedSupport.Support[] getSupportedFeatures() { @Override public VectorizedSupport.Support[] getSupportedFeatures(HiveConf hiveConf, TableDesc tableDesc) { - // disabling VectorizedSupport.Support.DECIMAL_64 for Parquet as it doesn't support it - boolean isORCOnly = - Boolean.parseBoolean(tableDesc.getProperties().getProperty(HiveIcebergMetaHook.DECIMAL64_VECTORIZATION)) && - Boolean.parseBoolean(tableDesc.getProperties().getProperty(HiveIcebergMetaHook.ORC_FILES_ONLY)) && - org.apache.iceberg.FileFormat.ORC.name() - .equalsIgnoreCase(tableDesc.getProperties().getProperty(TableProperties.DEFAULT_FILE_FORMAT)); - if (!isORCOnly) { - final String vectorizationConfName = getVectorizationConfName(tableDesc.getTableName()); - LOG.debug("Setting {} for table: {} to true", vectorizationConfName, tableDesc.getTableName()); - hiveConf.set(vectorizationConfName, "true"); + // Both vectorizable file formats (ORC and Parquet) now support DECIMAL_64 reads, so advertise it + // whenever decimal64 vectorization is enabled for the table, regardless of file format. 
+ boolean decimal64Enabled = + Boolean.parseBoolean(tableDesc.getProperty(HiveIcebergMetaHook.DECIMAL64_VECTORIZATION)); + if (!decimal64Enabled) { + // Keep the LLAP ORC reader from emitting decimal64 so it stays consistent with the full-decimal + // operator pipeline; consumed in HiveVectorizedReader#orcRecordReader. + final String decimal64DisableConfName = getDecimal64DisableConfName(tableDesc.getTableName()); + LOG.debug("Setting {} for table: {} to true", decimal64DisableConfName, tableDesc.getTableName()); + hiveConf.set(decimal64DisableConfName, "true"); + return new VectorizedSupport.Support[] {}; } return new VectorizedSupport.Support[] { VectorizedSupport.Support.DECIMAL_64 }; @@ -269,9 +269,9 @@ public void injectCaches(FileMetadataCache metadataCache, DataCache dataCache, C // no-op for Iceberg } - public static String getVectorizationConfName(String tableName) { + public static String getDecimal64DisableConfName(String tableName) { String dbAndTableName = TableName.fromString(tableName, null, null).getNotEmptyDbTable(); - return ICEBERG_DISABLE_VECTORIZATION_PREFIX + dbAndTableName; + return ICEBERG_DISABLE_DECIMAL64_PREFIX + dbAndTableName; } @Override diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java index 58c2d19373dd..5a2bbbf70219 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java @@ -118,6 +118,7 @@ import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.hive.HiveTableOperations; import org.apache.iceberg.hive.IcebergTableProperties; +import org.apache.iceberg.hive.IcebergViewSupport; import org.apache.iceberg.hive.MetastoreLock; import org.apache.iceberg.hive.NoLock; import org.apache.iceberg.io.CloseableIterable; @@ -176,13 +177,23 @@ 
public HiveIcebergMetaHook(Configuration conf) { super(conf); } - @Override - public void rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) { - // do nothing - } - @Override public void commitCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) { + if (isIcebergView(hmsTable)) { + tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); + Map tblProps = + hmsTable.getParameters() == null ? Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters()); + String comment = tblProps.get("comment"); + IcebergViewSupport.createOrReplaceView( + conf, + hmsTable.getDbName(), + hmsTable.getTableName(), + hmsTable.getSd().getCols(), + hmsTable.getViewExpandedText(), + tblProps, + comment); + return; + } if (icebergTable == null) { setFileFormat(tableProperties.getProperty(TableProperties.DEFAULT_FILE_FORMAT)); @@ -208,11 +219,6 @@ public void commitCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTabl } } - @Override - public void preDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) { - // do nothing - } - @Override public void preDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, boolean deleteData) { this.tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); @@ -235,11 +241,6 @@ public void preDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, bo } } - @Override - public void rollbackDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) { - // do nothing - } - @Override public void commitDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, boolean deleteData) { if (deleteData && deleteIcebergTable) { @@ -265,6 +266,15 @@ public void commitDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, @Override public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context) throws MetaException { + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) { + currentAlterTableOp = null; + if 
(commitLock == null) { + commitLock = new NoLock(); + } + commitLock.lock(); + tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); + return; + } tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); setupAlterOperationType(hmsTable, context); if (AlterTableType.RENAME.equals(currentAlterTableOp)) { @@ -311,8 +321,6 @@ private void doPreAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable // If so, we will create the iceberg table in commitAlterTable and go ahead with the migration assertTableCanBeMigrated(hmsTable); isTableMigration = true; - // Set whether the format is ORC, to be used during vectorization. - setOrcOnlyFilesParam(hmsTable); StorageDescriptor sd = hmsTable.getSd(); preAlterTableProperties = new PreAlterTableProperties(); @@ -375,13 +383,6 @@ private void doPreAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable assertNotCrossTableMetadataLocationChange(hmsTable.getParameters(), context); } - // Migration case is already handled above, in case of migration we don't have all the properties set till this - // point. - if (!isTableMigration) { - // Set whether the format is ORC, to be used during vectorization. - setOrcOnlyFilesParam(hmsTable); - } - } /** @@ -493,6 +494,21 @@ public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable if (commitLock == null) { throw new IllegalStateException("Hive commit lock should already be set"); } + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) { + tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); + Map tblProps = + hmsTable.getParameters() == null ? 
Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters()); + String comment = tblProps.get("comment"); + IcebergViewSupport.createOrReplaceView( + conf, + hmsTable.getDbName(), + hmsTable.getTableName(), + hmsTable.getSd().getCols(), + hmsTable.getViewExpandedText(), + tblProps, + comment); + return; + } commitLock.unlock(); if (isTableMigration) { tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 3a4b1f1f8d7d..fbc704a36197 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -383,6 +383,11 @@ public boolean supportsPartitioning() { return true; } + @Override + public boolean supportsExternalViewCatalog() { + return true; + } + /** * @param jobConf Job configuration for InputFormat to access * @param deserializer Deserializer @@ -428,6 +433,9 @@ public boolean canProvidePartitionStatistics(org.apache.hadoop.hive.ql.metadata. 
if (!getStatsSource().equals(HiveMetaHook.ICEBERG)) { return false; } + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) { + return false; + } Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); Snapshot snapshot = IcebergTableUtil.getTableSnapshot(table, hmsTable); if (snapshot != null) { @@ -891,6 +899,9 @@ public boolean supportsPartitionTransform() { @Override public List getPartitionTransformSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) { + return Collections.emptyList(); + } Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); return table.spec().fields().stream() .filter(f -> !f.transform().isVoid()) @@ -905,6 +916,9 @@ public List getPartitionTransformSpec(org.apache.hadoop.hive.ql.m @Override public Map> getPartitionTransformSpecs( org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) { + return Collections.emptyMap(); + } Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); return table.specs().entrySet().stream().flatMap(e -> e.getValue().fields().stream() @@ -1570,6 +1584,9 @@ public boolean supportsSortColumns() { @Override public List sortColumns(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) { + return Collections.emptyList(); + } TableDesc tableDesc = Utilities.getTableDesc(hmsTable); Table table = IcebergTableUtil.getTable(conf, tableDesc.getProperties()); if (table.sortOrder().isUnsorted()) { @@ -2130,6 +2147,10 @@ public List getPartitions(org.apache.hadoop.hive.ql.metadata.Table hm } public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) { + List partCols = hmsTable.getPartCols(); + return partCols != null && !partCols.isEmpty(); + } if (!hmsTable.getTTable().isSetId()) { 
return false; } @@ -2275,6 +2296,10 @@ public boolean canPerformMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table @Override public List getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) { + List partCols = hmsTable.getPartCols(); + return partCols != null ? partCols : Collections.emptyList(); + } if (!hmsTable.getTTable().isSetId()) { return Collections.emptyList(); } @@ -2306,6 +2331,7 @@ public List getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta } Set partitions = Sets.newHashSet(); + String defaultPartitionName = HiveConf.getVar(conf, ConfVars.DEFAULT_PARTITION_NAME); try (CloseableIterable tasks = scan.planFiles()) { FluentIterable.from(tasks) @@ -2316,8 +2342,8 @@ public List getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta PartitionData partitionData = IcebergTableUtil.toPartitionData(task.partition(), spec.partitionType()); String partName = spec.partitionToPath(partitionData); - Map partSpecMap = Maps.newLinkedHashMap(); - Warehouse.makeSpecFromName(partSpecMap, new Path(partName), null); + Map partSpecMap = + IcebergTableUtil.makeSpecFromName(partName, spec, partitionData, defaultPartitionName); DummyPartition partition = new DummyPartition(hmsTable, partName, partSpecMap); partitions.add(partition); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java index fe1a06d32acb..0240135c92b7 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java @@ -452,6 +452,24 @@ public static void performMetadataDelete(Table icebergTable, String branchName, deleteFiles.deleteFromRowFilter(exp).commit(); } + /** + * Parses an Iceberg partition path into a Hive-compatible spec map, representing 
null partition + * values with the Hive default partition name. + */ + public static Map makeSpecFromName(String partName, PartitionSpec spec, PartitionData data, + String defaultPartitionName) { + Map partSpecMap = Maps.newLinkedHashMap(); + Warehouse.makeSpecFromName(partSpecMap, new Path(partName), null); + + List fields = spec.fields(); + for (int i = 0; i < fields.size(); i++) { + if (data.get(i) == null) { + partSpecMap.put(fields.get(i).name(), defaultPartitionName); + } + } + return partSpecMap; + } + public static PartitionData toPartitionData(StructLike key, Types.StructType keyType) { PartitionData keyTemplate = new PartitionData(keyType); return keyTemplate.copyFor(key); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java index e38184b2e6ae..834d762062da 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java @@ -213,10 +213,11 @@ private static RecordReader orcRecordReader(Jo // TODO: add support for reading files with positional deletes with LLAP (LLAP would need to provide file row num) if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon()) && LlapProxy.getIo() != null && task.deletes().isEmpty() && !InputFormatConfig.fetchVirtualColumns(job)) { - boolean isDisableVectorization = - job.getBoolean(HiveIcebergInputFormat.getVectorizationConfName(tableName), false); - if (isDisableVectorization) { - // Required to prevent LLAP from dealing with decimal64, HiveIcebergInputFormat.getSupportedFeatures() + boolean isDecimal64Disabled = + job.getBoolean(HiveIcebergInputFormat.getDecimal64DisableConfName(tableName), false); + if (isDecimal64Disabled) { + // The LLAP ORC reader derives decimal64 support from this job 
var, not the plan; clear it so + // it emits full decimal instead. HiveConf.setVar(job, HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED, ""); } recordReader = LlapProxy.getIo().llapVectorizedOrcReaderForPath(fileId, path, null, readColumnIds, diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java index c05f8bc62ab7..34b99d6b00b5 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java @@ -79,7 +79,7 @@ public static ByteBuffer getSerializedOrcTail(Path path, SyntheticFileId fileId, // Note: Since Hive doesn't know about partition information of Iceberg tables, partitionDesc is only used to // deduct the table (and DB) name here. CacheTag cacheTag = HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_TRACK_CACHE_USAGE) ? 
- LlapHiveUtils.getDbAndTableNameForMetrics(path, true, partitionDesc) : null; + LlapHiveUtils.getCacheTag(path, true, partitionDesc) : null; try { // Schema has to be serialized and deserialized as it is passed between different packages of TypeDescription: diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_pcr_null_partition.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_pcr_null_partition.q new file mode 100644 index 000000000000..c77e4500a720 --- /dev/null +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_pcr_null_partition.q @@ -0,0 +1,18 @@ +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.fetch.task.conversion=none; +set hive.explain.user=false; + +drop table if exists ice_01; +create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg; + +insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'; +insert into ice_01 partition (ds) select 'B', 'V2', 'null'; +insert into ice_01 partition (ds) select 'C', 'V3', null; + +explain select key, value, ds from ice_01 where ds is null; +select key, value, ds from ice_01 where ds is null; + +explain select key, value, ds from ice_01 where ds is not null; +select key, value, ds from ice_01 where ds is not null order by key; + +select key, value, ds from ice_01 where ds = 'null'; \ No newline at end of file diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_gravitino.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_gravitino.q index 81982ca44d98..91aa14fdc170 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_gravitino.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_gravitino.q @@ -15,8 +15,6 @@ --! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ --! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ --! 
qt:replace:/(MAJOR\s+refused\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ --- Mask compaction id as they will be allocated in parallel threads ---! qt:replace:/^[0-9]/#Masked#/ -- Mask removed file size --! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ -- Mask iceberg version @@ -47,7 +45,7 @@ partitioned by (company_id bigint) stored by iceberg stored as orc; ----------------------------------------------------------------------------- ---! Creating table with a valid catalog name in table properties +--! Creating a table with a valid catalog name in table properties ----------------------------------------------------------------------------- create table ice_orc2 ( @@ -74,6 +72,51 @@ VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30); describe formatted ice_orc2; select * from ice_orc2; +--------------------------------------------------------------------------------------------------------------------- +--! Iceberg views tests +--------------------------------------------------------------------------------------------------------------------- + +----------------------------------------------------------------------------------------------------- +--! 
Iceberg view with TBLPROPERTIES ('view-format'='iceberg') on a REST catalog table +----------------------------------------------------------------------------------------------------- + +create view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name, last_name from ice_orc2 where dept_id in (1, 3); + +select * from ice_v1; +desc formatted ice_v1; + +------- if-not-exists view test - view should not change ------------------------- + +create view if not exists ice_v1 tblproperties ('view-format'='iceberg') +as select * from ice_orc2 where dept_id = 10000; + +select * from ice_v1; +desc formatted ice_v1; + +------- replace view test - view should be replaced ------------------------------ + +create or replace view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name || '-' || dept_id from ice_orc2 where dept_id = 2; + +select * from ice_v1; +desc formatted ice_v1; + +drop view ice_v1; + +----------------------------------------------------------------------------------------------- +--! Iceberg view with default Iceberg storage handler and REST catalog table +----------------------------------------------------------------------------------------------- + +set hive.default.storage.handler.class=org.apache.iceberg.mr.hive.HiveIcebergStorageHandler; + +create view ice_v2 +as select first_name, last_name || '-' || dept_id from ice_orc2 where team_id in (20, 30); + +select * from ice_v2; +desc formatted ice_v2; +drop view ice_v2; + ----------------------------------------------------------------------------- show tables; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_hms.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_hms.q index 27f23122240b..a1d8cb6b9056 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_hms.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_hms.q @@ -15,8 +15,6 @@ --! 
qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ --! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ --! qt:replace:/(MAJOR\s+refused\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ --- Mask compaction id as they will be allocated in parallel threads ---! qt:replace:/^[0-9]/#Masked#/ -- Mask removed file size --! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ -- Mask iceberg version @@ -47,7 +45,7 @@ partitioned by (company_id bigint) stored by iceberg stored as orc; ----------------------------------------------------------------------------- ---! Creating table with a valid catalog name in table properties +--! Creating a table with a valid catalog name in table properties ----------------------------------------------------------------------------- create table ice_orc2 ( @@ -69,6 +67,31 @@ VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30); describe formatted ice_orc2; select * from ice_orc2; +----------------------------------------------------------------------------------------------- +--! Iceberg view with TBLPROPERTIES ('view-format'='iceberg') on a REST catalog table +----------------------------------------------------------------------------------------------- + +create view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name, last_name from ice_orc2 where dept_id in (1,2); + +select * from ice_v1; +desc formatted ice_v1; +drop view ice_v1; + +----------------------------------------------------------------------------------------------- +--! Iceberg view: 'view-format' table properly omitted, Hive config 'hive.default.storage.handler.class' +--! 
set to 'HiveIcebergStorageHandler' +----------------------------------------------------------------------------------------------- + +set hive.default.storage.handler.class=org.apache.iceberg.mr.hive.HiveIcebergStorageHandler; + +create view ice_v2 +as select dept_id, team_id from ice_orc2 where company_id = 100; + +select * from ice_v2; +desc formatted ice_v2; +drop view ice_v2; + ----------------------------------------------------------------------------- show tables; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_view.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_view.q new file mode 100644 index 000000000000..037b67371b13 --- /dev/null +++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_view.q @@ -0,0 +1,87 @@ +-- SORT_QUERY_RESULTS +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ + +create database ice_native_view_db; +use ice_native_view_db; + +create table src_ice ( + first_name string, + last_name string + ) +partitioned by (dept_id bigint) +stored by iceberg stored as orc; + +INSERT INTO src_ice VALUES + ('fn1','ln1', 1), + ('fn2','ln2', 1), + ('fn3','ln3', 1), + ('fn4','ln4', 1), + ('fn5','ln5', 2), + ('fn6','ln6', 2), + ('fn7','ln7', 2); + +------------------------------------------------------------------------------- +-- Native Iceberg view via TBLPROPERTIES +------------------------------------------------------------------------------- + +-- TEST VIEW CREATION -- + +create view v_ice tblproperties ('view-format'='iceberg') +as select * from src_ice; + +select * from v_ice; + +-- TEST VIEW REPLACEMENT -- + +create or replace view v_ice tblproperties ('view-format'='iceberg') +as select first_name || '-' || dept_id from src_ice where dept_id = 1; + +select * from v_ice; +desc formatted v_ice; + +------------------------------------------------------------------------------- +-- Native Iceberg view when default storage handler is Iceberg +-- and no 'view-format' property in 
TBLPROPERTIES +------------------------------------------------------------------------------- + +set hive.default.storage.handler.class=org.apache.iceberg.mr.hive.HiveIcebergStorageHandler; + +-- TEST VIEW CREATION WITH IF EXISTS -- + +create view if not exists v_def +as select first_name, last_name, dept_id from src_ice where dept_id = 2; + +select * from v_def; + +-- TEST VIEW IS NOT CREATED BECAUSE IT ALREADY EXISTS -- + +create view if not exists v_def +as select first_name, last_name, dept_id from src_ice; + +select * from v_def; + +desc formatted v_def; +drop view v_def; + +----------------------------------------------------------------------------------------- +-- Classic Hive view when the base table is Iceberg and default storage handler is unset +----------------------------------------------------------------------------------------- + +set hive.default.storage.handler.class=; + +create view v_hive as select * from src_ice; +select * from v_hive; +desc formatted v_hive; +drop view v_hive; + +----------------------------------------------------------------------------------------- +-- Replace Iceberg logical view with a Hive-native logical view +----------------------------------------------------------------------------------------- + +create or replace view v_ice +as select first_name from src_ice where dept_id = 2; + +select * from v_ice; +desc formatted v_ice; +drop view v_ice; \ No newline at end of file diff --git a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_multitable.q b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_multitable.q index 349b02f706c4..73c6c05fe8f1 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_multitable.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_multitable.q @@ -11,8 +11,6 @@ insert into customer_ice values (10); create external table orders(o_orderkey int, o_custkey int) stored as orc; insert 
into orders values (10, 10); -alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false'); - select sum(1 - l_discount) as revenue FROM customer_ice, orders, lineitem WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20; @@ -21,16 +19,6 @@ create external table lineitem_ice(l_discount decimal(15,2), l_orderkey int) STO TBLPROPERTIES ('iceberg.decimal64.vectorization'='true'); insert into lineitem_ice values (100.2, 10); -select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem_ice -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20; - -alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true'); - -select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20; - select sum(1 - l_discount) as revenue FROM customer_ice, orders, lineitem_ice WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20; \ No newline at end of file diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out index 910e48e4214e..346f8b8cd108 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_multi_part_table_to_iceberg.q.out @@ -206,7 +206,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000},{\"name\":\"c\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 7 @@ -482,7 +481,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec 
{\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000},{\"name\":\"c\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 7 @@ -758,7 +756,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000},{\"name\":\"c\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 7 @@ -1098,7 +1095,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000},{\"name\":\"c\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001},{\"name\":\"d\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 10 @@ -1545,7 +1541,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000},{\"name\":\"c\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001},{\"name\":\"d\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 10 @@ -1992,7 +1987,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec 
{\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000},{\"name\":\"c\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001},{\"name\":\"d\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 10 diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out index 55bfee6eb031..813ec51708e8 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_part_table_to_iceberg.q.out @@ -163,7 +163,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 4 @@ -441,7 +440,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 7 @@ -796,7 +794,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 7 @@ -1151,7 +1148,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec 
{\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 7 @@ -1452,7 +1448,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 4 diff --git a/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out index 88ad3396dd70..6198ce9405d4 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/alter_table_to_iceberg.q.out @@ -113,7 +113,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"5\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"5\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 1 @@ -284,7 +283,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"5\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"5\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 
2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 1 @@ -455,7 +453,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"5\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"5\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 1 diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out index 287c95f18fa8..e64b74f0cffb 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table.q.out @@ -30,7 +30,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out index a26573047807..69e18f01cb86 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out +++ 
b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out @@ -36,7 +36,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -109,7 +108,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -182,7 +180,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -255,7 +252,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - 
iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -323,7 +319,6 @@ Table Parameters: current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} dummy dummy_value format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out index 287c95f18fa8..e64b74f0cffb 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg.q.out @@ -30,7 +30,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out index 3e6850812ff3..6ce589351ccc 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out +++ 
b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_by_iceberg_with_serdeproperties.q.out @@ -30,7 +30,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out index 0d1700ff07a9..0f60cd08c28b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_partitioned_orc.q.out @@ -303,7 +303,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"a_bucket\",\"transform\":\"bucket[16]\",\"source-id\":1,\"field-id\":1000},{\"name\":\"b_trunc\",\"transform\":\"truncate[3]\",\"source-id\":2,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out index 93b114614928..f4ab854b0ed5 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out @@ -43,7 +43,6 @@ TBLPROPERTIES ( 'created_with_ctlt'='true', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"a","required":false,"type":"int"}]}', 'format-version'='2', - 
'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'snapshot-count'='0', @@ -129,7 +128,6 @@ TBLPROPERTIES ( 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"company","required":false,"type":"string"}]}', 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"company","transform":"identity","source-id":2,"field-id":1000}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -174,7 +172,6 @@ TBLPROPERTIES ( 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"company","required":false,"type":"string"}]}', 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"company","transform":"identity","source-id":2,"field-id":1000}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'snapshot-count'='0', @@ -247,7 +244,6 @@ TBLPROPERTIES ( 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"company","required":false,"type":"string"}]}', 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"company","transform":"identity","source-id":2,"field-id":1000}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'snapshot-count'='0', diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out index 58dde9ecad2a..cacb4e27adc7 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_all_iceberg.q.out @@ -117,7 +117,6 
@@ Table Parameters: current-snapshot-timestamp-ms #Masked# format-version 2 iceberg.delete.skiprowdata false - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -269,7 +268,6 @@ Table Parameters: current-snapshot-summary {\"deleted-data-files\":\"5\",\"deleted-records\":\"20\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"0\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"0\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -421,7 +419,6 @@ Table Parameters: current-snapshot-summary {\"deleted-data-files\":\"5\",\"deleted-records\":\"20\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"0\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"0\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out index 707be189e497..9a5350c2e0f2 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/delete_iceberg_copy_on_write_unpartitioned.q.out @@ -48,10 +48,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_ice - filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22)) or 
(b) IN ('four', 'one') or (a = 22)) (type: boolean) + filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean) + predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string) diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out index fb1cdbcaf12a..185fd2a27322 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out @@ -78,7 +78,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"i\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"s\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":4,\"name\":\"d\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -150,7 +149,6 @@ Table Parameters: current-schema 
{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"year_field\",\"required\":false,\"type\":\"date\"},{\"id\":2,\"name\":\"month_field\",\"required\":false,\"type\":\"date\"},{\"id\":3,\"name\":\"day_field\",\"required\":false,\"type\":\"date\"},{\"id\":4,\"name\":\"hour_field\",\"required\":false,\"type\":\"timestamp\"},{\"id\":5,\"name\":\"truncate_field\",\"required\":false,\"type\":\"string\"},{\"id\":6,\"name\":\"bucket_field\",\"required\":false,\"type\":\"int\"},{\"id\":7,\"name\":\"identity_field\",\"required\":false,\"type\":\"int\"}]} default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"year_field_year\",\"transform\":\"year\",\"source-id\":1,\"field-id\":1000},{\"name\":\"month_field_month\",\"transform\":\"month\",\"source-id\":2,\"field-id\":1001},{\"name\":\"day_field_day\",\"transform\":\"day\",\"source-id\":3,\"field-id\":1002},{\"name\":\"hour_field_hour\",\"transform\":\"hour\",\"source-id\":4,\"field-id\":1003},{\"name\":\"truncate_field_trunc\",\"transform\":\"truncate[2]\",\"source-id\":5,\"field-id\":1004},{\"name\":\"bucket_field_bucket\",\"transform\":\"bucket[2]\",\"source-id\":6,\"field-id\":1005},{\"name\":\"identity_field\",\"transform\":\"identity\",\"source-id\":7,\"field-id\":1006}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -223,7 +221,6 @@ Table Parameters: current-schema 
{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"year_field\",\"required\":false,\"type\":\"date\"},{\"id\":3,\"name\":\"month_field\",\"required\":false,\"type\":\"date\"},{\"id\":4,\"name\":\"day_field\",\"required\":false,\"type\":\"date\"},{\"id\":5,\"name\":\"hour_field\",\"required\":false,\"type\":\"timestamp\"},{\"id\":6,\"name\":\"truncate_field\",\"required\":false,\"type\":\"string\"},{\"id\":7,\"name\":\"bucket_field\",\"required\":false,\"type\":\"int\"},{\"id\":8,\"name\":\"identity_field\",\"required\":false,\"type\":\"int\"}]} default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"year_field_year\",\"transform\":\"year\",\"source-id\":2,\"field-id\":1000},{\"name\":\"month_field_month\",\"transform\":\"month\",\"source-id\":3,\"field-id\":1001},{\"name\":\"day_field_day\",\"transform\":\"day\",\"source-id\":4,\"field-id\":1002},{\"name\":\"hour_field_hour\",\"transform\":\"hour\",\"source-id\":5,\"field-id\":1003},{\"name\":\"truncate_field_trunc\",\"transform\":\"truncate[2]\",\"source-id\":6,\"field-id\":1004},{\"name\":\"bucket_field_bucket\",\"transform\":\"bucket[2]\",\"source-id\":7,\"field-id\":1005},{\"name\":\"identity_field\",\"transform\":\"identity\",\"source-id\":8,\"field-id\":1006}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -278,7 +275,6 @@ Table Parameters: current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"b\",\"required\":false,\"type\":\"string\"}]} default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out index c68cb256cd60..1f417296d01d 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition.q.out @@ -505,7 +505,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 4 @@ -1705,7 +1704,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"country\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000},{\"name\":\"state\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 4 @@ -2762,7 +2760,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"country\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000},{\"name\":\"state\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 4 @@ -3456,7 +3453,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 3 @@ -4150,7 +4146,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# 
default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 3 @@ -4844,7 +4839,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 3 diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out index fcd1c17b24ed..758c774405d9 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_transforms.q.out @@ -636,7 +636,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_year\",\"transform\":\"year\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 1 @@ -1332,7 +1331,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_month\",\"transform\":\"month\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 1 @@ -2028,7 +2026,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_day\",\"transform\":\"day\",\"source-id\":1,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false 
metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 5 @@ -2483,7 +2480,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_trunc\",\"transform\":\"truncate[2]\",\"source-id\":1,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 1 @@ -2922,7 +2918,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_bucket\",\"transform\":\"bucket[16]\",\"source-id\":1,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 1 @@ -3149,7 +3144,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_bucket\",\"transform\":\"bucket[16]\",\"source-id\":1,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out index de49a0d3b366..956d89494fc4 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_into_partition_with_evolution.q.out @@ -188,7 +188,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"b_trunc_2\",\"transform\":\"truncate[2]\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 4 diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out index 063b6389863e..c19c6331c074 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition.q.out @@ -287,7 +287,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 2 @@ -1261,7 +1260,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"country\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000},{\"name\":\"state\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 4 @@ -1741,7 +1739,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 2 @@ -2209,7 +2206,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 2 @@ -2677,7 +2673,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec 
{\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 2 @@ -3145,7 +3140,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out index 12745b17f098..09bff4eaf55c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_insert_overwrite_partition_transforms.q.out @@ -632,7 +632,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_year\",\"transform\":\"year\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 1 @@ -1302,7 +1301,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_month\",\"transform\":\"month\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 1 @@ -1976,7 +1974,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_day\",\"transform\":\"day\",\"source-id\":1,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location 
hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 5 @@ -2431,7 +2428,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"pcol_trunc\",\"transform\":\"truncate[2]\",\"source-id\":1,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles #Masked# numPartitions 1 diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_pcr_null_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_pcr_null_partition.q.out new file mode 100644 index 000000000000..fae1fb8c26df --- /dev/null +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_pcr_null_partition.q.out @@ -0,0 +1,150 @@ +PREHOOK: query: drop table if exists ice_01 +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: drop table if exists ice_01 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ice_01 +POSTHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ice_01 +PREHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice_01 +POSTHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice_01 +PREHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice_01 +POSTHOOK: query: insert into ice_01 
partition (ds) select 'B', 'V2', 'null' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice_01 +PREHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@ice_01 +POSTHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@ice_01 +PREHOOK: query: explain select key, value, ds from ice_01 where ds is null +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain select key, value, ds from ice_01 where ds is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ice_01 + filterExpr: ds is null (type: boolean) + Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from ice_01 where ds is null +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_01 
+PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select key, value, ds from ice_01 where ds is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +C V3 NULL +PREHOOK: query: explain select key, value, ds from ice_01 where ds is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain select key, value, ds from ice_01 where ds is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ice_01 + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from ice_01 where ds is not null order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select key, value, ds from ice_01 where ds is not null order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +A V1 2000-04-08 +B V2 
null +PREHOOK: query: select key, value, ds from ice_01 where ds = 'null' +PREHOOK: type: QUERY +PREHOOK: Input: default@ice_01 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select key, value, ds from ice_01 where ds = 'null' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ice_01 +POSTHOOK: Output: hdfs://### HDFS PATH ### +B V2 null diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out index a00317017d43..60e4125a31c0 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_v2_deletes.q.out @@ -28,7 +28,6 @@ TBLPROPERTIES ( 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"}]}', 'format-version'='2', 'iceberg.delete.skiprowdata'='false', - 'iceberg.orc.files.only'='true', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -140,7 +139,6 @@ TBLPROPERTIES ( 'current-snapshot-timestamp-ms'='#Masked#', 'format-version'='2', 'iceberg.delete.skiprowdata'='true', - 'iceberg.orc.files.only'='true', #### A masked pattern was here #### 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', @@ -288,7 +286,6 @@ TBLPROPERTIES ( 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"part","transform":"identity","source-id":2,"field-id":1000}]}', 'format-version'='2', 'iceberg.delete.skiprowdata'='true', - 'iceberg.orc.files.only'='true', #### A masked pattern was here #### 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_v3_deletion_vectors.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_v3_deletion_vectors.q.out index cd64a41b9422..81ddec580730 100644 --- 
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_v3_deletion_vectors.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_v3_deletion_vectors.q.out @@ -27,7 +27,6 @@ TBLPROPERTIES ( 'bucketing_version'='2', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"}]}', 'format-version'='3', - 'iceberg.orc.files.only'='true', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_view.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_view.q.out new file mode 100644 index 000000000000..ad072ca1c23b --- /dev/null +++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_view.q.out @@ -0,0 +1,380 @@ +PREHOOK: query: create database ice_native_view_db +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:ice_native_view_db +POSTHOOK: query: create database ice_native_view_db +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:ice_native_view_db +PREHOOK: query: use ice_native_view_db +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:ice_native_view_db +POSTHOOK: query: use ice_native_view_db +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:ice_native_view_db +PREHOOK: query: create table src_ice ( + first_name string, + last_name string + ) +partitioned by (dept_id bigint) +stored by iceberg stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:ice_native_view_db +PREHOOK: Output: ice_native_view_db@src_ice +POSTHOOK: query: create table src_ice ( + first_name string, + last_name string + ) +partitioned by (dept_id bigint) +stored by iceberg stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:ice_native_view_db +POSTHOOK: Output: ice_native_view_db@src_ice +PREHOOK: query: INSERT INTO src_ice VALUES + ('fn1','ln1', 1), + ('fn2','ln2', 1), + ('fn3','ln3', 1), + ('fn4','ln4', 1), + 
('fn5','ln5', 2), + ('fn6','ln6', 2), + ('fn7','ln7', 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: ice_native_view_db@src_ice +POSTHOOK: query: INSERT INTO src_ice VALUES + ('fn1','ln1', 1), + ('fn2','ln2', 1), + ('fn3','ln3', 1), + ('fn4','ln4', 1), + ('fn5','ln5', 2), + ('fn6','ln6', 2), + ('fn7','ln7', 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: ice_native_view_db@src_ice +PREHOOK: query: create view v_ice tblproperties ('view-format'='iceberg') +as select * from src_ice +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Output: database:ice_native_view_db +PREHOOK: Output: ice_native_view_db@v_ice +POSTHOOK: query: create view v_ice tblproperties ('view-format'='iceberg') +as select * from src_ice +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Output: database:ice_native_view_db +POSTHOOK: Output: ice_native_view_db@v_ice +POSTHOOK: Lineage: v_ice.dept_id SIMPLE [(src_ice)src_ice.FieldSchema(name:dept_id, type:bigint, comment:null), ] +POSTHOOK: Lineage: v_ice.first_name SIMPLE [(src_ice)src_ice.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: v_ice.last_name SIMPLE [(src_ice)src_ice.FieldSchema(name:last_name, type:string, comment:null), ] +PREHOOK: query: select * from v_ice +PREHOOK: type: QUERY +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Input: ice_native_view_db@v_ice +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from v_ice +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Input: ice_native_view_db@v_ice +POSTHOOK: Output: hdfs://### HDFS PATH ### +fn1 ln1 1 +fn2 ln2 1 +fn3 ln3 1 +fn4 ln4 1 +fn5 ln5 2 +fn6 ln6 2 +fn7 ln7 2 +PREHOOK: query: create or replace view v_ice tblproperties ('view-format'='iceberg') +as select first_name || '-' || dept_id from src_ice where dept_id = 1 +PREHOOK: type: CREATEVIEW 
+PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Output: database:ice_native_view_db +PREHOOK: Output: ice_native_view_db@v_ice +POSTHOOK: query: create or replace view v_ice tblproperties ('view-format'='iceberg') +as select first_name || '-' || dept_id from src_ice where dept_id = 1 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Output: database:ice_native_view_db +POSTHOOK: Output: ice_native_view_db@v_ice +PREHOOK: query: select * from v_ice +PREHOOK: type: QUERY +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Input: ice_native_view_db@v_ice +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from v_ice +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Input: ice_native_view_db@v_ice +POSTHOOK: Output: hdfs://### HDFS PATH ### +fn1-1 +fn2-1 +fn3-1 +fn4-1 +PREHOOK: query: desc formatted v_ice +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_native_view_db@v_ice +POSTHOOK: query: desc formatted v_ice +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_native_view_db@v_ice +# col_name data_type comment +_c0 string + +# Detailed Table Information +Database: ice_native_view_db +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":1,\"fields\":[{\"id\":1,\"name\":\"_c0\",\"required\":false,\"type\":\"string\"}]} + metadata_location hdfs://### HDFS PATH ### + previous_metadata_location hdfs://### HDFS PATH ### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW +#### A masked pattern was here #### + uuid #Masked# + view-format iceberg + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Sort Columns: [] + +# View Information +Original Query: select 
`src_ice`.`first_name` || '-' || `src_ice`.`dept_id` from `ice_native_view_db`.`src_ice` where `src_ice`.`dept_id` = 1 +Expanded Query: select `src_ice`.`first_name` || '-' || `src_ice`.`dept_id` from `ice_native_view_db`.`src_ice` where `src_ice`.`dept_id` = 1 +PREHOOK: query: create view if not exists v_def +as select first_name, last_name, dept_id from src_ice where dept_id = 2 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Output: database:ice_native_view_db +PREHOOK: Output: ice_native_view_db@v_def +POSTHOOK: query: create view if not exists v_def +as select first_name, last_name, dept_id from src_ice where dept_id = 2 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Output: database:ice_native_view_db +POSTHOOK: Output: ice_native_view_db@v_def +POSTHOOK: Lineage: v_def.dept_id SIMPLE [] +POSTHOOK: Lineage: v_def.first_name SIMPLE [(src_ice)src_ice.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: v_def.last_name SIMPLE [(src_ice)src_ice.FieldSchema(name:last_name, type:string, comment:null), ] +PREHOOK: query: select * from v_def +PREHOOK: type: QUERY +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Input: ice_native_view_db@v_def +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from v_def +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Input: ice_native_view_db@v_def +POSTHOOK: Output: hdfs://### HDFS PATH ### +fn5 ln5 2 +fn6 ln6 2 +fn7 ln7 2 +PREHOOK: query: create view if not exists v_def +as select first_name, last_name, dept_id from src_ice +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Output: database:ice_native_view_db +PREHOOK: Output: ice_native_view_db@v_def +POSTHOOK: query: create view if not exists v_def +as select first_name, last_name, dept_id from src_ice +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Output: 
database:ice_native_view_db +POSTHOOK: Output: ice_native_view_db@v_def +PREHOOK: query: select * from v_def +PREHOOK: type: QUERY +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Input: ice_native_view_db@v_def +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from v_def +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Input: ice_native_view_db@v_def +POSTHOOK: Output: hdfs://### HDFS PATH ### +fn5 ln5 2 +fn6 ln6 2 +fn7 ln7 2 +PREHOOK: query: desc formatted v_def +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_native_view_db@v_def +POSTHOOK: query: desc formatted v_def +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_native_view_db@v_def +# col_name data_type comment +first_name string +last_name string +dept_id bigint + +# Detailed Table Information +Database: ice_native_view_db +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"}]} + metadata_location hdfs://### HDFS PATH ### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW +#### A masked pattern was here #### + uuid #Masked# + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Sort Columns: [] + +# View Information +Original Query: select `src_ice`.`first_name`, `src_ice`.`last_name`, `src_ice`.`dept_id` from `ice_native_view_db`.`src_ice` where `src_ice`.`dept_id` = 2 +Expanded Query: select `src_ice`.`first_name`, `src_ice`.`last_name`, `src_ice`.`dept_id` from `ice_native_view_db`.`src_ice` where 
`src_ice`.`dept_id` = 2 +PREHOOK: query: drop view v_def +PREHOOK: type: DROPVIEW +PREHOOK: Input: ice_native_view_db@v_def +PREHOOK: Output: ice_native_view_db@v_def +POSTHOOK: query: drop view v_def +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: ice_native_view_db@v_def +POSTHOOK: Output: ice_native_view_db@v_def +PREHOOK: query: create view v_hive as select * from src_ice +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Output: database:ice_native_view_db +PREHOOK: Output: ice_native_view_db@v_hive +POSTHOOK: query: create view v_hive as select * from src_ice +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Output: database:ice_native_view_db +POSTHOOK: Output: ice_native_view_db@v_hive +POSTHOOK: Lineage: v_hive.dept_id SIMPLE [(src_ice)src_ice.FieldSchema(name:dept_id, type:bigint, comment:null), ] +POSTHOOK: Lineage: v_hive.first_name SIMPLE [(src_ice)src_ice.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: v_hive.last_name SIMPLE [(src_ice)src_ice.FieldSchema(name:last_name, type:string, comment:null), ] +PREHOOK: query: select * from v_hive +PREHOOK: type: QUERY +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Input: ice_native_view_db@v_hive +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from v_hive +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Input: ice_native_view_db@v_hive +POSTHOOK: Output: hdfs://### HDFS PATH ### +fn1 ln1 1 +fn2 ln2 1 +fn3 ln3 1 +fn4 ln4 1 +fn5 ln5 2 +fn6 ln6 2 +fn7 ln7 2 +PREHOOK: query: desc formatted v_hive +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_native_view_db@v_hive +POSTHOOK: query: desc formatted v_hive +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_native_view_db@v_hive +# col_name data_type comment +first_name string +last_name string +dept_id bigint + +# Detailed Table Information +Database: ice_native_view_db +#### A masked pattern was here #### +Retention: 0 
+Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: null +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] + +# View Information +Original Query: select * from src_ice +Expanded Query: select `src_ice`.`first_name`, `src_ice`.`last_name`, `src_ice`.`dept_id` from `ice_native_view_db`.`src_ice` +PREHOOK: query: drop view v_hive +PREHOOK: type: DROPVIEW +PREHOOK: Input: ice_native_view_db@v_hive +PREHOOK: Output: ice_native_view_db@v_hive +POSTHOOK: query: drop view v_hive +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: ice_native_view_db@v_hive +POSTHOOK: Output: ice_native_view_db@v_hive +PREHOOK: query: create or replace view v_ice +as select first_name from src_ice where dept_id = 2 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Output: database:ice_native_view_db +PREHOOK: Output: ice_native_view_db@v_ice +POSTHOOK: query: create or replace view v_ice +as select first_name from src_ice where dept_id = 2 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Output: database:ice_native_view_db +POSTHOOK: Output: ice_native_view_db@v_ice +PREHOOK: query: select * from v_ice +PREHOOK: type: QUERY +PREHOOK: Input: ice_native_view_db@src_ice +PREHOOK: Input: ice_native_view_db@v_ice +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from v_ice +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_native_view_db@src_ice +POSTHOOK: Input: ice_native_view_db@v_ice +POSTHOOK: Output: hdfs://### HDFS PATH ### +fn5 +fn6 +fn7 +PREHOOK: query: desc formatted v_ice +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_native_view_db@v_ice +POSTHOOK: query: desc formatted v_ice +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_native_view_db@v_ice +# col_name data_type comment 
+first_name string + +# Detailed Table Information +Database: ice_native_view_db +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":1,\"fields\":[{\"id\":1,\"name\":\"_c0\",\"required\":false,\"type\":\"string\"}]} + metadata_location hdfs://### HDFS PATH ### + previous_metadata_location hdfs://### HDFS PATH ### + table_type ICEBERG-VIEW +#### A masked pattern was here #### + uuid #Masked# + view-format iceberg + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Num Buckets: 0 +Bucket Columns: [] +Sort Columns: [] + +# View Information +Original Query: select first_name from src_ice where dept_id = 2 +Expanded Query: select `src_ice`.`first_name` from `ice_native_view_db`.`src_ice` where `src_ice`.`dept_id` = 2 +PREHOOK: query: drop view v_ice +PREHOOK: type: DROPVIEW +PREHOOK: Input: ice_native_view_db@v_ice +PREHOOK: Output: ice_native_view_db@v_ice +POSTHOOK: query: drop view v_ice +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: ice_native_view_db@v_ice +POSTHOOK: Output: ice_native_view_db@v_ice diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out index 5036ca420f88..146cda56a500 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/hadoop_catalog_create_table.q.out @@ -114,7 +114,6 @@ Table Parameters: bucketing_version 2 format-version 2 iceberg.catalog ice01 - iceberg.orc.files.only true numFiles 10 numPartitions 10 numRows 21 @@ -197,7 +196,6 @@ Table Parameters: bucketing_version 2 format-version 2 iceberg.catalog ice01 - 
iceberg.orc.files.only true numFiles 10 numPartitions 10 numRows 21 @@ -377,7 +375,6 @@ Table Parameters: bucketing_version 2 format-version 2 iceberg.catalog location_based_table - iceberg.orc.files.only true numFiles 10 numPartitions 10 numRows 21 @@ -452,7 +449,6 @@ Table Parameters: bucketing_version 2 format-version 2 iceberg.catalog location_based_table - iceberg.orc.files.only true numFiles 10 numRows 21 rawDataSize 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_ordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_ordered_table.q.out index 74a2945b82d6..0d40faea7aa3 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_ordered_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_ordered_table.q.out @@ -30,7 +30,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 @@ -88,7 +87,6 @@ Table Parameters: current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]} default-sort-order {\"order-id\":1,\"fields\":[{\"transform\":\"identity\",\"source-id\":1,\"direction\":\"desc\",\"null-order\":\"nulls-first\"},{\"transform\":\"identity\",\"source-id\":2,\"direction\":\"asc\",\"null-order\":\"nulls-last\"}]} format-version 2 
- iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out index 53293d3798c4..cd7174eaed3f 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out @@ -30,7 +30,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 @@ -192,7 +191,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out index e32e34094e80..6701fbaf4109 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_7.q.out @@ -150,27 +150,27 @@ Stage-0 File Output Operator [FS_61] 
Limit [LIM_60] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_59] (rows=473 width=447) + Select Operator [SEL_59] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_58] - Top N Key Operator [TNK_57] (rows=473 width=447) + Top N Key Operator [TNK_57] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_56] (rows=473 width=447) + Map Join Operator [MAPJOIN_56] (rows=791 width=447) BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_53] PartitionCols:_col0, _col1 - Select Operator [SEL_52] (rows=387 width=178) + Select Operator [SEL_52] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_51] (rows=387 width=178) - predicate:(((key < '0') or ((key > '0') and (key < '100')) or (key > '100')) and value is not null) + Filter Operator [FIL_51] (rows=500 width=178) + predicate:((key <> '0') and (key <> '100') and value is not null) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_55] (rows=387 width=269) + <-Select Operator [SEL_55] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=387 width=269) - predicate:(((key1 < '0') or ((key1 > '0') and (key1 < '100')) or (key1 > '100')) and key2 is not null) + Filter Operator [FIL_54] (rows=500 width=269) + predicate:((key1 <> '0') and (key1 <> '100') and key2 is not null) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"] @@ -346,27 +346,27 @@ Stage-0 File Output Operator [FS_41] Limit [LIM_40] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=473 width=447) + Select Operator [SEL_39] (rows=791 width=447) 
Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=473 width=447) + Top N Key Operator [TNK_37] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=473 width=447) + Map Join Operator [MAPJOIN_36] (rows=791 width=447) BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_33] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=178) + Select Operator [SEL_32] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_31] (rows=387 width=178) - predicate:((key < '0') or (key > '100') or ((key > '0') and (key < '100'))) + Filter Operator [FIL_31] (rows=500 width=178) + predicate:((key <> '0') and (key <> '100')) TableScan [TS_3] (rows=500 width=178) default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_35] (rows=387 width=269) + <-Select Operator [SEL_35] (rows=500 width=269) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=387 width=269) - predicate:((key1 < '0') or (key1 > '100') or ((key1 > '0') and (key1 < '100'))) + Filter Operator [FIL_34] (rows=500 width=269) + predicate:((key1 <> '0') and (key1 <> '100')) TableScan [TS_0] (rows=500 width=269) default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] @@ -435,40 +435,40 @@ POSTHOOK: Input: default@srcbucket_big Plan optimized by CBO. 
Vertex dependency in root stage -Map 2 <- Map 1 (BROADCAST_EDGE) -Reducer 3 <- Map 2 (SIMPLE_EDGE) +Map 1 <- Map 3 (CUSTOM_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:20 Stage-1 - Reducer 3 vectorized, llap + Reducer 2 vectorized, llap File Output Operator [FS_41] Limit [LIM_40] (rows=20 width=447) Number of rows:20 - Select Operator [SEL_39] (rows=612 width=447) + Select Operator [SEL_39] (rows=791 width=447) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 2 [SIMPLE_EDGE] vectorized, llap + <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_38] - Top N Key Operator [TNK_37] (rows=612 width=447) + Top N Key Operator [TNK_37] (rows=791 width=447) keys:_col0,top n:20 - Map Join Operator [MAPJOIN_36] (rows=612 width=447) - Conds:RS_33._col0=SEL_35._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_33] + Map Join Operator [MAPJOIN_36] (rows=791 width=447) + BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 3 [CUSTOM_EDGE] vectorized, llap + MULTICAST [RS_33] PartitionCols:_col0 - Select Operator [SEL_32] (rows=387 width=269) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_31] (rows=387 width=269) - predicate:(((key2 < 'val_0') or ((key2 > 'val_0') and (key2 < 'val_100')) or (key2 > 'val_100')) and key1 is not null) - TableScan [TS_0] (rows=500 width=269) - default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"] - <-Select Operator [SEL_35] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_34] (rows=500 width=178) - predicate:key is not null - TableScan [TS_3] (rows=500 width=178) - default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Select Operator [SEL_32] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_31] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + 
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_35] (rows=500 width=269) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_34] (rows=500 width=269) + predicate:((key2 <> 'val_0') and (key2 <> 'val_100') and key1 is not null) + TableScan [TS_0] (rows=500 width=269) + default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"] PREHOOK: query: SELECT * FROM srcbucket_big a diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered_by.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered_by.q.out index 53b11da616ce..cce9b0da04de 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered_by.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_clustered_by.q.out @@ -41,7 +41,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### numFiles #Masked# numRows 0 @@ -285,7 +284,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"customer_id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"order_id\",\"required\":false,\"type\":\"long\"},{\"id\":3,\"name\":\"product\",\"required\":false,\"type\":\"string\"}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### numFiles #Masked# numRows 0 @@ -496,7 +494,6 @@ Table Parameters: current-snapshot-summary 
{\"added-data-files\":\"2\",\"added-records\":\"2\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"2\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"#Masked#\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles #Masked# numRows 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_ordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_ordered_table.q.out index ed49513445f1..305451686c30 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_ordered_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_ordered_table.q.out @@ -39,7 +39,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"9\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"9\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 9 @@ -134,7 +133,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-sort-order {\"order-id\":1,\"fields\":[{\"transform\":\"identity\",\"source-id\":1,\"direction\":\"desc\",\"null-order\":\"nulls-first\"},{\"transform\":\"identity\",\"source-id\":2,\"direction\":\"asc\",\"null-order\":\"nulls-last\"}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 9 diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out index 42f0631140fc..3636b845c563 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_create_locally_zordered_table.q.out @@ -38,7 +38,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"text\",\"required\":false,\"type\":\"string\"}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 @@ -283,7 +282,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"text\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"bool_val\",\"required\":false,\"type\":\"boolean\"},{\"id\":4,\"name\":\"date_val\",\"required\":false,\"type\":\"date\"}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 @@ -409,7 +407,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":2,\"name\":\"dd\",\"required\":false,\"type\":\"double\"},{\"id\":3,\"name\":\"ll\",\"required\":false,\"type\":\"long\"}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 @@ -543,7 +540,6 @@ Table Parameters: current-snapshot-summary 
{\"added-data-files\":\"1\",\"added-records\":\"20\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"20\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 20 @@ -663,7 +659,6 @@ Table Parameters: current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"ts\",\"required\":false,\"type\":\"timestamp\"},{\"id\":2,\"name\":\"dd\",\"required\":false,\"type\":\"double\"},{\"id\":3,\"name\":\"ll\",\"required\":false,\"type\":\"int\"}]} default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"ll_bucket\",\"transform\":\"bucket[4]\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out index 981b8269ad51..6a28128817c9 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution.q.out @@ -301,7 +301,6 @@ Table Parameters: default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 hive.compactor.worker.pool iceberg - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 17 numRows 17 @@ -898,7 +897,6 @@ Table Parameters: default-partition-spec 
{\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 hive.compactor.worker.pool iceberg - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numPartitions 3 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out index 3af63c58b6c2..a541f9350bf1 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution2.q.out @@ -165,7 +165,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numRows 4 @@ -252,7 +251,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out index 924c6d9953fa..835a2af5e577 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out +++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_ordered.q.out @@ -159,7 +159,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numPartitions 3 @@ -309,7 +308,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 3 numPartitions 3 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out index 61a931d74a33..cf41867c1d2b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_dyn_spec_w_filter.q.out @@ -255,7 +255,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"event_src_trunc\",\"transform\":\"truncate[3]\",\"source-id\":3,\"field-id\":1000},{\"name\":\"event_time_month\",\"transform\":\"month\",\"source-id\":2,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 13 numPartitions 8 @@ -366,7 +365,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec 
{\"spec-id\":1,\"fields\":[{\"name\":\"event_src_trunc\",\"transform\":\"truncate[3]\",\"source-id\":3,\"field-id\":1000},{\"name\":\"event_time_month\",\"transform\":\"month\",\"source-id\":2,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 8 numPartitions 8 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out index bc786c1e8dc8..d391b9ebd3d6 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partition_evolution_w_id_spec_w_filter.q.out @@ -207,7 +207,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 8 numPartitions 5 @@ -320,7 +319,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 8 numPartitions 4 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out index 5508bdca124c..435a56d23294 100644 --- 
a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_partitioned.q.out @@ -200,7 +200,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 11 numRows 11 @@ -303,7 +302,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 2 numPartitions 2 @@ -541,7 +539,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 14 numRows 16 @@ -648,7 +645,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_query_metadata.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_query_metadata.q.out index 26d7eca677b5..970c81e6e1c0 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_query_metadata.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_query_metadata.q.out @@ -99,7 +99,6 @@ Table Parameters: 
current-snapshot-timestamp-ms #Masked# external.table.purge true format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 7 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out index 440f6334f114..365e446af9ba 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_schema_evolution.q.out @@ -236,7 +236,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 10 numRows 10 @@ -340,7 +339,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out index 4e120fb8c50d..5efe2d0f536f 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition.q.out @@ -210,7 +210,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec 
{\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 11 numRows 11 @@ -322,7 +321,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 6 numPartitions 2 @@ -441,7 +439,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out index 1d1143f4b635..8b9310b71a33 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution.q.out @@ -191,7 +191,6 @@ Table Parameters: current-snapshot-timestamp-ms 
#Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 8 numPartitions 4 @@ -308,7 +307,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 7 numPartitions 4 @@ -425,7 +423,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 6 numPartitions 4 @@ -541,7 +538,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 5 numPartitions 3 @@ -657,7 +653,6 @@ Table Parameters: current-snapshot-timestamp-ms 
#Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":4,\"field-id\":1000},{\"name\":\"city\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1001},{\"name\":\"registration_date\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1002}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out index b01185bb6911..0eb2ba637a43 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_single_partition_with_evolution2.q.out @@ -134,7 +134,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"a\",\"transform\":\"identity\",\"source-id\":1,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numPartitions 2 @@ -228,7 +227,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"a\",\"transform\":\"identity\",\"source-id\":1,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numPartitions 2 @@ -336,7 +334,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"a\",\"transform\":\"identity\",\"source-id\":1,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 3 numPartitions 2 @@ -444,7 +441,6 @@ Table Parameters: current-snapshot-timestamp-ms 
#Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"a\",\"transform\":\"identity\",\"source-id\":1,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 5 numPartitions 5 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned.q.out index 7bf7f084572f..2868b83339da 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned.q.out @@ -188,7 +188,6 @@ Table Parameters: current-snapshot-summary {\"deleted-data-files\":\"3\",\"deleted-records\":\"3\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"11\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"11\",\"total-delete-files\":\"7\",\"total-position-deletes\":\"7\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 11 numRows 11 @@ -301,7 +300,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"deleted-data-files\":\"11\",\"removed-position-delete-files\":\"7\",\"removed-delete-files\":\"7\",\"added-records\":\"4\",\"deleted-records\":\"11\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"removed-position-deletes\":\"7\",\"changed-partition-count\":\"1\",\"total-records\":\"4\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 
numRows 4 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_ordered.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_ordered.q.out index 978928209269..9f6cb1107730 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_ordered.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_ordered.q.out @@ -85,7 +85,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# format-version 2 hive.compactor.worker.pool iceberg - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 4 @@ -199,7 +198,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# format-version 2 hive.compactor.worker.pool iceberg - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 4 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_w_filter.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_w_filter.q.out index ce004c9547ef..5fe81cf1f72a 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_w_filter.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_major_compaction_unpartitioned_w_filter.q.out @@ -188,7 +188,6 @@ Table Parameters: current-snapshot-summary {\"deleted-data-files\":\"3\",\"deleted-records\":\"3\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"11\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"11\",\"total-delete-files\":\"7\",\"total-position-deletes\":\"7\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 11 numRows 11 @@ 
-280,7 +279,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"deleted-data-files\":\"11\",\"removed-position-delete-files\":\"7\",\"removed-delete-files\":\"7\",\"added-records\":\"4\",\"deleted-records\":\"11\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"removed-position-deletes\":\"7\",\"changed-partition-count\":\"1\",\"total-records\":\"4\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 4 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out index fa5dcd05e101..a177af309c6a 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_bucket.q.out @@ -93,7 +93,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"key_bucket_8\",\"transform\":\"bucket[8]\",\"source-id\":2,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### numFiles 7 numPartitions 6 @@ -211,7 +210,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"key_bucket_8\",\"transform\":\"bucket[8]\",\"source-id\":2,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### numFiles 8 numPartitions 4 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out 
b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out index 62070278505d..c372ea96c222 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_partition_evolution.q.out @@ -120,7 +120,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numPartitions 1 @@ -205,7 +204,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 4 numPartitions 1 @@ -314,7 +312,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":1,\"fields\":[{\"name\":\"dept_id\",\"transform\":\"identity\",\"source-id\":3,\"field-id\":1000}]} format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_unpartitioned.q.out index fabbe1a82c76..2b782f5a0f15 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_minor_compaction_unpartitioned.q.out @@ -96,7 +96,6 @@ Table Parameters: current-snapshot-summary 
{\"added-data-files\":\"1\",\"added-records\":\"2\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"7\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"3\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 3 numRows 7 @@ -180,7 +179,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"deleted-data-files\":\"3\",\"added-records\":\"7\",\"deleted-records\":\"7\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"7\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 7 @@ -278,7 +276,6 @@ Table Parameters: current-snapshot-summary {\"added-data-files\":\"1\",\"deleted-data-files\":\"2\",\"removed-position-delete-files\":\"1\",\"removed-delete-files\":\"1\",\"added-records\":\"7\",\"deleted-records\":\"8\",\"added-files-size\":\"#Masked#\",\"removed-files-size\":\"#Masked#\",\"removed-position-deletes\":\"1\",\"changed-partition-count\":\"1\",\"total-records\":\"7\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} current-snapshot-timestamp-ms #Masked# format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### numFiles 1 numRows 7 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_gravitino.q.out 
b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_gravitino.q.out index 8bba659e8fd1..5821007cd3cb 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_gravitino.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_gravitino.q.out @@ -68,8 +68,6 @@ CREATE EXTERNAL TABLE `ice_orc2`( `dept_id` bigint, `team_id` bigint, `company_id` bigint) -PARTITIONED BY ( - `company_id` bigint COMMENT 'Transform: identity') PARTITIONED BY SPEC ( `company_id`) ROW FORMAT SERDE @@ -86,7 +84,6 @@ TBLPROPERTIES ( 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"company_id","transform":"identity","source-id":5,"field-id":1000}]}', 'format-version'='2', 'iceberg.catalog'='ice01', - 'iceberg.orc.files.only'='true', #### A masked pattern was here #### 'name'='ice_rest.ice_orc2', 'parquet.compression'='zstd', @@ -142,7 +139,6 @@ Table Parameters: default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000}]} format-version 2 iceberg.catalog ice01 - iceberg.orc.files.only true #### A masked pattern was here #### name ice_rest.ice_orc2 numFiles 1 @@ -180,6 +176,259 @@ POSTHOOK: Input: ice_rest@ice_orc2 fn1 ln1 1 10 100 fn2 ln2 2 20 100 fn3 ln3 3 30 100 +PREHOOK: query: create view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name, last_name from ice_orc2 where dept_id in (1, 3) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Output: database:ice_rest +PREHOOK: Output: ice_rest@ice_v1 +POSTHOOK: query: create view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name, last_name from ice_orc2 where dept_id in (1, 3) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Output: database:ice_rest +POSTHOOK: Output: ice_rest@ice_v1 +POSTHOOK: Lineage: ice_v1.first_name SIMPLE [(ice_orc2)ice_orc2.FieldSchema(name:first_name, type:string, 
comment:null), ] +POSTHOOK: Lineage: ice_v1.last_name SIMPLE [(ice_orc2)ice_orc2.FieldSchema(name:last_name, type:string, comment:null), ] +PREHOOK: query: select * from ice_v1 +PREHOOK: type: QUERY +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +POSTHOOK: query: select * from ice_v1 +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +fn1 ln1 +fn3 ln3 +PREHOOK: query: desc formatted ice_v1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_rest@ice_v1 +POSTHOOK: query: desc formatted ice_v1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_rest@ice_v1 +# col_name data_type comment +first_name string +last_name string + +# Detailed Table Information +Database: ice_rest +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"}]} +#### A masked pattern was here #### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW + type rest + uuid #Masked# + view-format iceberg + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Sort Columns: [] + +# View Information +Original Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` in (1, 3) +Expanded Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` in (1, 3) +PREHOOK: query: create view if not exists ice_v1 tblproperties ('view-format'='iceberg') +as select * from ice_orc2 where dept_id = 10000 
+PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Output: database:ice_rest +PREHOOK: Output: ice_rest@ice_v1 +POSTHOOK: query: create view if not exists ice_v1 tblproperties ('view-format'='iceberg') +as select * from ice_orc2 where dept_id = 10000 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Output: database:ice_rest +POSTHOOK: Output: ice_rest@ice_v1 +PREHOOK: query: select * from ice_v1 +PREHOOK: type: QUERY +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +POSTHOOK: query: select * from ice_v1 +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +fn1 ln1 +fn3 ln3 +PREHOOK: query: desc formatted ice_v1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_rest@ice_v1 +POSTHOOK: query: desc formatted ice_v1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_rest@ice_v1 +# col_name data_type comment +first_name string +last_name string + +# Detailed Table Information +Database: ice_rest +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"}]} +#### A masked pattern was here #### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW + type rest + uuid #Masked# + view-format iceberg + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Sort Columns: [] + +# View Information +Original Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` in (1, 3) 
+Expanded Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` in (1, 3) +PREHOOK: query: create or replace view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name || '-' || dept_id from ice_orc2 where dept_id = 2 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Output: database:ice_rest +PREHOOK: Output: ice_rest@ice_v1 +POSTHOOK: query: create or replace view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name || '-' || dept_id from ice_orc2 where dept_id = 2 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Output: database:ice_rest +POSTHOOK: Output: ice_rest@ice_v1 +PREHOOK: query: select * from ice_v1 +PREHOOK: type: QUERY +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +POSTHOOK: query: select * from ice_v1 +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +fn2-2 +PREHOOK: query: desc formatted ice_v1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_rest@ice_v1 +POSTHOOK: query: desc formatted ice_v1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_rest@ice_v1 +# col_name data_type comment +_c0 string + +# Detailed Table Information +Database: ice_rest +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":1,\"fields\":[{\"id\":1,\"name\":\"_c0\",\"required\":false,\"type\":\"string\"}]} +#### A masked pattern was here #### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW + type rest + uuid #Masked# + view-format iceberg + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat 
+Compressed: No +Sort Columns: [] + +# View Information +Original Query: select `ice_orc2`.`first_name` || '-' || `ice_orc2`.`dept_id` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` = 2 +Expanded Query: select `ice_orc2`.`first_name` || '-' || `ice_orc2`.`dept_id` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` = 2 +PREHOOK: query: drop view ice_v1 +PREHOOK: type: DROPVIEW +PREHOOK: Input: ice_rest@ice_v1 +PREHOOK: Output: ice_rest@ice_v1 +POSTHOOK: query: drop view ice_v1 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: ice_rest@ice_v1 +POSTHOOK: Output: ice_rest@ice_v1 +PREHOOK: query: create view ice_v2 +as select first_name, last_name || '-' || dept_id from ice_orc2 where team_id in (20, 30) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Output: database:ice_rest +PREHOOK: Output: ice_rest@ice_v2 +POSTHOOK: query: create view ice_v2 +as select first_name, last_name || '-' || dept_id from ice_orc2 where team_id in (20, 30) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Output: database:ice_rest +POSTHOOK: Output: ice_rest@ice_v2 +POSTHOOK: Lineage: ice_v2._c1 EXPRESSION [(ice_orc2)ice_orc2.FieldSchema(name:last_name, type:string, comment:null), (ice_orc2)ice_orc2.FieldSchema(name:dept_id, type:bigint, comment:null), ] +POSTHOOK: Lineage: ice_v2.first_name SIMPLE [(ice_orc2)ice_orc2.FieldSchema(name:first_name, type:string, comment:null), ] +PREHOOK: query: select * from ice_v2 +PREHOOK: type: QUERY +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Input: ice_rest@ice_v2 +#### A masked pattern was here #### +POSTHOOK: query: select * from ice_v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Input: ice_rest@ice_v2 +#### A masked pattern was here #### +fn2 ln2-2 +fn3 ln3-3 +PREHOOK: query: desc formatted ice_v2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_rest@ice_v2 +POSTHOOK: query: desc formatted ice_v2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_rest@ice_v2 +# col_name 
data_type comment +first_name string +_c1 string + +# Detailed Table Information +Database: ice_rest +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"_c1\",\"required\":false,\"type\":\"string\"}]} +#### A masked pattern was here #### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW + type rest + uuid #Masked# + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Sort Columns: [] + +# View Information +Original Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` || '-' || `ice_orc2`.`dept_id` from `ice_rest`.`ice_orc2` where `ice_orc2`.`team_id` in (20, 30) +Expanded Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` || '-' || `ice_orc2`.`dept_id` from `ice_rest`.`ice_orc2` where `ice_orc2`.`team_id` in (20, 30) +PREHOOK: query: drop view ice_v2 +PREHOOK: type: DROPVIEW +PREHOOK: Input: ice_rest@ice_v2 +PREHOOK: Output: ice_rest@ice_v2 +POSTHOOK: query: drop view ice_v2 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: ice_rest@ice_v2 +POSTHOOK: Output: ice_rest@ice_v2 PREHOOK: query: show tables PREHOOK: type: SHOWTABLES PREHOOK: Input: database:ice_rest diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_hms.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_hms.q.out index 409eb484480b..30f55d35d7ed 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_hms.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_hms.q.out @@ -68,8 +68,6 @@ CREATE EXTERNAL TABLE `ice_orc2`( 
`dept_id` bigint, `team_id` bigint, `company_id` bigint) -PARTITIONED BY ( - `company_id` bigint COMMENT 'Transform: identity') PARTITIONED BY SPEC ( `company_id`) ROW FORMAT SERDE @@ -86,7 +84,6 @@ TBLPROPERTIES ( 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"company_id","transform":"identity","source-id":5,"field-id":1000}]}', 'format-version'='2', 'iceberg.catalog'='ice01', - 'iceberg.orc.files.only'='true', #### A masked pattern was here #### 'name'='ice_rest.ice_orc2', 'parquet.compression'='zstd', @@ -142,7 +139,6 @@ Table Parameters: default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000}]} format-version 2 iceberg.catalog ice01 - iceberg.orc.files.only true #### A masked pattern was here #### name ice_rest.ice_orc2 numFiles 1 @@ -180,6 +176,144 @@ POSTHOOK: Input: ice_rest@ice_orc2 fn1 ln1 1 10 100 fn2 ln2 2 20 100 fn3 ln3 3 30 100 +PREHOOK: query: create view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name, last_name from ice_orc2 where dept_id in (1,2) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Output: database:ice_rest +PREHOOK: Output: ice_rest@ice_v1 +POSTHOOK: query: create view ice_v1 tblproperties ('view-format'='iceberg') +as select first_name, last_name from ice_orc2 where dept_id in (1,2) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Output: database:ice_rest +POSTHOOK: Output: ice_rest@ice_v1 +POSTHOOK: Lineage: ice_v1.first_name SIMPLE [(ice_orc2)ice_orc2.FieldSchema(name:first_name, type:string, comment:null), ] +POSTHOOK: Lineage: ice_v1.last_name SIMPLE [(ice_orc2)ice_orc2.FieldSchema(name:last_name, type:string, comment:null), ] +PREHOOK: query: select * from ice_v1 +PREHOOK: type: QUERY +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +POSTHOOK: query: select * from ice_v1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
ice_rest@ice_orc2 +POSTHOOK: Input: ice_rest@ice_v1 +#### A masked pattern was here #### +fn1 ln1 +fn2 ln2 +PREHOOK: query: desc formatted ice_v1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_rest@ice_v1 +POSTHOOK: query: desc formatted ice_v1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_rest@ice_v1 +# col_name data_type comment +first_name string +last_name string + +# Detailed Table Information +Database: ice_rest +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"}]} +#### A masked pattern was here #### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW + type rest + uuid #Masked# + view-format iceberg + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Sort Columns: [] + +# View Information +Original Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` in (1,2) +Expanded Query: select `ice_orc2`.`first_name`, `ice_orc2`.`last_name` from `ice_rest`.`ice_orc2` where `ice_orc2`.`dept_id` in (1,2) +PREHOOK: query: drop view ice_v1 +PREHOOK: type: DROPVIEW +PREHOOK: Input: ice_rest@ice_v1 +PREHOOK: Output: ice_rest@ice_v1 +POSTHOOK: query: drop view ice_v1 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: ice_rest@ice_v1 +POSTHOOK: Output: ice_rest@ice_v1 +PREHOOK: query: create view ice_v2 +as select dept_id, team_id from ice_orc2 where company_id = 100 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Output: database:ice_rest +PREHOOK: Output: ice_rest@ice_v2 +POSTHOOK: query: create view ice_v2 
+as select dept_id, team_id from ice_orc2 where company_id = 100 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Output: database:ice_rest +POSTHOOK: Output: ice_rest@ice_v2 +POSTHOOK: Lineage: ice_v2.dept_id SIMPLE [(ice_orc2)ice_orc2.FieldSchema(name:dept_id, type:bigint, comment:null), ] +POSTHOOK: Lineage: ice_v2.team_id SIMPLE [(ice_orc2)ice_orc2.FieldSchema(name:team_id, type:bigint, comment:null), ] +PREHOOK: query: select * from ice_v2 +PREHOOK: type: QUERY +PREHOOK: Input: ice_rest@ice_orc2 +PREHOOK: Input: ice_rest@ice_v2 +#### A masked pattern was here #### +POSTHOOK: query: select * from ice_v2 +POSTHOOK: type: QUERY +POSTHOOK: Input: ice_rest@ice_orc2 +POSTHOOK: Input: ice_rest@ice_v2 +#### A masked pattern was here #### +1 10 +2 20 +3 30 +PREHOOK: query: desc formatted ice_v2 +PREHOOK: type: DESCTABLE +PREHOOK: Input: ice_rest@ice_v2 +POSTHOOK: query: desc formatted ice_v2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: ice_rest@ice_v2 +# col_name data_type comment +dept_id bigint +team_id bigint + +# Detailed Table Information +Database: ice_rest +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + bucketing_version 2 + current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":2,\"name\":\"team_id\",\"required\":false,\"type\":\"long\"}]} +#### A masked pattern was here #### + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG-VIEW + type rest + uuid #Masked# + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.FileInputFormat +OutputFormat: org.apache.hadoop.mapred.FileOutputFormat +Compressed: No +Sort Columns: [] + +# View Information +Original Query: select `ice_orc2`.`dept_id`, `ice_orc2`.`team_id` from `ice_rest`.`ice_orc2` where `ice_orc2`.`company_id` = 100 +Expanded Query: 
select `ice_orc2`.`dept_id`, `ice_orc2`.`team_id` from `ice_rest`.`ice_orc2` where `ice_orc2`.`company_id` = 100 +PREHOOK: query: drop view ice_v2 +PREHOOK: type: DROPVIEW +PREHOOK: Input: ice_rest@ice_v2 +PREHOOK: Output: ice_rest@ice_v2 +POSTHOOK: query: drop view ice_v2 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: ice_rest@ice_v2 +POSTHOOK: Output: ice_rest@ice_v2 PREHOOK: query: show tables PREHOOK: type: SHOWTABLES PREHOOK: Input: database:ice_rest diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out index 5f31e752db3f..5d197263b37b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out @@ -259,8 +259,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -507,7 +507,7 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2), 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] + vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2)/DECIMAL_64, 10:PARTITION__SPEC__ID:int, 
11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] Select Vectorization: className: VectorSelectOperator native: true @@ -516,7 +516,7 @@ STAGE PLANS: aggregators: VectorUDAFMaxDouble(col 0:float) -> float className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, col 9:decimal(4,2) + keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 15:decimal(4,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -531,8 +531,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -540,9 +540,9 @@ STAGE PLANS: rowBatchContext: dataColumnCount: 10 includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2) + dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [decimal(4,2)] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -940,8 +940,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + 
inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_multitable.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_multitable.q.out index 7ec8f5c23b97..7728fb532cc9 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_multitable.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_multitable.q.out @@ -52,14 +52,6 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@orders POSTHOOK: Lineage: orders.o_custkey SCRIPT [] POSTHOOK: Lineage: orders.o_orderkey SCRIPT [] -PREHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false') -PREHOOK: type: ALTERTABLE_PROPERTIES -PREHOOK: Input: default@customer_ice -PREHOOK: Output: default@customer_ice -POSTHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false') -POSTHOOK: type: ALTERTABLE_PROPERTIES -POSTHOOK: Input: default@customer_ice -POSTHOOK: Output: default@customer_ice PREHOOK: query: select sum(1 - l_discount) as revenue FROM customer_ice, orders, lineitem WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 @@ -112,45 +104,3 @@ POSTHOOK: Input: default@lineitem_ice POSTHOOK: Input: default@orders #### A masked pattern was here #### -99.20 -PREHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true') -PREHOOK: type: ALTERTABLE_PROPERTIES -PREHOOK: Input: default@customer_ice -PREHOOK: Output: default@customer_ice -POSTHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true') -POSTHOOK: type: ALTERTABLE_PROPERTIES -POSTHOOK: Input: default@customer_ice -POSTHOOK: Output: default@customer_ice -PREHOOK: query: 
select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_ice -PREHOOK: Input: default@lineitem -PREHOOK: Input: default@orders -#### A masked pattern was here #### -POSTHOOK: query: select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_ice -POSTHOOK: Input: default@lineitem -POSTHOOK: Input: default@orders -#### A masked pattern was here #### --99.20 -PREHOOK: query: select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem_ice -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_ice -PREHOOK: Input: default@lineitem_ice -PREHOOK: Input: default@orders -#### A masked pattern was here #### -POSTHOOK: query: select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem_ice -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_ice -POSTHOOK: Input: default@lineitem_ice -POSTHOOK: Input: default@orders -#### A masked pattern was here #### --99.20 diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out index 2feda580b67a..3b0d1940c1be 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_parquet.q.out @@ -150,8 +150,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + 
featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -348,7 +348,7 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2), 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] + vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2)/DECIMAL_64, 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] Select Vectorization: className: VectorSelectOperator native: true @@ -357,7 +357,7 @@ STAGE PLANS: aggregators: VectorUDAFMaxDouble(col 0:float) -> float className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, col 9:decimal(4,2) + keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 15:decimal(4,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -372,8 +372,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -381,9 
+381,9 @@ STAGE PLANS: rowBatchContext: dataColumnCount: 10 includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2) + dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [decimal(4,2)] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -582,8 +582,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out index 7ea7605467ca..e58b7ffd846a 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out @@ -73,7 +73,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 1 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 2 numPartitions 2 @@ -165,7 +164,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} format-version 2 - 
iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out index 770cc967d0bc..c9b3f1af8d82 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out @@ -75,7 +75,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b_bucket\",\"transform\":\"bucket[16]\",\"source-id\":1,\"field-id\":1000},{\"name\":\"c_trunc\",\"transform\":\"truncate[3]\",\"source-id\":2,\"field-id\":1001}]} format-version 1 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 2 numPartitions 2 @@ -169,7 +168,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b_bucket\",\"transform\":\"bucket[16]\",\"source-id\":1,\"field-id\":1000},{\"name\":\"c_trunc\",\"transform\":\"truncate[3]\",\"source-id\":2,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 2 numPartitions 2 diff --git a/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out b/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out index a44d9394025b..e46dfab99143 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/row_count.q.out @@ -111,7 +111,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"p1\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"p2\",\"transform\":\"identity\",\"source-id\":6,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only 
true metadata_location hdfs://### HDFS PATH ### numFiles 10 numPartitions 10 @@ -203,7 +202,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"p1\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000},{\"name\":\"p2\",\"transform\":\"identity\",\"source-id\":6,\"field-id\":1001}]} format-version 2 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 10 numPartitions 10 diff --git a/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out index 3ecbc531d271..5c67ce2129f6 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/show_create_iceberg_table.q.out @@ -34,7 +34,6 @@ TBLPROPERTIES ( 'bucketing_version'='2', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"i","required":false,"type":"int"},{"id":2,"name":"s","required":false,"type":"string"},{"id":3,"name":"ts","required":false,"type":"timestamp"},{"id":4,"name":"d","required":false,"type":"date"}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -82,7 +81,6 @@ TBLPROPERTIES ( 'bucketing_version'='2', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"i","required":false,"type":"int"},{"id":2,"name":"s","required":false,"type":"string"},{"id":3,"name":"ts","required":false,"type":"timestamp"},{"id":4,"name":"d","required":false,"type":"date"}]}', 'format-version'='1', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -127,7 +125,6 @@ TBLPROPERTIES ( 'bucketing_version'='2', 
'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"i","required":false,"type":"int"},{"id":2,"name":"s","required":false,"type":"string"},{"id":3,"name":"ts","required":false,"type":"timestamp"},{"id":4,"name":"d","required":false,"type":"date"}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -187,7 +184,6 @@ TBLPROPERTIES ( 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"year_field","required":false,"type":"date"},{"id":2,"name":"month_field","required":false,"type":"date"},{"id":3,"name":"day_field","required":false,"type":"date"},{"id":4,"name":"hour_field","required":false,"type":"timestamp"},{"id":5,"name":"truncate_field","required":false,"type":"string"},{"id":6,"name":"bucket_field","required":false,"type":"int"},{"id":7,"name":"identity_field","required":false,"type":"int"}]}', 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"year_field_year","transform":"year","source-id":1,"field-id":1000},{"name":"month_field_month","transform":"month","source-id":2,"field-id":1001},{"name":"day_field_day","transform":"day","source-id":3,"field-id":1002},{"name":"hour_field_hour","transform":"hour","source-id":4,"field-id":1003},{"name":"truncate_field_trunc","transform":"truncate[2]","source-id":5,"field-id":1004},{"name":"bucket_field_bucket","transform":"bucket[2]","source-id":6,"field-id":1005},{"name":"identity_field","transform":"identity","source-id":7,"field-id":1006}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -248,7 +244,6 @@ TBLPROPERTIES ( 
'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"year_field","required":false,"type":"date"},{"id":3,"name":"month_field","required":false,"type":"date"},{"id":4,"name":"day_field","required":false,"type":"date"},{"id":5,"name":"hour_field","required":false,"type":"timestamp"},{"id":6,"name":"truncate_field","required":false,"type":"string"},{"id":7,"name":"bucket_field","required":false,"type":"int"},{"id":8,"name":"identity_field","required":false,"type":"int"}]}', 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"year_field_year","transform":"year","source-id":2,"field-id":1000},{"name":"month_field_month","transform":"month","source-id":3,"field-id":1001},{"name":"day_field_day","transform":"day","source-id":4,"field-id":1002},{"name":"hour_field_hour","transform":"hour","source-id":5,"field-id":1003},{"name":"truncate_field_trunc","transform":"truncate[2]","source-id":6,"field-id":1004},{"name":"bucket_field_bucket","transform":"bucket[2]","source-id":7,"field-id":1005},{"name":"identity_field","transform":"identity","source-id":8,"field-id":1006}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -297,7 +292,6 @@ TBLPROPERTIES ( 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"a","required":false,"type":"int"},{"id":2,"name":"b","required":false,"type":"string"}]}', 'default-partition-spec'='{"spec-id":0,"fields":[{"name":"b","transform":"identity","source-id":2,"field-id":1000}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', @@ -354,7 +348,6 @@ TBLPROPERTIES ( 
'current-snapshot-summary'='{"added-data-files":"1","added-records":"3","added-files-size":"#Masked#","changed-partition-count":"1","total-records":"3","total-files-size":"#Masked#","total-data-files":"1","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0","iceberg-version":"#Masked#"}', 'current-snapshot-timestamp-ms'='#Masked#', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'previous_metadata_location'='hdfs://### HDFS PATH ###', @@ -401,7 +394,6 @@ TBLPROPERTIES ( 'bucketing_version'='2', 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"i","required":false,"type":"int"},{"id":2,"name":"s","required":false,"type":"string"}]}', 'format-version'='2', - 'iceberg.orc.files.only'='false', 'metadata_location'='hdfs://### HDFS PATH ###', 'parquet.compression'='zstd', 'serialization.format'='1', diff --git a/iceberg/iceberg-handler/src/test/results/positive/show_iceberg_materialized_views.q.out b/iceberg/iceberg-handler/src/test/results/positive/show_iceberg_materialized_views.q.out index 1dc569be87c9..756a634026dc 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/show_iceberg_materialized_views.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/show_iceberg_materialized_views.q.out @@ -366,7 +366,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"key\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"value\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ds\",\"required\":false,\"type\":\"string\"}]} format-version 1 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -419,7 +418,6 @@ Table Parameters: bucketing_version 2 current-schema 
{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"key\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"value\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ds\",\"required\":false,\"type\":\"string\"}]} format-version 1 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -471,7 +469,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"key\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"value\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"ds\",\"required\":false,\"type\":\"string\"}]} format-version 1 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -577,7 +574,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]} format-version 1 - iceberg.orc.files.only true metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out index 44d2c059a19e..ba1b3d724782 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_force_iceberg_table.q.out @@ -96,7 +96,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# external.table.purge false format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 3 @@ -172,7 +171,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# external.table.purge false format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 0 diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out index 6c5831383bab..56897e0ea67f 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out @@ -96,7 +96,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# external.table.purge true format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 3 @@ -172,7 +171,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# external.table.purge true format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 0 @@ -246,7 +244,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# external.table.purge true format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 1 @@ -322,7 +319,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# external.table.purge true format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 0 @@ -414,7 +410,6 @@ Table Parameters: current-snapshot-timestamp-ms #Masked# external.table.purge false format-version 2 - iceberg.orc.files.only true #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out index be765c27120a..f28166ec3b6b 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out +++ 
b/iceberg/iceberg-handler/src/test/results/positive/truncate_partitioned_iceberg_table.q.out @@ -107,7 +107,6 @@ Table Parameters: default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} external.table.purge true format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 4 @@ -215,7 +214,6 @@ Table Parameters: default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"b\",\"transform\":\"identity\",\"source-id\":2,\"field-id\":1000}]} external.table.purge true format-version 2 - iceberg.orc.files.only false #### A masked pattern was here #### metadata_location hdfs://### HDFS PATH ### numFiles 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out index 5d4e328faf21..d0ba154e1464 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_partitioned.q.out @@ -71,10 +71,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_ice - filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean) + filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean) + predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not 
null) (type: boolean) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string) diff --git a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out index 6a149603f73a..150fa60ce166 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/update_iceberg_copy_on_write_unpartitioned.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_ice - filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean) + filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((a = 22) or (b) IN ('four', 'one')) (type: boolean) @@ -93,7 +93,7 @@ STAGE PLANS: Map-reduce partition columns: FILE__PATH (type: string) Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean) + predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((b <> 'four') and (b <> 'one') and (a <> 22))) and FILE__PATH is not null) (type: boolean) Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string) diff --git a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out index a0ba02dbf037..a9ad0fc8da75 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out @@ -161,7 +161,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 @@ -206,7 +205,6 @@ Table Parameters: bucketing_version 2 current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]} format-version 2 - iceberg.orc.files.only false metadata_location hdfs://### HDFS PATH ### numFiles 0 numRows 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out index cd0ce562a725..e91e55d0d034 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out @@ -220,8 +220,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: 
org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -467,7 +467,7 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2), 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] + vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2)/DECIMAL_64, 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] Select Vectorization: className: VectorSelectOperator native: true @@ -476,7 +476,7 @@ STAGE PLANS: aggregators: VectorUDAFMaxDouble(col 0:float) -> float className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, col 9:decimal(4,2) + keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 15:decimal(4,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -490,8 +490,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -499,9 +499,9 @@ STAGE PLANS: rowBatchContext: 
dataColumnCount: 10 includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2) + dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [decimal(4,2)] Reducer 2 Execution mode: vectorized Reduce Vectorization: @@ -820,8 +820,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_multitable.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_multitable.q.out index 2f8ce7ad42b1..fa939bb08ba4 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_multitable.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_multitable.q.out @@ -52,14 +52,6 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@orders POSTHOOK: Lineage: orders.o_custkey SCRIPT [] POSTHOOK: Lineage: orders.o_orderkey SCRIPT [] -PREHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false') -PREHOOK: type: ALTERTABLE_PROPERTIES -PREHOOK: Input: default@customer_ice -PREHOOK: Output: default@customer_ice -POSTHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'false') -POSTHOOK: type: ALTERTABLE_PROPERTIES -POSTHOOK: Input: 
default@customer_ice -POSTHOOK: Output: default@customer_ice PREHOOK: query: select sum(1 - l_discount) as revenue FROM customer_ice, orders, lineitem WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 @@ -112,45 +104,3 @@ POSTHOOK: Input: default@lineitem_ice POSTHOOK: Input: default@orders POSTHOOK: Output: hdfs://### HDFS PATH ### -99.20 -PREHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true') -PREHOOK: type: ALTERTABLE_PROPERTIES -PREHOOK: Input: default@customer_ice -PREHOOK: Output: default@customer_ice -POSTHOOK: query: alter table customer_ice set tblproperties ( 'iceberg.orc.files.only' = 'true') -POSTHOOK: type: ALTERTABLE_PROPERTIES -POSTHOOK: Input: default@customer_ice -POSTHOOK: Output: default@customer_ice -PREHOOK: query: select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_ice -PREHOOK: Input: default@lineitem -PREHOOK: Input: default@orders -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@customer_ice -POSTHOOK: Input: default@lineitem -POSTHOOK: Input: default@orders -POSTHOOK: Output: hdfs://### HDFS PATH ### --99.20 -PREHOOK: query: select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem_ice -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@customer_ice -PREHOOK: Input: default@lineitem_ice -PREHOOK: Input: default@orders -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select sum(1 - l_discount) as revenue -FROM customer_ice, orders, lineitem_ice -WHERE c_custkey = o_custkey and l_orderkey = o_orderkey limit 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: 
default@customer_ice -POSTHOOK: Input: default@lineitem_ice -POSTHOOK: Input: default@orders -POSTHOOK: Output: hdfs://### HDFS PATH ### --99.20 diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out index acc7794e12ce..ba3f66f093c0 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_parquet.q.out @@ -112,8 +112,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -271,7 +271,7 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2), 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] + vectorizationSchemaColumns: [0:t_float:float, 1:t_double:double, 2:t_boolean:boolean, 3:t_int:int, 4:t_bigint:bigint, 5:t_binary:binary, 6:t_string:string, 7:t_timestamp:timestamp, 8:t_date:date, 9:t_decimal:decimal(4,2)/DECIMAL_64, 10:PARTITION__SPEC__ID:int, 11:PARTITION__HASH:bigint, 12:FILE__PATH:string, 13:ROW__POSITION:bigint, 14:PARTITION__PROJECTION:string] Select Vectorization: className: VectorSelectOperator native: true @@ -280,7 +280,7 @@ STAGE PLANS: aggregators: VectorUDAFMaxDouble(col 0:float) -> float className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 
1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, col 9:decimal(4,2) + keyExpressions: col 1:double, col 2:boolean, col 3:int, col 4:bigint, col 5:binary, col 6:string, col 7:timestamp, col 8:date, ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 15:decimal(4,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -294,8 +294,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false @@ -303,9 +303,9 @@ STAGE PLANS: rowBatchContext: dataColumnCount: 10 includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2) + dataColumns: t_float:float, t_double:double, t_boolean:boolean, t_int:int, t_bigint:bigint, t_binary:binary, t_string:string, t_timestamp:timestamp, t_date:date, t_decimal:decimal(4,2)/DECIMAL_64 partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [decimal(4,2)] Reducer 2 Execution mode: vectorized Reduce Vectorization: @@ -503,8 +503,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.iceberg.mr.hive.HiveIcebergInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/itests/hive-iceberg/src/test/java/org/apache/hive/TestHiveRESTCatalogClientITBase.java 
b/itests/hive-iceberg/src/test/java/org/apache/hive/TestHiveRESTCatalogClientITBase.java index fd30223c9934..03885a54e1c6 100644 --- a/itests/hive-iceberg/src/test/java/org/apache/hive/TestHiveRESTCatalogClientITBase.java +++ b/itests/hive-iceberg/src/test/java/org/apache/hive/TestHiveRESTCatalogClientITBase.java @@ -30,7 +30,9 @@ import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.metadata.Hive; @@ -38,13 +40,15 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; import org.apache.iceberg.TableProperties; -import org.apache.iceberg.hive.IcebergCatalogProperties; +import org.apache.iceberg.hive.HiveOperationsBase; import org.apache.iceberg.hive.HiveSchemaUtil; +import org.apache.iceberg.hive.IcebergCatalogProperties; import org.apache.iceberg.rest.extension.HiveRESTCatalogServerExtension; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -67,6 +71,8 @@ public abstract class TestHiveRESTCatalogClientITBase { static final String DB_NAME = "ice_db"; + static final String VIEW_DB_NAME = "ice_db_view"; + static final String NATIVE_VIEW_NAME = "native_rest_v"; static final String TABLE_NAME = "ice_tbl"; static final String CATALOG_NAME = "ice01"; static final 
String HIVE_ICEBERG_STORAGE_HANDLER = "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler"; @@ -109,6 +115,20 @@ void setup() throws Exception { msClient = new HiveMetaStoreClient(conf, hookLoader); hiveConf = new HiveConf(conf, HiveConf.class); hive = Hive.get(hiveConf); + dropDatabaseIfExists(VIEW_DB_NAME); + } + + private void dropDatabaseIfExists(String dbName) { + try { + msClient.dropTable(CATALOG_NAME, dbName, NATIVE_VIEW_NAME); + } catch (Exception ignored) { + // view may not exist + } + try { + msClient.dropDatabase(dbName); + } catch (Exception ignored) { + // database may not exist + } } @AfterEach @@ -173,8 +193,14 @@ public void testIceberg() throws Exception { Assertions.assertEquals(TABLE_NAME, table.getTableName()); Assertions.assertEquals(HIVE_ICEBERG_STORAGE_HANDLER, table.getParameters().get("storage_handler")); Assertions.assertNotNull(table.getParameters().get(TableProperties.DEFAULT_PARTITION_SPEC)); - Assertions.assertEquals(1, table.getPartitionKeys().size()); - Assertions.assertEquals("city", table.getPartitionKeys().getFirst().getName()); + + // TODO: Revert after HIVE-29633 is fixed + // Assertions.assertEquals(1, table.getPartitionKeys().size()); + Assertions.assertTrue(table.getPartitionKeys().isEmpty()); + + List columnNames = + table.getSd().getCols().stream().map(FieldSchema::getName).toList(); + Assertions.assertTrue(columnNames.contains("city")); // --- Get Tables --- List tables = msClient.getTables(CATALOG_NAME, DB_NAME, "ice_*"); @@ -195,7 +221,69 @@ public void testIceberg() throws Exception { Assertions.assertFalse(msClient.getAllDatabases(CATALOG_NAME).contains(DB_NAME)); } - private static Table createPartitionedTable(IMetaStoreClient db, String catName, String dbName, String tableName, + @Test + public void testIcebergView() throws Exception { + Database db = new Database(); + db.setCatalogName(CATALOG_NAME); + db.setName(VIEW_DB_NAME); + db.setOwnerType(PrincipalType.USER); + 
db.setOwnerName(System.getProperty("user.name")); + String warehouseDir = MetastoreConf.get(conf, MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL.getVarname()); + db.setLocationUri(warehouseDir + "/" + VIEW_DB_NAME + ".db"); + hive.createDatabase(db, true); + + List cols = Collections.singletonList(new FieldSchema("x", "int", "")); + createIcebergView(VIEW_DB_NAME, NATIVE_VIEW_NAME, cols, "select 1 as x", "rest-native-view"); + + Assertions.assertTrue(msClient.tableExists(CATALOG_NAME, VIEW_DB_NAME, NATIVE_VIEW_NAME)); + + GetTableRequest getTableRequest = new GetTableRequest(); + getTableRequest.setCatName(CATALOG_NAME); + getTableRequest.setDbName(VIEW_DB_NAME); + getTableRequest.setTblName(NATIVE_VIEW_NAME); + Table view = msClient.getTable(getTableRequest); + + Assertions.assertEquals(TableType.VIRTUAL_VIEW.name(), view.getTableType()); + String tableTypeProp = view.getParameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP); + Assertions.assertNotNull(tableTypeProp); + Assertions.assertEquals(HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE, tableTypeProp.toLowerCase()); + + List names = msClient.getTables(CATALOG_NAME, VIEW_DB_NAME, "*"); + Assertions.assertTrue(names.contains(NATIVE_VIEW_NAME)); + + msClient.dropTable(CATALOG_NAME, VIEW_DB_NAME, NATIVE_VIEW_NAME); + Assertions.assertFalse(msClient.tableExists(CATALOG_NAME, VIEW_DB_NAME, NATIVE_VIEW_NAME)); + + msClient.dropDatabase(VIEW_DB_NAME); + } + + private void createIcebergView( + String dbName, String viewName, List cols, String viewSql, String comment) throws Exception { + Table view = new Table(); + view.setCatName(CATALOG_NAME); + view.setDbName(dbName); + view.setTableName(viewName); + view.setTableType(TableType.VIRTUAL_VIEW.toString()); + view.setViewOriginalText(viewSql); + view.setViewExpandedText(viewSql); + + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(cols); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setParameters(new java.util.HashMap<>()); + view.setSd(sd); + + 
view.setParameters(new java.util.HashMap<>()); + view.getParameters().put(hive_metastoreConstants.META_TABLE_STORAGE, HIVE_ICEBERG_STORAGE_HANDLER); + view.getParameters().put("view-format", "iceberg"); + if (comment != null) { + view.getParameters().put("comment", comment); + } + + msClient.createTable(view); + } + + private static Table createPartitionedTable(IMetaStoreClient db, String catName, String dbName, String tableName, Map tableParameters) throws Exception { db.dropTable(catName, dbName, tableName); Table table = new Table(); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java index a0d5bd2c99b3..aac44f76b411 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/AbstractTestAuthorizationApiAuthorizer.java @@ -86,11 +86,11 @@ private void testFunction(FunctionInvoker mscFunctionInvoker) throws Exception { } catch (RuntimeException e) { // A hack to verify that authorization check passed. Exception can be thrown be cause // the functions are not being called with valid params. - // verify that exception has come from ObjectStore code, which means that the + // verify that exception has come from RawStore code, which means that the // authorization checks passed. 
String exStackString = ExceptionUtils.getStackTrace(e); assertTrue("Verifying this exception came after authorization check", - exStackString.contains("org.apache.hadoop.hive.metastore.ObjectStore")); + exStackString.contains("org.apache.hadoop.hive.metastore.RawStore")); // If its not an exception caused by auth check, ignore it } assertFalse("Authz Exception should have been thrown in remote mode", isRemoteMetastoreMode); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetastoreTransformer.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetastoreTransformer.java index c3c174bcc348..1dc8b1776dd2 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetastoreTransformer.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetastoreTransformer.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.Locale; +import java.util.Optional; import org.apache.hadoop.hive.metastore.api.GetPartitionsByNamesRequest; import org.apache.hadoop.hive.metastore.client.builder.GetTablesRequestBuilder; @@ -821,6 +822,7 @@ public void testGetTablesExt() throws Exception { count = 300; tProps.put("TBLNAME", "test_limit"); tProps.put("TABLECOUNT", count); + tProps.remove("CAPABILITIES"); // CAPABILITIES are already appended to PROPERTIES tables = createTables(tProps); assertEquals("Unexpected number of tables created", count, tables.size()); @@ -934,7 +936,8 @@ public void testGetPartitionsByNames() throws Exception { properties.append("transactional_properties=insert_only"); tProps.put("TBLNAME", tblName); tProps.put("PROPERTIES", properties.toString()); - setHMSClient("createTable", new String[] {"HIVEMANAGEDINSERTWRITE,HIVEFULLACIDWRITE"}); + tProps.put("TBLTYPE", type); + setHMSClient("createTable", new String[] {"HIVEMANAGEDINSERTWRITE", "HIVEFULLACIDWRITE"}); table = createTableWithCapabilities(tProps); resetHMSClient(); @@ 
-1772,7 +1775,7 @@ private List createTables(Map props) throws Exception { String tblName = (String)props.get("TBLNAME"); List caps = (List)props.get("CAPABILITIES"); StringBuilder table_params = new StringBuilder(); - table_params.append((String)props.get("PROPERTIES")); + Optional.ofNullable(props.get("PROPERTIES")).ifPresent(table_params::append); if (caps != null) table_params.append(CAPABILITIES_KEY).append("=").append(String.join(",", caps)); props.put("PROPERTIES", table_params.toString()); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java index c8166d8ee7d7..ace6fa5d6950 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hive.ql.txn.compactor; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.Constants; @@ -48,6 +51,8 @@ import java.io.File; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -58,8 +63,8 @@ import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_COMPACTOR_CLEANER_RETENTION_TIME; import static org.apache.hadoop.hive.ql.txn.compactor.CompactorTestUtil.executeStatementOnDriverAndReturnResults; -import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.dropTables; +import static 
org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; /** * Superclass for Test[Crud|Mm]CompactorOnTez, for setup and helper classes. @@ -184,7 +189,8 @@ protected void verifySuccessfulCompaction(int expectedSuccessfulCompactions) thr protected HiveHookEvents.HiveHookEventProto getRelatedTezEvent(String dbTableName) throws Exception { int retryCount = 3; while (retryCount-- > 0) { - List> readers = TestHiveProtoLoggingHook.getTestReader(conf, tmpFolder); + List> + readers = TestHiveProtoLoggingHook.getTestReader(conf, tmpFolder); for (ProtoMessageReader reader : readers) { do { HiveHookEvents.HiveHookEventProto event; @@ -541,9 +547,40 @@ protected List getBucketData(String tblName, String bucketId) throws Exc "select ROW__ID, * from " + tblName + " where ROW__ID.bucketid = " + bucketId + " order by ROW__ID, a, b", driver); } + protected List getStructuredBucketData(String tblName, String bucketId) throws Exception { + List getBucketData = getBucketData(tblName, bucketId); + + List result = new ArrayList<>(getBucketData.size()); + for (String row : getBucketData) { + result.add(RowInfo.fromRawString(row)); + } + + return result; + } + protected void dropTable(String tblName) throws Exception { executeStatementOnDriver("drop table " + tblName, driver); } + + protected record RowInfo(long writeId, long bucketId, long rowId, TestRebalanceCompactor.RowData rowData) { + private static final ObjectMapper MAPPER = new ObjectMapper(); + + static RowInfo fromRawString(String row) throws JsonProcessingException { + // Example row data to parse: "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4", + + String[] parts = row.split("\t"); + + JsonNode json = MAPPER.readTree(parts[0]); + + return new RowInfo( + json.get("writeid").asLong(), + json.get("bucketid").asLong(), + json.get("rowid").asLong(), + + new TestRebalanceCompactor.RowData(Arrays.copyOfRange(parts, 1, parts.length)) + ); + } + } } protected Initiator createInitiator() throws 
Exception { diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java index 98121f7df019..ec860e90b54d 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java @@ -25,8 +25,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -54,17 +52,13 @@ import org.apache.hadoop.hive.metastore.txn.entities.CompactionInfo; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; -import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.DriverFactory; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook; import org.apache.hadoop.hive.ql.hooks.proto.HiveHookEvents; -import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.BucketCodec; -import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.processors.CommandProcessorException; @@ -81,7 +75,6 @@ import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentCaptor; -import org.mockito.Mockito; import org.apache.hive.common.util.ReflectionUtil; import static java.util.Collections.emptyMap; @@ -90,7 +83,14 @@ import static 
org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; import static org.apache.hadoop.hive.ql.txn.compactor.CompactorTestUtil.executeStatementOnDriverAndReturnResults; import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactorBase.dropTables; -import static org.mockito.Mockito.*; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.nullable; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; @SuppressWarnings("deprecation") public class TestCrudCompactorOnTez extends CompactorOnTezTest { @@ -98,483 +98,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest { private static final String DB = "default"; private static final String TABLE1 = "t1"; - @Test - public void testRebalanceCompactionWithParallelDeleteAsSecondOptimisticLock() throws Exception { - testRebalanceCompactionWithParallelDeleteAsSecond(true); - } - - @Test - public void testRebalanceCompactionWithParallelDeleteAsSecondPessimisticLock() throws Exception { - testRebalanceCompactionWithParallelDeleteAsSecond(false); - } - - private void testRebalanceCompactionWithParallelDeleteAsSecond(boolean optimisticLock) throws Exception { - conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); - conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); - conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER, false); - conf.setBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK, optimisticLock); - - //set grouping size to have 3 buckets, and re-create driver with the new config - conf.set("tez.grouping.min-size", "400"); - conf.set("tez.grouping.max-size", "5000"); - driver = new Driver(conf); - - final String tableName = "rebalance_test"; - TestDataProvider testDataProvider = prepareRebalanceTestData(tableName); - - //Try to 
do a rebalancing compaction - executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance' ORDER BY b DESC", driver); - - CountDownLatch startDelete = new CountDownLatch(1); - CountDownLatch endDelete = new CountDownLatch(1); - CompactorFactory factory = Mockito.spy(CompactorFactory.getInstance()); - doAnswer(invocation -> { - Object result = invocation.callRealMethod(); - startDelete.countDown(); - Thread.sleep(1000); - return result; - }).when(factory).getCompactorPipeline(any(), any(), any(), any()); - - Worker worker = new Worker(factory); - worker.setConf(conf); - worker.init(new AtomicBoolean(true)); - worker.start(); - - if (!startDelete.await(10, TimeUnit.SECONDS)) { - throw new RuntimeException("Waiting for the compaction to start timed out!"); - } - - boolean aborted = false; - try { - executeStatementOnDriver("DELETE FROM " + tableName + " WHERE b = 12", driver); - } catch (CommandProcessorException e) { - if (optimisticLock) { - Assert.fail("In case of TXN_WRITE_X_LOCK = true, the transaction must be retried instead of being aborted."); - } - aborted = true; - Assert.assertEquals(LockException.class, e.getCause().getClass()); - Assert.assertEquals( "Transaction manager has aborted the transaction txnid:19. 
Reason: Aborting [txnid:19,19] due to a write conflict on default/rebalance_test committed by [txnid:18,19] d/u", e.getCauseMessage()); - // Delete the record, so the rest of the test can be the same in both cases - executeStatementOnDriver("DELETE FROM " + tableName + " WHERE b = 12", driver); - } finally { - if(!optimisticLock && !aborted) { - Assert.fail("In case of TXN_WRITE_X_LOCK = false, the transaction must be aborted instead of being retried."); - } - } - endDelete.countDown(); - - worker.join(); - - driver.close(); - driver = new Driver(conf); - - List result = execSelectAndDumpData("select * from " + tableName + " WHERE b = 12", driver, - "Dumping data for " + tableName + " after load:"); - Assert.assertEquals(0, result.size()); - - //Check if the compaction succeed - verifyCompaction(1, TxnStore.CLEANING_RESPONSE); - - String[][] expectedBuckets = new String[][] { - { - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":1}\t16\t16", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":2}\t15\t15", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":3}\t14\t14", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":4}\t13\t13", - }, - { - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4", - }, - { - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3", - }, - { - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2", - }, - 
}; - verifyRebalance(testDataProvider, tableName, null, expectedBuckets, - new String[] {"bucket_00000", "bucket_00001", "bucket_00002", "bucket_00003"}, "base_0000007_v0000018"); - } - - @Test - public void testRebalanceCompactionOfNotPartitionedImplicitlyBucketedTableWithOrder() throws Exception { - conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); - conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); - conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER, false); - - //set grouping size to have 3 buckets, and re-create driver with the new config - conf.set("tez.grouping.min-size", "400"); - conf.set("tez.grouping.max-size", "5000"); - driver = new Driver(conf); - - final String tableName = "rebalance_test"; - TestDataProvider testDataProvider = prepareRebalanceTestData(tableName); - - //Try to do a rebalancing compaction - executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance' ORDER BY b DESC", driver); - runWorker(conf); - - driver.close(); - driver = new Driver(conf); - - //Check if the compaction succeed - verifyCompaction(1, TxnStore.CLEANING_RESPONSE); - - String[][] expectedBuckets = new String[][] { - { - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":1}\t16\t16", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":2}\t15\t15", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":3}\t14\t14", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":4}\t13\t13", - }, - { - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":5}\t12\t12", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":6}\t6\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":7}\t3\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":8}\t4\t4", - "{\"writeid\":7,\"bucketid\":536936448,\"rowid\":9}\t2\t4", - }, - { - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":10}\t5\t4", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":11}\t2\t3", - 
"{\"writeid\":7,\"bucketid\":537001984,\"rowid\":12}\t3\t3", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":13}\t6\t3", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":14}\t4\t3", - }, - { - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":15}\t5\t3", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":16}\t6\t2", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t5\t2", - }, - }; - verifyRebalance(testDataProvider, tableName, null, expectedBuckets, - new String[] {"bucket_00000", "bucket_00001", "bucket_00002","bucket_00003"}, "base_0000007_v0000018"); - } - - @Test - public void testRebalanceCompactionOfNotPartitionedImplicitlyBucketedTable() throws Exception { - conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); - conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); - conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER, false); - - //set grouping size to have 3 buckets, and re-create driver with the new config - conf.set("tez.grouping.min-size", "400"); - conf.set("tez.grouping.max-size", "5000"); - driver = new Driver(conf); - - final String tableName = "rebalance_test"; - TestDataProvider testDataProvider = prepareRebalanceTestData(tableName); - - //Try to do a rebalancing compaction - executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance'", driver); - runWorker(conf); - - //Check if the compaction succeed - verifyCompaction(1, TxnStore.CLEANING_RESPONSE); - - String[][] expectedBuckets = new String[][] { - { - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t5\t4", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t6\t2", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t6\t3", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t5\t2", - }, - { - - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":5}\t5\t3", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":6}\t2\t4", - 
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":7}\t3\t3", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":8}\t4\t4", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":9}\t4\t3", - }, - { - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":10}\t2\t3", - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":11}\t3\t4", - "{\"writeid\":2,\"bucketid\":537001984,\"rowid\":12}\t12\t12", - "{\"writeid\":3,\"bucketid\":537001984,\"rowid\":13}\t13\t13", - "{\"writeid\":4,\"bucketid\":537001984,\"rowid\":14}\t14\t14", - }, - { - "{\"writeid\":5,\"bucketid\":537067520,\"rowid\":15}\t15\t15", - "{\"writeid\":6,\"bucketid\":537067520,\"rowid\":16}\t16\t16", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t17\t17", - }, - }; - verifyRebalance(testDataProvider, tableName, null, expectedBuckets, - new String[] {"bucket_00000", "bucket_00001", "bucket_00002","bucket_00003"}, "base_0000007_v0000018"); - } - - @Test - public void testRebalanceCompactionOfPartitionedImplicitlyBucketedTable() throws Exception { - conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); - conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); - conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER, false); - - //set grouping size to have 3 buckets, and re-create driver with the new config - conf.set("tez.grouping.min-size", "1"); - driver = new Driver(conf); - - final String stageTableName = "stage_rebalance_test"; - final String tableName = "rebalance_test"; - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf); - - TestDataProvider testDataProvider = new TestDataProvider(); - testDataProvider.createFullAcidTable(stageTableName, true, false); - executeStatementOnDriver("insert into " + stageTableName +" values " + - "('1',1,'yesterday'), ('1',2,'yesterday'), ('1',3, 'yesterday'), ('1',4, 'yesterday'), " + - "('2',1,'today'), ('2',2,'today'), ('2',3,'today'), ('2',4, 'today'), " + - "('3',1,'tomorrow'), ('3',2,'tomorrow'), ('3',3,'tomorrow'), 
('3',4,'tomorrow')", - driver); - - dropTables(driver, tableName); - executeStatementOnDriver("CREATE TABLE " + tableName + "(a string, b int) " + - "PARTITIONED BY (ds string) STORED AS ORC TBLPROPERTIES('transactional'='true')", driver); - executeStatementOnDriver("INSERT OVERWRITE TABLE " + tableName + " partition (ds='tomorrow') select a, b from " + stageTableName, driver); - - //do some single inserts to have more data in the first bucket. - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('12',12,'tomorrow')", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('13',13,'tomorrow')", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('14',14,'tomorrow')", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('15',15,'tomorrow')", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('16',16,'tomorrow')", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('17',17,'tomorrow')", driver); - - // Verify buckets and their content before rebalance in partition ds=tomorrow - Table table = msClient.getTable("default", tableName); - FileSystem fs = FileSystem.get(conf); - Assert.assertEquals("Test setup does not match the expected: different buckets", - Arrays.asList("bucket_00000_0", "bucket_00001_0", "bucket_00002_0"), - CompactorTestUtil.getBucketFileNames(fs, table, "ds=tomorrow", "base_0000001")); - String[][] expectedBuckets = new String[][] { - { - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t2\t1\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t2\t2\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t2\t3\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t2\t4\ttomorrow", - "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t12\t12\ttomorrow", - "{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t13\t13\ttomorrow", - 
"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t14\t14\ttomorrow", - "{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t15\t15\ttomorrow", - "{\"writeid\":6,\"bucketid\":536870912,\"rowid\":0}\t16\t16\ttomorrow", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17\ttomorrow", - }, - { - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t3\t1\ttomorrow", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t2\ttomorrow", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t3\t3\ttomorrow", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":3}\t3\t4\ttomorrow", - }, - { - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t1\t1\ttomorrow", - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t1\t2\ttomorrow", - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":2}\t1\t3\ttomorrow", - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":3}\t1\t4\ttomorrow", - }, - }; - for(int i = 0; i < 3; i++) { - Assert.assertEquals("rebalanced bucket " + i, Arrays.asList(expectedBuckets[i]), - testDataProvider.getBucketData(tableName, BucketCodec.V1.encode(options.bucket(i)) + "")); - } - - //Try to do a rebalancing compaction - executeStatementOnDriver("ALTER TABLE " + tableName + " PARTITION (ds='tomorrow') COMPACT 'rebalance'", driver); - runWorker(conf); - - //Check if the compaction succeed - verifyCompaction(1, TxnStore.CLEANING_RESPONSE); - - expectedBuckets = new String[][] { - { - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t2\t1\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t2\t2\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t2\t3\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t2\t4\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t3\t1\ttomorrow", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":5}\t3\t2\ttomorrow", - }, - { - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":6}\t3\t3\ttomorrow", - 
"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":7}\t3\t4\ttomorrow", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":8}\t1\t1\ttomorrow", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":9}\t1\t2\ttomorrow", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":10}\t1\t3\ttomorrow", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":11}\t1\t4\ttomorrow", - }, - { - "{\"writeid\":2,\"bucketid\":537001984,\"rowid\":12}\t12\t12\ttomorrow", - "{\"writeid\":3,\"bucketid\":537001984,\"rowid\":13}\t13\t13\ttomorrow", - "{\"writeid\":4,\"bucketid\":537001984,\"rowid\":14}\t14\t14\ttomorrow", - "{\"writeid\":5,\"bucketid\":537001984,\"rowid\":15}\t15\t15\ttomorrow", - "{\"writeid\":6,\"bucketid\":537001984,\"rowid\":16}\t16\t16\ttomorrow", - "{\"writeid\":7,\"bucketid\":537001984,\"rowid\":17}\t17\t17\ttomorrow", - }, - }; - verifyRebalance(testDataProvider, tableName, "ds=tomorrow", expectedBuckets, - new String[] {"bucket_00000", "bucket_00001", "bucket_00002"}, "base_0000007_v0000014"); - } - - @Test - public void testRebalanceCompactionOfNotPartitionedExplicitlyBucketedTable() throws Exception { - conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); - conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); - conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER, false); - - final String tableName = "rebalance_test"; - dropTables(driver, tableName); - executeStatementOnDriver("CREATE TABLE " + tableName + "(a string, b int) " + - "CLUSTERED BY(a) INTO 4 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true')", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('11',11),('22',22),('33',33),('44',44)", driver); - - //Try to do a rebalancing compaction - executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance'", driver); - runWorker(conf); - - //Check if the compaction is refused - List compacts = verifyCompaction(1, TxnStore.REFUSED_RESPONSE); - Assert.assertEquals("Expecting error 
message 'Cannot execute rebalancing compaction on bucketed tables.' and found:" + compacts.get(0).getState(), - "Cannot execute rebalancing compaction on bucketed tables.", compacts.get(0).getErrorMessage()); - } - - @Test - public void testRebalanceCompactionNotPartitionedExplicitBucketNumbers() throws Exception { - conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); - conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); - conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER, false); - - //set grouping size to have 3 buckets, and re-create driver with the new config - conf.set("tez.grouping.min-size", "400"); - conf.set("tez.grouping.max-size", "5000"); - driver = new Driver(conf); - - final String tableName = "rebalance_test"; - TestDataProvider testDataProvider = prepareRebalanceTestData(tableName); - - //Try to do a rebalancing compaction - executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance' CLUSTERED INTO 4 BUCKETS", driver); - runWorker(conf); - - verifyCompaction(1, TxnStore.CLEANING_RESPONSE); - - String[][] expectedBuckets = new String[][] { - { - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t5\t4", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t6\t2", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t6\t3", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":4}\t5\t2", - }, - { - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":5}\t5\t3", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":6}\t2\t4", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":7}\t3\t3", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":8}\t4\t4", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":9}\t4\t3", - }, - { - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":10}\t2\t3", - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":11}\t3\t4", - "{\"writeid\":2,\"bucketid\":537001984,\"rowid\":12}\t12\t12", - 
"{\"writeid\":3,\"bucketid\":537001984,\"rowid\":13}\t13\t13", - "{\"writeid\":4,\"bucketid\":537001984,\"rowid\":14}\t14\t14", - }, - { - "{\"writeid\":5,\"bucketid\":537067520,\"rowid\":15}\t15\t15", - "{\"writeid\":6,\"bucketid\":537067520,\"rowid\":16}\t16\t16", - "{\"writeid\":7,\"bucketid\":537067520,\"rowid\":17}\t17\t17", - }, - }; - verifyRebalance(testDataProvider, tableName, null, expectedBuckets, - new String[] {"bucket_00000", "bucket_00001", "bucket_00002", "bucket_00003"}, "base_0000007_v0000018"); - } - - private TestDataProvider prepareRebalanceTestData(String tableName) throws Exception { - final String stageTableName = "stage_" + tableName; - - TestDataProvider testDataProvider = new TestDataProvider(); - testDataProvider.createFullAcidTable(stageTableName, true, false); - testDataProvider.insertTestData(stageTableName, true); - - dropTables(driver, tableName); - executeStatementOnDriver("CREATE TABLE " + tableName + "(a string, b int) " + - "STORED AS ORC TBLPROPERTIES('transactional'='true')", driver); - executeStatementOnDriver("INSERT OVERWRITE TABLE " + tableName + " select a, b from " + stageTableName, driver); - - //do some single inserts to have more data in the first bucket. 
- executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('12',12)", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('13',13)", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('14',14)", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('15',15)", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('16',16)", driver); - executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('17',17)", driver); - - // Verify buckets and their content before rebalance - Table table = msClient.getTable("default", tableName); - FileSystem fs = FileSystem.get(conf); - Assert.assertEquals("Test setup does not match the expected: different buckets", - Arrays.asList("bucket_00000_0", "bucket_00001_0", "bucket_00002_0","bucket_00003_0"), - CompactorTestUtil.getBucketFileNames(fs, table, null, "base_0000001")); - String[][] expectedBuckets = new String[][] { - { - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t5\t4", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t6\t2", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":2}\t6\t3", - "{\"writeid\":1,\"bucketid\":536870912,\"rowid\":3}\t6\t4", - "{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t12\t12", - "{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t13\t13", - "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t14\t14", - "{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t15\t15", - "{\"writeid\":6,\"bucketid\":536870912,\"rowid\":0}\t16\t16", - "{\"writeid\":7,\"bucketid\":536870912,\"rowid\":0}\t17\t17", - }, - { - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t5\t2", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t5\t3", - "{\"writeid\":1,\"bucketid\":536936448,\"rowid\":2}\t2\t4", - }, - { - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t3\t3", - "{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t4\t4", - 
"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":2}\t4\t3", - }, - { - "{\"writeid\":1,\"bucketid\":537067520,\"rowid\":0}\t2\t3", - "{\"writeid\":1,\"bucketid\":537067520,\"rowid\":1}\t3\t4", - }, - }; - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf); - for(int i = 0; i < 3; i++) { - Assert.assertEquals("unbalanced bucket " + i, Arrays.asList(expectedBuckets[i]), - testDataProvider.getBucketData(tableName, BucketCodec.V1.encode(options.bucket(i)) + "")); - } - return testDataProvider; - } - - private void verifyRebalance(TestDataProvider testDataProvider, String tableName, String partitionName, - String[][] expectedBucketContent, String[] bucketNames, String folderName) throws Exception { - // Verify buckets and their content after rebalance - Table table = msClient.getTable("default", tableName); - FileSystem fs = FileSystem.get(conf); - Assert.assertEquals("Buckets does not match after compaction", Arrays.asList(bucketNames), - CompactorTestUtil.getBucketFileNames(fs, table, partitionName, folderName)); - AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf); - for (int i = 0; i < expectedBucketContent.length; i++) { - Assert.assertEquals("rebalanced bucket " + i, Arrays.asList(expectedBucketContent[i]), - testDataProvider.getBucketData(tableName, BucketCodec.V1.encode(options.bucket(i)) + "")); - } - } - @Test public void testCompactionShouldNotFailOnPartitionsWithBooleanField() throws Exception { conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestRebalanceCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestRebalanceCompactor.java new file mode 100644 index 000000000000..f85edf030fca --- /dev/null +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestRebalanceCompactor.java @@ -0,0 +1,644 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.txn.compactor; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.GetTableRequest; +import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.txn.TxnStore; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.io.AcidOutputFormat; +import org.apache.hadoop.hive.ql.io.BucketCodec; +import org.apache.hadoop.hive.ql.processors.CommandProcessorException; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.apache.hadoop.hive.ql.ErrorMsg.TXN_ABORTED; +import static org.apache.hadoop.hive.ql.TxnCommandsBaseForTests.runWorker; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactorBase.dropTables; +import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactorBase.execSelectAndDumpData; 
+import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactorBase.executeStatementOnDriver; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; + +public class TestRebalanceCompactor extends CompactorOnTezTest { + + @Test + public void testRebalanceCompactionWithParallelDeleteAsSecondOptimisticLock() throws Exception { + testRebalanceCompactionWithParallelDeleteAsSecond(true); + } + + @Test + public void testRebalanceCompactionWithParallelDeleteAsSecondPessimisticLock() throws Exception { + testRebalanceCompactionWithParallelDeleteAsSecond(false); + } + + @Test + public void testRebalanceCompactionOfNotPartitionedImplicitlyBucketedTableWithOrder() throws Exception { + prepareHiveConfForRebalanceCompaction(); + + conf.set("tez.grouping.min-size", "400"); + conf.set("tez.grouping.max-size", "5000"); + driver = new Driver(conf); + + final String tableName = "rebalance_test"; + TestDataProvider testDataProvider = prepareRebalanceTestData(); + + //Try to do a rebalancing compaction + executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance' ORDER BY b DESC", driver); + runWorker(conf); + + driver.close(); + driver = new Driver(conf); + + //Check if the compaction succeed + verifyCompaction(1, TxnStore.CLEANING_RESPONSE); + + // Populate expected data + List expectedData = new ArrayList<>(); + + expectedData.addAll(List.of( + new RowData("17", "17"), + new RowData("16", "16"), + new RowData("15", "15"), + new RowData("14", "14"), + new RowData("13", "13"), + new RowData("12", "12") + )); + + // Adding the '4' group + expectedData.addAll(List.of( + new RowData("6", "4"), + new RowData("3", "4"), + new RowData("4", "4"), + new RowData("2", "4"), + new RowData("5", "4") + )); + + // Adding the '3' group + expectedData.addAll(List.of( + new RowData("2", "3"), + new RowData("3", "3"), + new RowData("6", "3"), + new 
RowData("4", "3"), + new RowData("5", "3") + )); + + // Adding the '2' group + expectedData.addAll(List.of( + new RowData("6", "2"), + new RowData("5", "2") + )); + + verifyDataAfterCompaction(expectedData, testDataProvider); + } + + private void prepareHiveConfForRebalanceCompaction() { + conf.setBoolVar(HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED, true); + conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_GATHER_STATS, false); + conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER, false); + } + + @Test + public void testRebalanceCompactionOfNotPartitionedImplicitlyBucketedTable() throws Exception { + prepareHiveConfForRebalanceCompaction(); + + // set grouping size to have 3 buckets, and re-create driver with the new config + conf.set("tez.grouping.min-size", "400"); + conf.set("tez.grouping.max-size", "5000"); + driver = new Driver(conf); + + final String tableName = "rebalance_test"; + TestDataProvider testDataProvider = prepareRebalanceTestData(); + + // Run rebalance compaction + executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance'", driver); + runWorker(conf); + + // Check if the compaction succeed + verifyCompaction(1, TxnStore.CLEANING_RESPONSE); + + // Populate expected data + List expectedData = new ArrayList<>(); + + expectedData.addAll(List.of( + new RowData("5", "4"), + new RowData("6", "2"), + new RowData("6", "3"), + new RowData("6", "4"), + new RowData("5", "2") + )); + + expectedData.addAll(List.of( + new RowData("5", "3"), + new RowData("2", "4"), + new RowData("3", "3"), + new RowData("4", "4"), + new RowData("4", "3") + )); + + expectedData.addAll(List.of( + new RowData("2", "3"), + new RowData("3", "4"), + new RowData("12", "12"), + new RowData("13", "13"), + new RowData("14", "14") + )); + + expectedData.addAll(List.of( + new RowData("15", "15"), + new RowData("16", "16"), + new RowData("17", "17") + )); + + verifyDataAfterCompaction(expectedData, testDataProvider, null, false); + } + + @Test + public void 
testRebalanceCompactionOfPartitionedImplicitlyBucketedTable() throws Exception { + prepareHiveConfForRebalanceCompaction(); + + //set grouping size to have 3 buckets, and re-create driver with the new config + conf.set("tez.grouping.min-size", "1"); + driver = new Driver(conf); + + final String stageTableName = "stage_rebalance_test"; + final String tableName = "rebalance_test"; + + TestDataProvider testDataProvider = new TestDataProvider(); + testDataProvider.createFullAcidTable(stageTableName, true, false); + executeStatementOnDriver("insert into " + stageTableName +" values " + + "('1',1,'yesterday'), ('1',2,'yesterday'), ('1',3, 'yesterday'), ('1',4, 'yesterday'), " + + "('2',1,'today'), ('2',2,'today'), ('2',3,'today'), ('2',4, 'today'), " + + "('3',1,'tomorrow'), ('3',2,'tomorrow'), ('3',3,'tomorrow'), ('3',4,'tomorrow')", + driver); + + dropTables(driver, tableName); + executeStatementOnDriver("CREATE TABLE " + tableName + "(a string, b int) " + + "PARTITIONED BY (ds string) STORED AS ORC TBLPROPERTIES('transactional'='true')", driver); + executeStatementOnDriver( + "INSERT OVERWRITE TABLE " + tableName + " partition (ds='tomorrow') select a, b from " + stageTableName, driver + ); + + //do some single inserts to have more data in the first bucket. 
+ executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('12',12,'tomorrow')", driver); + executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('13',13,'tomorrow')", driver); + executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('14',14,'tomorrow')", driver); + executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('15',15,'tomorrow')", driver); + executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('16',16,'tomorrow')", driver); + executeStatementOnDriver("INSERT INTO TABLE " + tableName + " values ('17',17,'tomorrow')", driver); + + // Verify buckets and their content before rebalance in partition ds=tomorrow + // Make sure we have all the records persisted + List allRecords = execSelectAndDumpData( + "SELECT * FROM " + tableName, driver, "Dumping data from test table, " + tableName); + Assert.assertEquals(18, allRecords.size()); + + Assert.assertFalse(isBalanced(testDataProvider, "ds=tomorrow")); + + //Try to do a rebalancing compaction + executeStatementOnDriver("ALTER TABLE " + tableName + " PARTITION (ds='tomorrow') COMPACT 'rebalance'", driver); + runWorker(conf); + + //Check if the compaction succeed + verifyCompaction(1, TxnStore.CLEANING_RESPONSE); + + List expectedData = new ArrayList<>(); + + expectedData.addAll(List.of( + new RowData("2", "1", "tomorrow"), + new RowData("2", "2", "tomorrow"), + new RowData("2", "3", "tomorrow"), + new RowData("2", "4", "tomorrow"), + new RowData("3", "1", "tomorrow"), + new RowData("3", "2", "tomorrow") + )); + + expectedData.addAll(List.of( + new RowData("3", "3", "tomorrow"), + new RowData("3", "4", "tomorrow"), + new RowData("1", "1", "tomorrow"), + new RowData("1", "2", "tomorrow"), + new RowData("1", "3", "tomorrow"), + new RowData("1", "4", "tomorrow") + )); + + expectedData.addAll(List.of( + new RowData("12", "12", "tomorrow"), + new RowData("13", "13", "tomorrow"), + new RowData("14", "14", "tomorrow"), + new RowData("15", "15", 
"tomorrow"), + new RowData("16", "16", "tomorrow"), + new RowData("17", "17", "tomorrow") + )); + + verifyDataAfterCompaction(expectedData, testDataProvider, "ds=tomorrow", false); + } + + @Test + public void testRebalanceCompactionOfNotPartitionedExplicitlyBucketedTable() throws Exception { + prepareHiveConfForRebalanceCompaction(); + + final String tableName = "rebalance_test"; + dropTables(driver, tableName); + executeStatementOnDriver("CREATE TABLE " + tableName + "(a string, b int) " + + "CLUSTERED BY(a) INTO 4 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true')", driver); + executeStatementOnDriver( + "INSERT INTO TABLE " + tableName + " values ('11',11),('22',22),('33',33),('44',44)", driver + ); + + //Try to do a rebalancing compaction + executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance'", driver); + runWorker(conf); + + //Check if the compaction is refused + List compacts = verifyCompaction(1, TxnStore.REFUSED_RESPONSE); + assertEquals( + "Expecting error message 'Cannot execute rebalancing compaction on bucketed tables.' 
and found:" + + compacts.getFirst().getState(), + "Cannot execute rebalancing compaction on bucketed tables.", compacts.getFirst().getErrorMessage()); + } + + @Test + public void testRebalanceCompactionNotPartitionedExplicitBucketNumbers() throws Exception { + prepareHiveConfForRebalanceCompaction(); + + //set grouping size to have 3 buckets, and re-create driver with the new config + conf.set("tez.grouping.min-size", "400"); + conf.set("tez.grouping.max-size", "5000"); + driver = new Driver(conf); + + final String tableName = "rebalance_test"; + TestDataProvider testDataProvider = prepareRebalanceTestData(); + + //Try to do a rebalancing compaction + executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance' CLUSTERED INTO 4 BUCKETS", driver); + runWorker(conf); + + verifyCompaction(1, TxnStore.CLEANING_RESPONSE); + + List expectedData = new ArrayList<>(); + expectedData.addAll(List.of( + new RowData("5", "4"), + new RowData("6", "2"), + new RowData("6", "3"), + new RowData("6", "4"), + new RowData("5", "2") + )); + + expectedData.addAll(List.of( + new RowData("5", "3"), + new RowData("2", "4"), + new RowData("3", "3"), + new RowData("4", "4"), + new RowData("4", "3") + )); + + expectedData.addAll(List.of( + new RowData("2", "3"), + new RowData("3", "4"), + new RowData("12", "12"), + new RowData("13", "13"), + new RowData("14", "14") + )); + + expectedData.addAll(List.of( + new RowData("15", "15"), + new RowData("16", "16"), + new RowData("17", "17") + )); + + verifyDataAfterCompaction(expectedData, testDataProvider, null, false); + } + + @SuppressWarnings("java:S2925") + private void testRebalanceCompactionWithParallelDeleteAsSecond(boolean optimisticLock) throws Exception { + prepareHiveConfForRebalanceCompaction(); + conf.setBoolVar(HiveConf.ConfVars.TXN_WRITE_X_LOCK, optimisticLock); + + //set grouping size to have 3 buckets, and re-create driver with the new config + conf.set("tez.grouping.min-size", "400"); + 
conf.set("tez.grouping.max-size", "5000"); + driver = new Driver(conf); + + final String tableName = "rebalance_test"; + TestDataProvider testDataProvider = prepareRebalanceTestData(); + + //Try to do a rebalancing compaction + executeStatementOnDriver("ALTER TABLE " + tableName + " COMPACT 'rebalance' ORDER BY b DESC", driver); + + CountDownLatch startDelete = new CountDownLatch(1); + CountDownLatch endDelete = new CountDownLatch(1); + CompactorFactory factory = Mockito.spy(CompactorFactory.getInstance()); + doAnswer(invocation -> { + Object result = invocation.callRealMethod(); + startDelete.countDown(); + Thread.sleep(1000); + return result; + }).when(factory).getCompactorPipeline(any(), any(), any(), any()); + + Worker worker = new Worker(factory); + worker.setConf(conf); + worker.init(new AtomicBoolean(true)); + worker.start(); + + if (!startDelete.await(10, TimeUnit.SECONDS)) { + throw new RuntimeException("Waiting for the compaction to start timed out!"); + } + + boolean aborted = false; + try { + executeStatementOnDriver("DELETE FROM " + tableName + " WHERE b = 12", driver); + } catch (CommandProcessorException e) { + if (optimisticLock) { + Assert.fail("In case of TXN_WRITE_X_LOCK = true, the transaction must be retried instead of being aborted."); + } + aborted = true; + Assert.assertEquals(12, e.getResponseCode()); + Assert.assertEquals(TXN_ABORTED.getErrorCode(), e.getErrorCode()); + + // Delete the record, so the rest of the test can be the same in both cases + executeStatementOnDriver("DELETE FROM " + tableName + " WHERE b = 12", driver); + } finally { + if(!optimisticLock && !aborted) { + Assert.fail("In case of TXN_WRITE_X_LOCK = false, the transaction must be aborted instead of being retried."); + } + } + endDelete.countDown(); + + worker.join(); + + driver.close(); + driver = new Driver(conf); + + List result = execSelectAndDumpData("select * from " + tableName + " WHERE b = 12", driver, + "Dumping data for " + tableName + " after load:"); + 
assertEquals(0, result.size()); + + //Check if the compaction succeed + verifyCompaction(1, TxnStore.CLEANING_RESPONSE); + + // Populate expected data + List expectedData = new ArrayList<>(); + + expectedData.addAll(List.of( + new RowData("17", "17"), + new RowData("16", "16"), + new RowData("15", "15"), + new RowData("14", "14"), + new RowData("13", "13") + )); + + // Adding the '4' group + expectedData.addAll(List.of( + new RowData("6", "4"), + new RowData("3", "4"), + new RowData("4", "4"), + new RowData("2", "4"), + new RowData("5", "4") + )); + + // Adding the '3' group + expectedData.addAll(List.of( + new RowData("2", "3"), + new RowData("3", "3"), + new RowData("6", "3"), + new RowData("4", "3"), + new RowData("5", "3") + )); + + // Adding the '2' group + expectedData.addAll(List.of( + new RowData("6", "2"), + new RowData("5", "2") + )); + + verifyDataAfterCompaction(expectedData, testDataProvider); + } + + record RowData(String... columns) { + @Override + public boolean equals(Object obj) { + if (obj instanceof RowData(String[] otherColumns)) { + return Arrays.equals(otherColumns, this.columns); + } + + return false; + } + + @Override + public int hashCode() { + return Arrays.hashCode(columns); + } + } + + /** + * Validate the data after rebalance compaction. + * - the table is balanced (or if not, only numberOfDeletedRows amount of rows are missing + * - there is only one writeId + * - buckets has unique bucketId and the bucketId doesn't change inside a bucket + * - all the required value present + * - rowId must be strictly monotonic + * + * @param expectedData Expected row data + * @param testDataProvider Test data provider + * @throws Exception Any exception that occurs during the execution + */ + private void verifyDataAfterCompaction(List expectedData, TestDataProvider testDataProvider) + throws Exception { + verifyDataAfterCompaction(expectedData, testDataProvider, (String) null, true); + } + /** + * Validate the data after rebalance compaction. 
+ * - the table is balanced (or if not, only the deleted rows are missing): no bucket holds more than ceil(expected row count / bucket count) rows + * - writeId never decreases inside a bucket + * - buckets have unique bucketId and the bucketId doesn't change inside a bucket + * - if we expect the output sorted, data is sorted by column b in descending order (so the order of column a is not predictable) + * - all the required values are present + * - rowId must be strictly increasing + * + * @param expectedData Expected row data; matched rows are removed from this list during verification + * @param testDataProvider Test data provider + * @param partition Partition spec (e.g. "ds=tomorrow") used to locate the bucket files, or null for a non-partitioned table + * @param sorted True if the data must be sorted + * @throws Exception Any exception that occurs during the execution + */ + private void verifyDataAfterCompaction( + List expectedData, TestDataProvider testDataProvider, String partition, boolean sorted + ) throws Exception { + + FileSystem fs = FileSystem.get(conf); + GetTableRequest getTableRequest = new GetTableRequest("default", "rebalance_test"); + Table table = msClient.getTable(getTableRequest); + List bucketFilenames = CompactorTestUtil.getBucketFileNames( + fs, table, partition, "base_0000001"); + + int bucketCount = bucketFilenames.size(); + assertTrue(bucketCount > 0); + + AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf); + + int upperBound = (expectedData.size() + bucketCount - 1) / bucketCount; + + long previousValueForColB = Long.MAX_VALUE; + long previousRowId = Long.MIN_VALUE; + + for (int i = 0; i < bucketCount; i++) { + List bucket = + testDataProvider.getStructuredBucketData( + table.getTableName(), BucketCodec.V1.encode(options.bucket(i)) + "" + ); + + int bucketSize = bucket.size(); + assertTrue(bucketSize <= upperBound); + + long bucketId = -1L; + + long previousWriteId = -1L; + + for (TestDataProvider.RowInfo rowInfo : bucket) { + + // RowId must be strictly monotonic + assertTrue( + String.format( + "RowId must be strictly monotonic rule failed.
Previous RowId: %d, Bucket: %s, ", + previousRowId, rowInfo), + rowInfo.rowId() > previousRowId); + previousRowId = rowInfo.rowId(); + + // Check that writeId never decreases inside the bucket + if (previousWriteId == -1L) { + // we are at the first element + previousWriteId = rowInfo.writeId(); + } else { + assertTrue(previousWriteId <= rowInfo.writeId()); + previousWriteId = rowInfo.writeId(); + } + + // Check that bucketId doesn't change inside the bucket + if (bucketId == -1) { + // we are at the first element of the bucket + bucketId = rowInfo.bucketId(); + } else { + assertEquals(bucketId, rowInfo.bucketId()); + } + + // Check that the row is one of the expected rows, then consume it so each expected row is matched only once + RowData rowData = rowInfo.rowData(); + assertTrue(expectedData.contains(rowData)); + expectedData.remove(rowData); + + // Check if the data is sorted by colB desc + if (sorted) { + long colB = Long.parseLong(rowData.columns()[1]); + assertTrue(colB <= previousValueForColB); + previousValueForColB = colB; + } + } + } + + // check if we got all the expected values + assertEquals(0, expectedData.size()); // we have found all the elements in a proper order + } + + private TestDataProvider prepareRebalanceTestData() throws Exception { + final String stageTableName = "stage_" + "rebalance_test"; + + TestDataProvider testDataProvider = new TestDataProvider(); + testDataProvider.createFullAcidTable(stageTableName, true, false); + testDataProvider.insertTestData(stageTableName, true); + + dropTables(driver, "rebalance_test"); + executeStatementOnDriver("CREATE TABLE " + "rebalance_test" + "(a string, b int) " + + "STORED AS ORC TBLPROPERTIES('transactional'='true')", driver); + executeStatementOnDriver( + "INSERT OVERWRITE TABLE " + "rebalance_test" + " select a, b from " + stageTableName, driver); + + //do some single inserts to have more data in the first bucket.
+ executeStatementOnDriver("INSERT INTO TABLE " + "rebalance_test" + " values ('12',12)", driver); + executeStatementOnDriver("INSERT INTO TABLE " + "rebalance_test" + " values ('13',13)", driver); + executeStatementOnDriver("INSERT INTO TABLE " + "rebalance_test" + " values ('14',14)", driver); + executeStatementOnDriver("INSERT INTO TABLE " + "rebalance_test" + " values ('15',15)", driver); + executeStatementOnDriver("INSERT INTO TABLE " + "rebalance_test" + " values ('16',16)", driver); + executeStatementOnDriver("INSERT INTO TABLE " + "rebalance_test" + " values ('17',17)", driver); + + // Make sure we have all the records persisted + List allRecords = execSelectAndDumpData( + "SELECT * FROM " + "rebalance_test", driver, "Dumping data from test table, " + "rebalance_test"); + Assert.assertEquals(18, allRecords.size()); + + Assert.assertFalse(isBalanced(testDataProvider, null)); + + // Please note, as the test tests rebalance compaction, not insert overwrite, it is not necessary to test if + // we have the exact same data after preparing the test data as we had at the source table. + return testDataProvider; + } + + /** + * Checks whether the test data is balanced. + * Balanced if every bucket contains between floor(n / bucket count) and ceil(n / bucket count) rows, + * where n is the number of rows in the table. + * In our test case, we inserted 6 extra rows into the first bucket, so we can say it is properly unbalanced + * if the first bucket has 6 more elements than the second one. + * + * @return true if every bucket size is within the bounds above, false otherwise
+ **/ + private boolean isBalanced(TestDataProvider testDataProvider, String partition) throws Exception { + FileSystem fs = FileSystem.get(conf); + GetTableRequest getTableRequest = new GetTableRequest("default", "rebalance_test"); + Table table = msClient.getTable(getTableRequest); + + // Assert that we have multiple buckets + List bucketFilenames = CompactorTestUtil.getBucketFileNames(fs, table, partition, "base_0000001"); + assertTrue(bucketFilenames.size() > 1); + + int bucketCount = bucketFilenames.size(); + + AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf); + List[] bucketData = new ArrayList[bucketCount]; + for (int i = 0; i < bucketCount; i++) { + bucketData[i] = testDataProvider.getBucketData( + table.getTableName(), BucketCodec.V1.encode(options.bucket(i)) + ""); + } + + int allRecordCount = Arrays.stream(bucketData) + .map(Collection::size) + .reduce(0, Integer::sum); + + int lowerBound = allRecordCount / bucketCount; + int upperBound = (allRecordCount + bucketCount - 1) / bucketCount; + + for (int i = 0; i < bucketCount; i++) { + if (bucketData[i].size() > upperBound || bucketData[i].size() < lowerBound) { + return false; + } + } + + return true; + } +} diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java index 7443ae64ae43..6ae1a193c601 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestRestrictedList.java @@ -111,6 +111,7 @@ public static void startServices() throws Exception { addToExpectedRestrictedMap("hive.hook.proto.base-directory"); addToExpectedRestrictedMap("hive.rewrite.data.policy"); addToExpectedRestrictedMap("hive.query.history.enabled"); + addToExpectedRestrictedMap("hive.llap.cluster.routing.rules"); checkRestrictedListMatch(); } diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/service/auth/saml/TestHttpSamlAuthentication.java b/itests/hive-unit/src/test/java/org/apache/hive/service/auth/saml/TestHttpSamlAuthentication.java index 7d119e9372c2..f58cf6a0adf5 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/service/auth/saml/TestHttpSamlAuthentication.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/service/auth/saml/TestHttpSamlAuthentication.java @@ -35,6 +35,7 @@ import java.net.InetAddress; import java.net.ServerSocket; import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; @@ -137,9 +138,17 @@ public void cleanUpIdpEnv() { idpContainer.stop(); idpContainer = null; } - if (miniHS2 != null) { + if (miniHS2 != null && miniHS2.isStarted()) { miniHS2.stop(); } + HiveSamlAuthTokenGenerator.shutdown(); + } + + private static ISAMLAuthTokenGenerator createTokenGenerator(String tokenTtl) { + HiveSamlAuthTokenGenerator.shutdown(); + HiveConf conf = new HiveConf(); + conf.setVar(ConfVars.HIVE_SERVER2_SAML_CALLBACK_TOKEN_TTL, tokenTtl); + return HiveSamlAuthTokenGenerator.get(conf); } private void setupIDP(boolean useSignedAssertions, String authMode) throws Exception { @@ -554,6 +563,86 @@ public void testTokenReuse() throws Exception { } } + @Test + public void testValidTokenRoundTrip() throws Exception { + ISAMLAuthTokenGenerator tokenGenerator = createTokenGenerator("30s"); + String token = tokenGenerator.get("alice", "relay-state-1"); + assertEquals("alice", tokenGenerator.validate(token)); + } + + @Test + public void testForgedSignatureRejected() throws Exception { + ISAMLAuthTokenGenerator tokenGenerator = createTokenGenerator("30s"); + String forgedPayload = "u=alice;id=1337;time=" + System.currentTimeMillis() + + ";rs=deadbeef;sg=bogus"; + try { + String forgedToken = Base64.getEncoder().encodeToString(forgedPayload.getBytes(StandardCharsets.UTF_8)); + 
tokenGenerator.validate(forgedToken); + fail("Expected forged token to be rejected"); + } catch (HttpSamlAuthenticationException e) { + assertEquals("Token could not be verified", e.getMessage()); + } + } + + @Test + public void testInvalidTokenRejected() throws Exception { + ISAMLAuthTokenGenerator tokenGenerator = createTokenGenerator("30s"); + try { + tokenGenerator.validate("notAValidToken"); + fail("Expected malformed base64 token to be rejected"); + } catch (HttpSamlAuthenticationException e) { + assertEquals("Invalid token", e.getMessage()); + } + String invalidStructure = Base64.getEncoder().encodeToString("foo".getBytes()); + try { + tokenGenerator.validate(invalidStructure); + fail("Expected invalid token structure to be rejected"); + } catch (HttpSamlAuthenticationException e) { + assertEquals("Invalid token", e.getMessage()); + } + } + + @Test + public void testExpiredTokenRejected() throws Exception { + ISAMLAuthTokenGenerator tokenGenerator = createTokenGenerator("1s"); + String token = tokenGenerator.get("alice", "relay-state-1"); + Thread.sleep(1100); + try { + tokenGenerator.validate(token); + fail("Expected expired token to be rejected"); + } catch (HttpSamlAuthenticationException e) { + assertEquals("Token is expired", e.getMessage()); + } + } + + @Test + public void testParseHandlesBase64PaddingInSignature() { + Map kv = new HashMap<>(); + String token = "u=alice;id=1;time=1000;rs=rs1;sg=YWJjZA=="; + assertTrue(HiveSamlAuthTokenGenerator.parse(token, kv)); + assertEquals("alice", kv.get("u")); + assertEquals("YWJjZA==", kv.get("sg")); + } + + @Test + public void testParseRejectsEncodedBearerToken() { + Map kv = new HashMap<>(); + String encoded = Base64.getEncoder().encodeToString( + "u=alice;id=1;time=1000;rs=rs1;sg=abc".getBytes()); + assertFalse(HiveSamlAuthTokenGenerator.parse(encoded, kv)); + } + + @Test + public void testParseDecodedTokenFromGenerator() throws Exception { + ISAMLAuthTokenGenerator tokenGenerator = 
createTokenGenerator("30s"); + String encoded = tokenGenerator.get("bob", "relay-42"); + String decoded = new String(Base64.getDecoder().decode(encoded), StandardCharsets.UTF_8); + Map kv = new HashMap<>(); + assertTrue(HiveSamlAuthTokenGenerator.parse(decoded, kv)); + assertEquals("bob", kv.get("u")); + assertEquals("relay-42", kv.get(HiveSamlAuthTokenGenerator.RELAY_STATE)); + } + private static void assertLoggedInUser(HiveConnection connection, String expectedUser) throws SQLException { Statement stmt = connection.createStatement(); diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 214afa40d6aa..b4441518a64a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -75,6 +75,7 @@ minillap.query.files=\ cte_4.q,\ cttl.q,\ custom_udf_vectorization.q,\ + db_notification_batch_insert.q,\ dynamic_partition_pruning_2.q,\ dynamic_semijoin_user_level.q,\ dynpart_cast.q,\ diff --git a/llap-common/src/protobuf/LlapDaemonProtocol.proto b/llap-common/src/protobuf/LlapDaemonProtocol.proto index 641958aef8a1..8b15f4392eb5 100644 --- a/llap-common/src/protobuf/LlapDaemonProtocol.proto +++ b/llap-common/src/protobuf/LlapDaemonProtocol.proto @@ -233,10 +233,11 @@ message SetCapacityRequestProto { message SetCapacityResponseProto { } -// Used for proactive eviction request. Must contain one DB name, and optionally table information. +// Used for proactive eviction request. Must contain a DB name, and optionally table information and catalog name. message EvictEntityRequestProto { required string db_name = 1; repeated TableProto table = 2; + optional string catalog_name = 3 [default = "hive"]; } // Used in EvictEntityRequestProto, can be used for non-partitioned and partitioned tables too. 
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapCacheMetadataSerializer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapCacheMetadataSerializer.java index dcb90ec197dd..aa3b6fdd0662 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapCacheMetadataSerializer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapCacheMetadataSerializer.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.llap.io.encoded.LlapOrcCacheLoader; import org.apache.hadoop.hive.ql.io.SyntheticFileId; import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hive.common.util.FixedSizedObjectPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -149,7 +150,7 @@ public void loadData(LlapDaemonProtocolProtos.CacheEntryList data) { } private void loadData(LlapDaemonProtocolProtos.CacheEntry ce) throws IOException { - CacheTag cacheTag = decodeCacheTag(ce.getCacheTag()); + CacheTag cacheTag = decodeCacheTag(ce.getCacheTag(), conf); DiskRangeList ranges = decodeRanges(ce.getRangesList()); Object fileKey = decodeFileKey(ce.getFileKey()); try (LlapOrcCacheLoader llr = new LlapOrcCacheLoader(new Path(ce.getFilePath()), fileKey, conf, cache, @@ -167,9 +168,16 @@ private static DiskRangeList decodeRanges(List + tables.forEach((table, partitions) -> + sb.append(catalogdb.catalog()).append(".").append(catalogdb.database()) + .append(".").append(table).append(" ")) + ); sb.append(" Duration: ").append(time).append(" ms"); LOG.debug(sb.toString()); } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index 75a71560b81c..d97c2071f869 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -235,7 +235,7 @@ public OrcEncodedDataReader(LowLevelCache lowLevelCache, BufferUsageManager buff // LlapInputFormat needs to know the file schema to decide if schema evolution is supported. PartitionDesc partitionDesc = LlapHiveUtils.partitionDescForPath(split.getPath(), parts); cacheTag = HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_TRACK_CACHE_USAGE) - ? LlapHiveUtils.getDbAndTableNameForMetrics(split.getPath(), true, partitionDesc) : null; + ? LlapHiveUtils.getCacheTag(split.getPath(), true, partitionDesc) : null; // 1. Get file metadata from cache, or create the reader and read it. // Don't cache the filesystem object for now; Tez closes it and FS cache will fix all that fsSupplier = getFsSupplier(split.getPath(), jobConf); @@ -377,7 +377,9 @@ private void performDataRead() throws IOException { // TODO: I/O threadpool could be here - one thread per stripe; for now, linear. boolean hasFileId = this.fileKey != null; OrcBatchKey stripeKey = hasFileId ? 
new OrcBatchKey(fileKey, -1, 0) : null; - pathCache.touch(fileKey, split.getPath().toUri().toString()); + if (pathCache != null) { + pathCache.touch(fileKey, split.getPath().toUri().toString()); + } for (int stripeIxMod = 0; stripeIxMod < stripeRgs.length; ++stripeIxMod) { if (processStop()) { return; diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java index 3322136366d0..e32b0584c888 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java @@ -225,7 +225,7 @@ public MemoryBuffer create() { PartitionDesc partitionDesc = LlapHiveUtils.partitionDescForPath(split.getPath(), parts); fileKey = determineCacheKey(fs, split, partitionDesc, daemonConf); cacheTag = HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_TRACK_CACHE_USAGE) - ? LlapHiveUtils.getDbAndTableNameForMetrics(split.getPath(), true, partitionDesc) : null; + ? 
LlapHiveUtils.getCacheTag(split.getPath(), true, partitionDesc) : null; this.sourceInputFormat = sourceInputFormat; this.sourceSerDe = sourceSerDe; this.reporter = reporter; diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java index 02ee55f250e8..1c975b623d0e 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper; import org.apache.hadoop.hive.common.io.CacheTag; import org.apache.hadoop.hive.llap.cache.EvictionDispatcher; +import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; import org.apache.hadoop.hive.ql.io.SyntheticFileId; import org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb; @@ -140,7 +141,6 @@ public boolean isMarkedForEviction() { @Override public CacheTag getTag() { - // We don't care about these. 
- return CacheTag.build("OrcEstimates"); + return CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "OrcEstimates"); } } \ No newline at end of file diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestCacheContentsTracker.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestCacheContentsTracker.java index 15d3f8fd1579..08eb1f45d936 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestCacheContentsTracker.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestCacheContentsTracker.java @@ -17,11 +17,10 @@ */ package org.apache.hadoop.hive.llap.cache; -import java.util.HashMap; import java.util.LinkedHashMap; -import java.util.Map; import org.apache.hadoop.hive.common.io.CacheTag; +import org.apache.hadoop.hive.metastore.Warehouse; import org.junit.BeforeClass; import org.junit.Test; @@ -127,7 +126,7 @@ public void testCacheTagComparison() { public void testEncodingDecoding() throws Exception { LinkedHashMap partDescs = new LinkedHashMap<>(); partDescs.put("pytha=goras", "a2+b2=c2"); - CacheTag tag = CacheTag.build("math.rules", partDescs); + CacheTag tag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "math.rules", partDescs); CacheTag.SinglePartitionCacheTag stag = ((CacheTag.SinglePartitionCacheTag)tag); assertEquals("pytha=goras=a2+b2=c2", stag.partitionDescToString()); assertEquals(1, stag.getPartitionDescMap().size()); @@ -136,7 +135,7 @@ public void testEncodingDecoding() throws Exception { partDescs.clear(); partDescs.put("mutli=one", "one=/1"); partDescs.put("mutli=two/", "two=2"); - tag = CacheTag.build("math.rules", partDescs); + tag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "math.rules", partDescs); CacheTag.MultiPartitionCacheTag mtag = ((CacheTag.MultiPartitionCacheTag)tag); assertEquals("mutli=one=one=/1/mutli=two/=two=2", mtag.partitionDescToString()); assertEquals(2, mtag.getPartitionDescMap().size()); @@ -168,6 +167,10 @@ private static LlapCacheableBuffer createMockBuffer(long 
size, CacheTag cacheTag } public static CacheTag cacheTagBuilder(String dbAndTable, String... partitions) { + String[] parts = dbAndTable.split("\\."); + if(parts.length < 3) { + dbAndTable = Warehouse.DEFAULT_CATALOG_NAME + "." + dbAndTable; + } if (partitions != null && partitions.length > 0) { LinkedHashMap partDescs = new LinkedHashMap<>(); for (String partition : partitions) { @@ -215,33 +218,33 @@ private static void evictSomeTestBuffers() { private static final String EXPECTED_CACHE_STATE_WHEN_FULL = "\n" + "Cache state: \n" + - "default : 2/2, 2101248/2101248\n" + - "default.testtable : 2/2, 2101248/2101248\n" + - "otherdb : 7/7, 1611106304/1611106304\n" + - "otherdb.testtable : 4/4, 231424/231424\n" + - "otherdb.testtable/p=v1 : 3/3, 100352/100352\n" + - "otherdb.testtable/p=v1/pp=vv1 : 2/2, 34816/34816\n" + - "otherdb.testtable/p=v1/pp=vv2 : 1/1, 65536/65536\n" + - "otherdb.testtable/p=v2 : 1/1, 131072/131072\n" + - "otherdb.testtable/p=v2/pp=vv1 : 1/1, 131072/131072\n" + - "otherdb.testtable2 : 2/2, 537133056/537133056\n" + - "otherdb.testtable2/p=v3 : 2/2, 537133056/537133056\n" + - "otherdb.testtable3 : 1/1, 1073741824/1073741824"; + "hive.default : 2/2, 2101248/2101248\n" + + "hive.default.testtable : 2/2, 2101248/2101248\n" + + "hive.otherdb : 7/7, 1611106304/1611106304\n" + + "hive.otherdb.testtable : 4/4, 231424/231424\n" + + "hive.otherdb.testtable/p=v1 : 3/3, 100352/100352\n" + + "hive.otherdb.testtable/p=v1/pp=vv1 : 2/2, 34816/34816\n" + + "hive.otherdb.testtable/p=v1/pp=vv2 : 1/1, 65536/65536\n" + + "hive.otherdb.testtable/p=v2 : 1/1, 131072/131072\n" + + "hive.otherdb.testtable/p=v2/pp=vv1 : 1/1, 131072/131072\n" + + "hive.otherdb.testtable2 : 2/2, 537133056/537133056\n" + + "hive.otherdb.testtable2/p=v3 : 2/2, 537133056/537133056\n" + + "hive.otherdb.testtable3 : 1/1, 1073741824/1073741824"; private static final String EXPECTED_CACHE_STATE_AFTER_EVICTION = "\n" + "Cache state: \n" + - "default : 0/2, 0/2101248\n" + - "default.testtable : 0/2, 
0/2101248\n" + - "otherdb : 5/7, 1074202624/1611106304\n" + - "otherdb.testtable : 3/4, 198656/231424\n" + - "otherdb.testtable/p=v1 : 2/3, 67584/100352\n" + - "otherdb.testtable/p=v1/pp=vv1 : 1/2, 2048/34816\n" + - "otherdb.testtable/p=v1/pp=vv2 : 1/1, 65536/65536\n" + - "otherdb.testtable/p=v2 : 1/1, 131072/131072\n" + - "otherdb.testtable/p=v2/pp=vv1 : 1/1, 131072/131072\n" + - "otherdb.testtable2 : 1/2, 262144/537133056\n" + - "otherdb.testtable2/p=v3 : 1/2, 262144/537133056\n" + - "otherdb.testtable3 : 1/1, 1073741824/1073741824"; + "hive.default : 0/2, 0/2101248\n" + + "hive.default.testtable : 0/2, 0/2101248\n" + + "hive.otherdb : 5/7, 1074202624/1611106304\n" + + "hive.otherdb.testtable : 3/4, 198656/231424\n" + + "hive.otherdb.testtable/p=v1 : 2/3, 67584/100352\n" + + "hive.otherdb.testtable/p=v1/pp=vv1 : 1/2, 2048/34816\n" + + "hive.otherdb.testtable/p=v1/pp=vv2 : 1/1, 65536/65536\n" + + "hive.otherdb.testtable/p=v2 : 1/1, 131072/131072\n" + + "hive.otherdb.testtable/p=v2/pp=vv1 : 1/1, 131072/131072\n" + + "hive.otherdb.testtable2 : 1/2, 262144/537133056\n" + + "hive.otherdb.testtable2/p=v3 : 1/2, 262144/537133056\n" + + "hive.otherdb.testtable3 : 1/1, 1073741824/1073741824"; } diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestFileCache.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestFileCache.java index 34203ddf5d6f..f5bb1e0d254d 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestFileCache.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestFileCache.java @@ -19,6 +19,7 @@ import com.google.common.base.Function; import org.apache.hadoop.hive.common.io.CacheTag; +import org.apache.hadoop.hive.metastore.Warehouse; import org.junit.Test; import java.util.concurrent.ConcurrentHashMap; @@ -32,7 +33,7 @@ public void testFileCacheMetadata() { ConcurrentHashMap> cache = new ConcurrentHashMap<>(); Object fileKey = 1234L; Function f = a -> new Object(); - CacheTag tag = 
CacheTag.build("test_table"); + CacheTag tag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "test_db.test_table"); FileCache result = FileCache.getOrAddFileSubCache(cache, fileKey, f, tag); diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java index 4e3c10ed6b2d..764dd9ec319b 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java @@ -39,6 +39,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.io.CacheTag; +import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.common.io.DiskRange; import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.hadoop.hive.common.io.DataCache.DiskRangeListFactory; @@ -309,13 +310,14 @@ private void _testProactiveEvictionMark(boolean isInstantDeallocation) { LlapDataBuffer[] buffs1 = IntStream.range(0, 4).mapToObj(i -> fb()).toArray(LlapDataBuffer[]::new); DiskRange[] drs1 = drs(IntStream.range(1, 5).toArray()); - CacheTag tag1 = CacheTag.build("default.table1"); + CacheTag tag1 = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "default.table1"); LlapDataBuffer[] buffs2 = IntStream.range(0, 41).mapToObj(i -> fb()).toArray(LlapDataBuffer[]::new); DiskRange[] drs2 = drs(IntStream.range(1, 42).toArray()); - CacheTag tag2 = CacheTag.build("default.table2"); + CacheTag tag2 = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "default.table2"); - Predicate predicate = tag -> "default.table1".equals(tag.getTableName()); + Predicate predicate = tag -> + (Warehouse.DEFAULT_CATALOG_NAME + "." 
+ "default.table1").equals(tag.getTableName()); cache.putFileData(fn1, drs1, buffs1, 0, Priority.NORMAL, null, tag1); cache.putFileData(fn2, drs2, buffs2, 0, Priority.NORMAL, null, tag2); diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java index 62a8c7474399..5ec15beccfb9 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.IllegalCacheConfigurationException; +import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.llap.cache.LowLevelCache.Priority; import org.apache.hadoop.hive.llap.io.encoded.OrcEncodedDataReader; import org.apache.hadoop.hive.llap.io.metadata.MetadataCache; @@ -249,7 +250,7 @@ public void testGetOrcTailForPath() throws Exception { Path path = new Path("../data/files/alltypesorc"); Configuration jobConf = new Configuration(); Configuration daemonConf = new Configuration(); - CacheTag tag = CacheTag.build("test-table"); + CacheTag tag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "test-db.test-table"); OrcTail uncached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, null); jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true"); OrcTail cached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, null); @@ -270,7 +271,7 @@ public void testGetOrcTailForPathWithFileId() throws Exception { Path path = new Path("../data/files/alltypesorc"); Configuration jobConf = new Configuration(); Configuration daemonConf = new Configuration(); - CacheTag tag = CacheTag.build("test-table"); + CacheTag tag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "test-db.test-table"); FileSystem fs = FileSystem.get(daemonConf); 
FileStatus fileStatus = fs.getFileStatus(path); OrcTail uncached = OrcEncodedDataReader.getOrcTailForPath(fileStatus.getPath(), jobConf, tag, daemonConf, cache, new SyntheticFileId(fileStatus)); @@ -294,7 +295,7 @@ public void testGetOrcTailForPathWithFileIdChange() throws Exception { Path path = new Path("../data/files/alltypesorc"); Configuration jobConf = new Configuration(); Configuration daemonConf = new Configuration(); - CacheTag tag = CacheTag.build("test-table"); + CacheTag tag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "test-db.test-table"); OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, new SyntheticFileId(path, 100, 100)); jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true"); Exception ex = null; @@ -337,19 +338,23 @@ public void testProactiveEvictionMark() throws Exception { // below is of length 65 ByteBuffer bb2 = ByteBuffer.wrap("-large-meta-data-content-large-meta-data-content-large-meta-data-".getBytes()); - LlapBufferOrBuffers table1Buffers1 = cache.putFileMetadata(fn1, bb, CacheTag.build("default.table1"), isStopped); + LlapBufferOrBuffers table1Buffers1 = cache.putFileMetadata(fn1, bb, + CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "default.table1"), isStopped); assertNotNull(table1Buffers1.getSingleLlapBuffer()); - LlapBufferOrBuffers table1Buffers2 = cache.putFileMetadata(fn2, bb2, CacheTag.build("default.table1"), isStopped); + LlapBufferOrBuffers table1Buffers2 = cache.putFileMetadata(fn2, bb2, + CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "default.table1"), isStopped); assertNotNull(table1Buffers2.getMultipleLlapBuffers()); assertEquals(2, table1Buffers2.getMultipleLlapBuffers().length); // Case for when metadata consists of just 1 buffer (most of the realworld cases) ByteBuffer bb3 = ByteBuffer.wrap("small-meta-data-content-for-otherFile".getBytes()); - LlapBufferOrBuffers table2Buffers1 = cache.putFileMetadata(fn3, bb3, CacheTag.build("default.table2"), isStopped); + 
LlapBufferOrBuffers table2Buffers1 = cache.putFileMetadata(fn3, bb3, + CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME, "default.table2"), isStopped); assertNotNull(table2Buffers1.getSingleLlapBuffer()); - Predicate predicate = tag -> "default.table1".equals(tag.getTableName()); + Predicate predicate = tag -> + (Warehouse.DEFAULT_CATALOG_NAME + ".default.table1").equals(tag.getTableName()); // Simulating eviction on some buffers table1Buffers2.getMultipleLlapBuffers()[1].decRef(); diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestProactiveEviction.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestProactiveEviction.java index 89c8f6055038..e75237b87650 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestProactiveEviction.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestProactiveEviction.java @@ -22,6 +22,8 @@ import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -32,7 +34,9 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.llap.ProactiveEviction.Request; import org.apache.hadoop.hive.llap.ProactiveEviction.Request.Builder; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.EvictEntityRequestProto; import org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics; +import org.apache.hadoop.hive.metastore.Warehouse; import com.google.common.annotations.VisibleForTesting; @@ -42,6 +46,7 @@ import static org.apache.hadoop.hive.llap.cache.TestCacheContentsTracker.cacheTagBuilder; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; /** @@ -106,6 +111,328 @@ private static void assertMatchOnTags(Builder requestBuilder, String 
expected) { assertEquals(expected, sb.toString()); } + /** + * Verifies that passing an explicit catalog produces correct matching via isTagMatch. + * TEST_TAGS all belong to the default catalog, so requests for a different catalog must not match. + */ + @Test + public void testCatalogAwareCacheTagAndRequestMatching() { + // Default catalog matches as expected. + assertMatchOnTags(Builder.create().addDb("fx"), "111111111111000000"); + assertMatchOnTags(Builder.create().addTable("fx", "futures"), "000001111000000000"); + assertMatchOnTags(Builder.create().addPartitionOfATable("fx", "futures", + buildParts("ccy", "JPY")), "000000110000000000"); + assertMatchOnTags(Builder.create().addTable("fixedincome", "bonds"), "000000000000000110"); + assertMatchOnTags(Builder.create().addPartitionOfATable("fx", "rates", + buildParts("from", "EUR", "to", "HUF")), "000010000000000000"); + + // Non-default catalog: CacheTag now carries catalog info, so none of the TEST_TAGS + // (all default-catalog) should match requests targeting a different catalog. + assertMatchOnTags(Builder.create().addDb("custom_catalog", "fx"), "000000000000000000"); + assertMatchOnTags(Builder.create().addTable("custom_catalog", "equity", "prices"), + "000000000000000000"); + assertMatchOnTags(Builder.create().addPartitionOfATable( + "custom_catalog", "equity", "prices", buildParts("ex", "NYSE")), + "000000000000000000"); + } + + /** + * Verifies that catalog_name is serialized into the proto and correctly restored via fromProtoRequest. + */ + @Test + public void testProtoRoundTripPreservesCatalog() { + // Default catalog is always serialized into the proto. 
+ Request defaultCatRequest = Builder.create().addDb("testdb").build(); + List protos = defaultCatRequest.toProtoRequests(); + assertEquals(1, protos.size()); + EvictEntityRequestProto proto = protos.get(0); + assertEquals(Warehouse.DEFAULT_CATALOG_NAME, proto.getCatalogName()); + assertEquals("testdb", proto.getDbName()); + + Request roundTripped = Builder.create().fromProtoRequest(proto).build(); + assertTrue(roundTripped.hasDatabaseName(Warehouse.DEFAULT_CATALOG_NAME, "testdb")); + + // Custom catalog is also preserved. + Request customCatRequest = Builder.create().addTable("spark_catalog", "salesdb", "orders").build(); + protos = customCatRequest.toProtoRequests(); + assertEquals(1, protos.size()); + proto = protos.get(0); + assertEquals("spark_catalog", proto.getCatalogName()); + assertEquals("salesdb", proto.getDbName()); + + roundTripped = Builder.create().fromProtoRequest(proto).build(); + assertTrue(roundTripped.hasDatabaseName("spark_catalog", "salesdb")); + } + + /** + * Verifies that entities in different catalogs are independently scoped even when they share + * the same DB name. + */ + @Test + public void testMultiCatalogBuilderScoping() { + // Two different catalogs, each with the same DB name but different tables. + Request request = Builder.create() + .addTable("catalog_a", "shared_db", "table_a") + .addTable("catalog_b", "shared_db", "table_b") + .build(); + + assertEquals(2, request.getEntities().size()); + assertTrue(request.getEntities().containsKey(new Request.CatalogDb("catalog_a", "shared_db"))); + assertTrue(request.getEntities().containsKey(new Request.CatalogDb("catalog_b", "shared_db"))); + + // catalog_a only knows about table_a. + assertTrue(request.getEntities().get(new Request.CatalogDb("catalog_a", "shared_db")).containsKey("table_a")); + assertFalse(request.getEntities().get(new Request.CatalogDb("catalog_a", "shared_db")).containsKey("table_b")); + + // catalog_b only knows about table_b. 
+ assertTrue(request.getEntities().get(new Request.CatalogDb("catalog_b", "shared_db")).containsKey("table_b")); + assertFalse(request.getEntities().get(new Request.CatalogDb("catalog_b", "shared_db")).containsKey("table_a")); + } + + /** + * Verifies that multiple tables and partitions added to the same catalog+DB are merged + * into a single catalog entry (no duplication). + */ + @Test + public void testSameCatalogMultipleEntitiesMergedCorrectly() { + Request request = Builder.create() + .addTable("mydb", "table1") + .addTable("mydb", "table2") + .addPartitionOfATable("mydb", "table3", buildParts("dt", "2024-01-01")) + .addPartitionOfATable("mydb", "table3", buildParts("dt", "2024-01-02")) + .build(); + + assertTrue(request.hasDatabaseName(Warehouse.DEFAULT_CATALOG_NAME, "mydb")); + // One catalog, one DB, three tables. + assertEquals(1, request.getEntities().size()); + assertEquals(3, request.getEntities() + .get(new Request.CatalogDb(Warehouse.DEFAULT_CATALOG_NAME, "mydb")).size()); + // table3 has two partition specs. + assertEquals(2, request.getEntities() + .get(new Request.CatalogDb(Warehouse.DEFAULT_CATALOG_NAME, "mydb")).get("table3").size()); + } + + /** + * Verifies that CacheTag catalog information is correctly used to isolate eviction between catalogs. + * A request targeting catalog A must not evict buffers that belong to catalog B, even when the + * DB and table names are identical. + */ + @Test + public void testCatalogIsolationInIsTagMatch() { + CacheTag defaultCatalogTag = cacheTagBuilder("fx.rates", "from=USD", "to=HUF"); + CacheTag otherCatalogTag = cacheTagBuilder("other_catalog.fx.rates", "from=USD", "to=HUF"); + + // Request for the default catalog's "fx" DB matches only default-catalog tags. 
+ Request defaultCatalogRequest = Builder.create() + .fromProtoRequest(Builder.create() + .addDb("fx") + .build().toProtoRequests().get(0)) + .build(); + assertTrue(defaultCatalogRequest.isTagMatch(defaultCatalogTag)); + assertFalse("Must not evict buffers belonging to other_catalog", + defaultCatalogRequest.isTagMatch(otherCatalogTag)); + + // Request for a different catalog matches only tags from that catalog. + Request otherCatalogRequest = Builder.create() + .fromProtoRequest(Builder.create() + .addDb("other_catalog", "fx") + .build().toProtoRequests().get(0)) + .build(); + assertTrue(otherCatalogRequest.isTagMatch(otherCatalogTag)); + assertFalse("Must not evict buffers belonging to the default catalog", + otherCatalogRequest.isTagMatch(defaultCatalogTag)); + + // A request for a DB that doesn't exist in the tags must not match, regardless of catalog. + Request noMatchRequest = Builder.create() + .fromProtoRequest(Builder.create() + .addDb("any_catalog", "nonexistent_db") + .build().toProtoRequests().get(0)) + .build(); + assertFalse(noMatchRequest.isTagMatch(defaultCatalogTag)); + assertFalse(noMatchRequest.isTagMatch(otherCatalogTag)); + } + + /** + * Verifies that Iceberg metadata table cache tags (catalog.db.table.metaTable) are handled by + * isTagMatch and evicted when the base table is dropped. 
+ */ + @Test + public void testIcebergMetaTableTagMatching() { + CacheTag baseTableTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders"); + CacheTag filesMetaTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders.files"); + CacheTag snapshotsMetaTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders.snapshots"); + CacheTag otherTableMetaTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.other.files"); + + Request dropTableRequest = Builder.create() + .fromProtoRequest(Builder.create() + .addTable("salesdb", "orders") + .build().toProtoRequests().get(0)) + .build(); + + assertTrue(dropTableRequest.isTagMatch(baseTableTag)); + assertTrue(dropTableRequest.isTagMatch(filesMetaTag)); + assertTrue(dropTableRequest.isTagMatch(snapshotsMetaTag)); + assertFalse(dropTableRequest.isTagMatch(otherTableMetaTag)); + + // Drop-partition requests must not evict metadata-table cache via prefix matching. + Request dropPartitionRequest = Builder.create() + .fromProtoRequest(Builder.create() + .addPartitionOfATable("salesdb", "orders", buildParts("dt", "2024-01-01")) + .build().toProtoRequests().get(0)) + .build(); + assertFalse(dropPartitionRequest.isTagMatch(filesMetaTag)); + } + + /** + * Legacy cache tags created before catalog support are 2-part (db.table) with no catalog + * component. They must be treated as belonging to the default catalog. + */ + @Test + public void testTwoPartLegacyTagMatching() { + CacheTag tableTag = CacheTag.build("salesdb.orders"); + CacheTag partitionedTag = CacheTag.build("salesdb.orders", buildParts("dt", "2024-01-01")); + + // Drop database, table and matching partition (all default catalog) evict the legacy tags. 
+ assertTrue(dropDbRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb").isTagMatch(tableTag)); + assertTrue(dropTableRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb", "orders").isTagMatch(tableTag)); + assertTrue(dropPartitionRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb", "orders", + buildParts("dt", "2024-01-01")).isTagMatch(partitionedTag)); + + // A non-matching partition value must not evict. + assertFalse(dropPartitionRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb", "orders", + buildParts("dt", "2024-01-02")).isTagMatch(partitionedTag)); + + // A request scoped to a custom catalog must not evict default-catalog legacy tags. + assertFalse(dropDbRequest("custom_catalog", "salesdb").isTagMatch(tableTag)); + assertFalse(dropTableRequest("custom_catalog", "salesdb", "orders").isTagMatch(tableTag)); + } + + /** + * Iceberg metadata table tags (catalog.db.table.metaTable) on a non-default catalog must be + * evicted only by requests scoped to that same catalog. + */ + @Test + public void testNonDefaultCatalogIcebergMetaTableMatching() { + CacheTag filesMetaTag = CacheTag.build("spark_catalog.salesdb.orders.files"); + CacheTag snapshotsMetaTag = CacheTag.build("spark_catalog.salesdb.orders.snapshots"); + + // Drop table on the custom catalog evicts its metadata-table cache. + Request dropTable = dropTableRequest("spark_catalog", "salesdb", "orders"); + assertTrue(dropTable.isTagMatch(filesMetaTag)); + assertTrue(dropTable.isTagMatch(snapshotsMetaTag)); + + // Drop database on the custom catalog evicts metadata-table cache too. + Request dropDb = dropDbRequest("spark_catalog", "salesdb"); + assertTrue(dropDb.isTagMatch(filesMetaTag)); + assertTrue(dropDb.isTagMatch(snapshotsMetaTag)); + + // The same logical name in the default catalog must not be evicted. 
+ assertFalse(dropTableRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb", "orders") + .isTagMatch(filesMetaTag)); + assertFalse(dropDbRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb").isTagMatch(filesMetaTag)); + } + + /** + * Dropping a database must evict both base-table and Iceberg metadata-table cache entries + * belonging to that database. + */ + @Test + public void testDropDatabaseEvictsMetaTableTags() { + CacheTag baseTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders"); + CacheTag filesMetaTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders.files"); + + Request dropDb = dropDbRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb"); + assertTrue(dropDb.isTagMatch(baseTag)); + assertTrue(dropDb.isTagMatch(filesMetaTag)); + + // A database with a different name must not match. + assertFalse(dropDbRequest(Warehouse.DEFAULT_CATALOG_NAME, "otherdb").isTagMatch(filesMetaTag)); + } + + /** + * Prefix matching used for Iceberg metadata tables must not produce false positives for tables + * that merely share a name prefix with the dropped table. + */ + @Test + public void testDropTablePrefixMatchingAvoidsFalsePositives() { + CacheTag siblingTableTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders_archive"); + CacheTag siblingMetaTag = + CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders_archive.files"); + + Request dropTable = dropTableRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb", "orders"); + assertFalse(dropTable.isTagMatch(siblingTableTag)); + assertFalse(dropTable.isTagMatch(siblingMetaTag)); + } + + /** + * With catalog-aware tags a 3-part name is always interpreted as catalog.db.table, never as a + * default-catalog db.table.metaTable. + */ + @Test + public void testThreePartTagInterpretedAsCatalogQualified() { + CacheTag tag = CacheTag.build("custom_catalog.salesdb.orders"); + + // Matched when the request targets the same catalog + db + table. 
+ assertTrue(dropTableRequest("custom_catalog", "salesdb", "orders").isTagMatch(tag)); + + // Not matched when "custom_catalog" is mistaken for a database in the default catalog. + assertFalse(dropTableRequest(Warehouse.DEFAULT_CATALOG_NAME, "custom_catalog", "salesdb") + .isTagMatch(tag)); + } + + /** + * Snapshot-ref tags (branch_/tag_) must be evicted when their base table is dropped, both in the + * 4-part catalog-qualified form and in the legacy 3-part db.table.ref form. + */ + @Test + public void testSnapshotRefTagMatching() { + CacheTag branchTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders.branch_main"); + CacheTag tagRefTag = CacheTag.build(Warehouse.DEFAULT_CATALOG_NAME + ".salesdb.orders.tag_v1"); + // Legacy 3-part snapshot ref without catalog prefix -> default-catalog db.table.ref. + CacheTag legacyBranchTag = CacheTag.build("salesdb.orders.branch_main"); + + Request dropTable = dropTableRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb", "orders"); + assertTrue(dropTable.isTagMatch(branchTag)); + assertTrue(dropTable.isTagMatch(tagRefTag)); + assertTrue(dropTable.isTagMatch(legacyBranchTag)); + } + + /** + * Cache tag names must have between 2 and 4 dot-separated components; anything else is rejected. + */ + @Test + public void testInvalidCacheTagLengthThrows() { + Request request = dropDbRequest(Warehouse.DEFAULT_CATALOG_NAME, "salesdb"); + + // Single-component tag is not a valid db-qualified name. + assertThrows(UnsupportedOperationException.class, + () -> request.isTagMatch(CacheTag.build("orders"))); + + // Five-component tag exceeds the supported catalog.db.table.metaTable structure. 
+ assertThrows(UnsupportedOperationException.class, + () -> request.isTagMatch(CacheTag.build("a.b.c.d.e"))); + } + + private static Request dropDbRequest(String catalog, String db) { + return roundTrip(Builder.create().addDb(catalog, db)); + } + + private static Request dropTableRequest(String catalog, String db, String table) { + return roundTrip(Builder.create().addTable(catalog, db, table)); + } + + private static Request dropPartitionRequest(String catalog, String db, String table, + Map partSpec) { + return roundTrip(Builder.create().addPartitionOfATable(catalog, db, table, partSpec)); + } + + /** + * Marshals the request to proto and back, mirroring how the LLAP daemon receives requests. + */ + private static Request roundTrip(Builder requestBuilder) { + return Builder.create().fromProtoRequest(requestBuilder.build().toProtoRequests().get(0)).build(); + } + @Test public void testProactiveSweep() throws Exception { closeSweeperExecutorForTest(); diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/io/api/impl/TestLlapCacheMetadataSerializer.java b/llap-server/src/test/org/apache/hadoop/hive/llap/io/api/impl/TestLlapCacheMetadataSerializer.java index 01581dd94a71..6082f57d0657 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/io/api/impl/TestLlapCacheMetadataSerializer.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/io/api/impl/TestLlapCacheMetadataSerializer.java @@ -107,7 +107,7 @@ private LlapDaemonProtocolProtos.CacheEntryList createDummyMetadata() throws IOE LlapDaemonProtocolProtos.CacheEntryRange re2 = LlapDaemonProtocolProtos.CacheEntryRange.newBuilder().setStart(14L).setEnd(38L).build(); LlapDaemonProtocolProtos.CacheTag ct = - LlapDaemonProtocolProtos.CacheTag.newBuilder().setTableName("dummyTable").build(); + LlapDaemonProtocolProtos.CacheTag.newBuilder().setTableName("hive.default.dummyTable").build(); Path path = new Path(TEST_PATH); SyntheticFileId syntheticFileId = fileId(path); diff --git 
a/llap-server/src/test/org/apache/hadoop/hive/llap/io/encoded/TestOrcEncodedDataReaderIOCacheDisabled.java b/llap-server/src/test/org/apache/hadoop/hive/llap/io/encoded/TestOrcEncodedDataReaderIOCacheDisabled.java new file mode 100644 index 000000000000..5a6fef8f1823 --- /dev/null +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/io/encoded/TestOrcEncodedDataReaderIOCacheDisabled.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.llap.io.encoded; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Properties; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.llap.io.api.LlapProxy; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.io.orc.OrcFile; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Exercises a vectorized ORC read through {@link LlapProxy} with {@code LLAP_IO_MEMORY_MODE} set to + * {@code none}; class setup asserts that the low-level cache is not in use. + */ +public class TestOrcEncodedDataReaderIOCacheDisabled { + + /** Fixture schema; column names and types mirror the ones set in {@code buildJobConf}. */ + private static final TypeDescription ORC_SCHEMA = TypeDescription.fromString("struct<id:bigint,value:string>"); + private
static final int ROW_COUNT = 3; + private static final long[] EXPECTED_IDS = {1L, 2L, 3L}; + private static final String[] EXPECTED_VALUES = {"one", "two", "three"}; + + private static HiveConf daemonConf; + private static Path orcFile; + + /** Writes the ORC fixture into a fresh temp directory, then initializes LLAP IO in daemon mode. */ + @BeforeClass + public static void setUpClass() throws Exception { + daemonConf = new HiveConf(); + HiveConf.setVar(daemonConf, ConfVars.LLAP_IO_MEMORY_MODE, "none"); + + Path tmpDir = new Path(Files.createTempDirectory("llap-orc-no-io-cache").toString()); + orcFile = new Path(tmpDir, "data.orc"); + writeOrcFile(orcFile, daemonConf); + + LlapProxy.setDaemon(true); + LlapProxy.initializeLlapIo(daemonConf); + assertFalse(LlapProxy.getIo().usingLowLevelCache()); + } + + @AfterClass + public static void tearDownClass() { + LlapProxy.close(); + } + + /** Reads both columns of the whole file back through LLAP and compares every row with the fixture. */ + @Test + public void testVectorizedOrcEncodedReadWithIoCacheDisabled() throws Exception { + JobConf job = buildJobConf(orcFile); + long fileLen = orcFile.getFileSystem(job).getFileStatus(orcFile).getLen(); + + RecordReader<NullWritable, VectorizedRowBatch> reader = LlapProxy.getIo().llapVectorizedOrcReaderForPath( + null, orcFile, null, Arrays.asList(0, 1), job, 0, fileLen, Reporter.NULL); + assertNotNull("LLAP should handle this ORC read", reader); + + try { + VectorizedRowBatch batch = reader.createValue(); + int rowsRead = 0; + while (reader.next(NullWritable.get(), batch)) { + LongColumnVector idCol = (LongColumnVector) batch.cols[0]; + BytesColumnVector valueCol = (BytesColumnVector) batch.cols[1]; + for (int i = 0; i < batch.size; i++) { + assertEquals("id at row " + rowsRead, EXPECTED_IDS[rowsRead], idCol.vector[i]); + assertEquals("value at row " + rowsRead, EXPECTED_VALUES[rowsRead], valueCol.toString(i)); + rowsRead++; + } + } + assertEquals(ROW_COUNT, rowsRead); + } finally { + reader.close(); + } + } + + /** Writes the {@code ROW_COUNT} fixture rows to {@code path} as a single row batch. */ + private static void writeOrcFile(Path path, HiveConf conf) throws IOException { + try (Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf).setSchema(ORC_SCHEMA))) { + VectorizedRowBatch batch
= ORC_SCHEMA.createRowBatch(); + LongColumnVector idCol = (LongColumnVector) batch.cols[0]; + BytesColumnVector valueCol = (BytesColumnVector) batch.cols[1]; + batch.size = ROW_COUNT; + for (int i = 0; i < ROW_COUNT; i++) { + idCol.vector[i] = EXPECTED_IDS[i]; + valueCol.setVal(i, EXPECTED_VALUES[i].getBytes(StandardCharsets.UTF_8)); + } + writer.addRowBatch(batch); + } + } + + /** Builds a vectorization-enabled job whose {@code MapWork} maps the file's parent directory to an ORC partition. */ + private static JobConf buildJobConf(Path orcPath) { + JobConf job = new JobConf(daemonConf); + HiveConf.setBoolVar(job, ConfVars.HIVE_VECTORIZATION_ENABLED, true); + HiveConf.setVar(job, ConfVars.PLAN, "//tmp"); + job.set(IOConstants.COLUMNS, "id,value"); + job.set(IOConstants.COLUMNS_TYPES, "bigint,string"); + job.set(ColumnProjectionUtils.ORC_SCHEMA_STRING, ORC_SCHEMA.toString()); + + Properties tblProps = new Properties(); + tblProps.setProperty(META_TABLE_NAME, "default.test_orc"); + TableDesc tableDesc = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps); + + MapWork mapWork = new MapWork(); + mapWork.setVectorMode(true); + mapWork.setVectorizedRowBatchCtx(new VectorizedRowBatchCtx( + new String[] {"id", "value"}, + new TypeInfo[] {TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo}, + null, null, 0, 0, null, new String[0], null)); + PartitionDesc partitionDesc = new PartitionDesc(); + partitionDesc.setTableDesc(tableDesc); + mapWork.addPathToPartitionInfo(orcPath.getParent(), partitionDesc); + Utilities.setMapWork(job, mapWork); + return job; + } +} diff --git a/packaging/src/kubernetes/README.md b/packaging/src/kubernetes/README.md index 1fc11623240c..7a9aac5aa121 100644 --- a/packaging/src/kubernetes/README.md +++ b/packaging/src/kubernetes/README.md @@ -58,8 +58,8 @@ mvn clean package -pl packaging/src/kubernetes -Pkubernetes -DskipTests ## Quick Start (Helm) The Helm chart defaults to a **Full-HA** cluster (Metastore x2, HiveServer2 x2, -LLAP x2, TezAM x2). You only need to provide three things: database, ZooKeeper, -and storage. 
+LLAP x2, TezAM x2 — one TezAM per LLAP cluster). You only need to provide three +things: database, ZooKeeper, and storage. ### Prerequisites @@ -353,7 +353,7 @@ helm install hive ./helm/hive-operator \ --set 'cluster.storage.envVars[2].value=ozone' \ --set cluster.metastore.replicas=1 \ --set cluster.hiveServer2.replicas=1 \ - --set cluster.llap.enabled=false \ + --set cluster.llapClusters=[] \ --set cluster.tezAm.enabled=false ``` @@ -392,8 +392,7 @@ cluster: replicas: 1 hiveServer2: replicas: 1 - llap: - enabled: false + llapClusters: [] tezAm: enabled: false ``` @@ -477,7 +476,8 @@ cluster: requestsMemory: "2Gi" limitsMemory: "4Gi" - llap: + llapClusters: + - name: llap0 enabled: true replicas: 3 executors: 2 @@ -487,7 +487,7 @@ cluster: limitsMemory: "6Gi" tezAm: - replicas: 3 + enabled: true scratchStorageSize: "5Gi" ``` @@ -497,6 +497,210 @@ helm install hive ./helm/hive-operator -f values.yaml --- +## Multi-Tenant LLAP + +Multi-tenant LLAP allows you to run multiple independent LLAP clusters within a single +HiveCluster, each with its own resource pool, autoscaling policy, and TezAM instance. +HS2 routes sessions to clusters server-side based on admin-defined user/group rules. + +### How It Works + +``` + Multi-Tenant LLAP Architecture + + beeline -n alice + | + v + HiveServer2 (resolves user→cluster via routing rules) + | + +-- alice (user:alice=llap0) --> TezAM-llap0 --> LLAP daemon llap0-0, llap0-1, ... + +-- bob (user:bob=llap1) --> TezAM-llap1 --> LLAP daemon llap1-0, llap1-1, ... + +-- carol (default=llap2) --> TezAM-llap2 --> LLAP daemon llap2-0, llap2-1, ... 
+``` + +Each LLAP cluster is fully isolated: +- **Separate LLAP daemon StatefulSet** with independent executor count, memory, and replicas +- **Separate TezAM StatefulSet** (one per LLAP cluster) with its own ZooKeeper registration +- **Separate autoscaling** — each cluster scales independently based on its own metrics +- **Shared scratch PVC** (ReadWriteMany) for HS2 ↔ TezAM coordination files + +### Configuration + +**Values file:** + +```yaml +# values-multi-tenant.yaml +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" + + hiveServer2: + replicas: 2 + + tezAm: + enabled: true + scratchStorageSize: "2Gi" + + # Server-side routing: map users/groups to LLAP clusters + llapClusterRouting: "user:alice=production,group:eng=analytics,default=dev" + + # Three LLAP clusters: production (large), analytics (medium), dev (small) + llapClusters: + - name: production + enabled: true + replicas: 6 + executors: 4 + memoryMb: 8192 + resources: + requestsMemory: "8Gi" + limitsMemory: "10Gi" + autoscaling: + enabled: true + minReplicas: 2 + scaleUpThreshold: 2 + + - name: analytics + enabled: true + replicas: 4 + executors: 2 + memoryMb: 4096 + resources: + requestsMemory: "4Gi" + limitsMemory: "6Gi" + autoscaling: + enabled: true + minReplicas: 0 # scales to zero when idle + + - name: dev + enabled: true + replicas: 2 + executors: 1 + memoryMb: 1024 + resources: + 
requestsMemory: "1Gi" + limitsMemory: "2Gi" + autoscaling: + enabled: true + minReplicas: 0 # scales to zero when idle +``` + +```bash +helm install hive ./helm/hive-operator -f values-multi-tenant.yaml +``` + +### Resulting Kubernetes Resources + +For the above configuration, the operator creates: + +| Resource | Name | Purpose | +|----------|------|---------| +| StatefulSet | `hive-production` | LLAP daemons for production cluster | +| StatefulSet | `hive-tezam-production` | TezAM for production cluster | +| StatefulSet | `hive-analytics` | LLAP daemons for analytics cluster | +| StatefulSet | `hive-tezam-analytics` | TezAM for analytics cluster | +| StatefulSet | `hive-dev` | LLAP daemons for dev cluster | +| StatefulSet | `hive-tezam-dev` | TezAM for dev cluster | +| Service (headless) | `hive-production`, `hive-analytics`, `hive-dev` | LLAP daemon discovery | +| Service (headless) | `hive-tezam-production`, `hive-tezam-analytics`, `hive-tezam-dev` | TezAM discovery | +| ConfigMap | `hive-production-config`, etc. | `llap-daemon-site.xml` per cluster | +| ConfigMap | `hive-tezam-production-config`, etc. | `tez-site.xml` per cluster | +| PVC | `hive-scratch` | Shared scratch (ReadWriteMany) for HS2 ↔ TezAM | + +### Routing Queries to a Specific LLAP Cluster + +Routing is server-side — administrators define rules in the CR that map users/groups to +LLAP clusters. HS2 resolves the target cluster at session open and sets all required +properties automatically. Clients do not need any cluster-specific configuration. 
+ +```yaml +spec: + llapClusterRouting: "user:alice=production,group:eng=analytics,default=dev" + llapClusters: + - name: production + - name: analytics + - name: dev +``` + +Clients just connect with their identity: + +```bash +beeline -u "jdbc:hive2://hive-hiveserver2:10001/;transportMode=http;httpPath=cliservice" -n alice +``` + +HS2 resolves `alice → production` and sets the following on the session: +- `hive.server2.tez.external.sessions.namespace = /tez-external-sessions/production` +- `tez.am.registry.namespace = /production` +- `hive.llap.daemon.service.hosts = @production` + +Priority: user match > group match > default. + +The operator auto-generates per-cluster definitions from the LLAP spec names: +- `hive.llap.cluster.<name>.sessions.namespace = /tez-external-sessions/<name>` +- `hive.llap.cluster.<name>.registry.namespace = /<name>` +- `hive.llap.cluster.<name>.service.hosts = @<name>` + +### ZooKeeper Registration + +Each LLAP cluster registers independently in ZooKeeper: + +| Cluster | LLAP daemons register at | TezAM registers session at | +|---------|--------------------------|----------------------------| +| `production` | `@production` (ZK service record) | `/tez-external-sessions/production/` | +| `analytics` | `@analytics` (ZK service record) | `/tez-external-sessions/analytics/` | +| `dev` | `@dev` (ZK service record) | `/tez-external-sessions/dev/` | + +HS2 discovers available TezAM sessions via `hive.server2.tez.external.sessions.namespace` and +uses `tez.am.registry.namespace` for client cache isolation. + +### Per-Cluster Autoscaling Isolation + +When autoscaling is enabled, metrics are fully isolated per LLAP cluster: + +- **LLAP executor metrics**: The operator selects pods by label `hive.apache.org/llap-cluster=<name>`. + Only that cluster's pods are scraped and included in the scaling formula. +- **HS2 activation gate**: The operator reads `hs2_llap_target_sessions_<name>` from HS2 pods. + Each cluster only wakes when sessions specifically target it. 
+- **TezAM scaling**: Each TezAM scales based on session demand for its paired LLAP cluster. + +This means scaling up `production` never affects `analytics` or `dev` replicas. + +### Adding/Removing LLAP Clusters + +To add a new cluster, append to `llapClusters[]` and run `helm upgrade`: + +```bash +helm upgrade hive ./helm/hive-operator -f values-multi-tenant.yaml +``` + +To remove a cluster, delete the entry from `llapClusters[]` and upgrade. The operator +automatically garbage-collects the removed cluster's StatefulSet, Service, ConfigMap, +and PDB via label-based discovery. + +--- + ## Verify ```bash @@ -505,19 +709,609 @@ kubectl get hiveclusters kubectl describe hivecluster hive ``` +--- + +## Autoscaling + +The operator supports metric-based autoscaling for all four Hive components using +an **operator-driven control loop** that scrapes JMX Exporter metrics directly from +pods. No Prometheus server or external autoscaling tools are needed. Autoscaling is +opt-in per component and designed for **zero query failures** during scale-down. + +### Prerequisites + +- No external dependencies — the operator handles all scaling decisions internally + +### How It Works + +When `autoscaling.enabled: true` is set for a component, the operator: +1. Attaches the JMX Exporter javaagent (port 9404) to each pod +2. Polls `/metrics` on each pod at `metricsScrapeIntervalSeconds` intervals +3. Computes desired replicas using component-specific formulas +4. Applies HPA-like stabilization windows (scale-up/scale-down) +5. Patches the workload `spec.replicas` directly + +### Graceful Scale-Down Architecture + +``` + Scale Down Flow + 1. Operator reduces desired replicas (metric below threshold, + stabilization window elapsed) + 2. PodDisruptionBudget ensures minAvailable=1 (at least one pod + always running) + 3. Kubernetes sends SIGTERM to selected pod + 4. 
preStop hook runs: + - HS2: deregisters from ZK, drains open sessions, kills JVM + - HMS: kills JVM (stateless HTTP — no drain needed) + - LLAP: waits until all executors become idle, kills JVM + - TezAM: no drain (DAGAppMaster does not expose JMX metrics) + 5. terminationGracePeriodSeconds = gracePeriodSeconds (safety cap) + 6. Pod terminates immediately once drain completes (does NOT wait + the full grace period — it's only the upper safety bound) +``` + +> **Note:** Shell entrypoints (PID 1) in containers don't forward SIGTERM to child +> processes. The preStop hook explicitly sends SIGTERM to the Hive/Tez Java process +> after drain completes, ensuring prompt shutdown without waiting for the grace period +> to expire. + +### Scaling Timers + +The autoscaling system uses three independent timing controls: + +| Timer | Config Field | Default | Purpose | +|-------|-------------|---------|---------| +| **Metrics scrape interval** | `metricsScrapeIntervalSeconds` | `10` | How often the operator scrapes JMX Exporter `/metrics` on each pod. This is the **biggest bottleneck** for autoscaling reaction time. | +| **Scale-up stabilization** | `scaleUpStabilizationSeconds` | `60` | Window: picks the highest recommendation within this period before scaling up. Prevents flapping when metrics oscillate. Set to `0` for LLAP and TezAM (reactive dependents). | +| **Scale-down stabilization** | `scaleDownStabilizationSeconds` | `300-900` | Window: picks the most conservative (highest) recommendation within this period before scaling down. Also acts as the cooldown between consecutive scale-downs — no separate cooldown needed. 
| + +**How they interact:** +- Load spike detected → operator scrapes metrics within `metricsScrapeIntervalSeconds` → waits `scaleUpStabilizationSeconds` then scales up +- Load drops → operator waits `scaleDownStabilizationSeconds` (stabilization window must confirm low demand consistently) then scales down + +**Tuning reaction time:** With defaults (`metricsScrapeIntervalSeconds: 10`, `scaleUpStabilizationSeconds: 0` for LLAP/TezAM), scale-up latency is ~10-20s (one scrape cycle). For HS2 with `scaleUpStabilizationSeconds: 60`, expect ~70s. + +### Per-Component Scaling Logic + +| Component | Scale-Up Formula | Scale-Down | JMX Metric | +|-----------|-----------------|------------|------------| +| **HiveServer2** | `max(ceil(sessions / threshold), cpu_desired)` | Sessions drop to 0 AND CPU below threshold → scale to minReplicas | `hs2_open_sessions`, `jvm_process_cpu_load` | +| **Metastore** | `max(ceil(api_rate / threshold), cpu_desired)` | Rate drops to 0 AND CPU below threshold → scale to minReplicas | `api_*_total`, `jvm_process_cpu_load` | +| **LLAP** | `ceil(avg(queued + configured - available) / scaleUpThreshold)` | All executors idle + no HS2 sessions | `hadoop_llapdaemon_executor*` | +| **Tez AM** | `max(sum(hs2_open_sessions), count(HS2_pods) * sessions_per_queue)` | All HS2 sessions closed | `hs2_open_sessions` (from HS2 pods) | + +**TezAM Scaling Model:** TezAM uses demand-driven scaling with two formulas (max wins): +1. **Session demand** — `sum(hs2_open_sessions)`: scales to match the total number of + concurrent sessions across all HS2 pods (each session needs its own exclusive TezAM). +2. **Pre-warm** — `count(HS2 pods with sessions) × hive.server2.tez.sessions.per.default.queue` (default 1): + ensures every active HS2 pod has enough TezAM sessions pre-claimed from ZooKeeper. + +The operator takes the maximum across both formulas. This ensures TezAM capacity +is always sufficient for both current demand and eager session pre-warming. 
+TezAM scaling is purely demand-driven from HS2 metrics. + +### Scale-to-Zero Architecture + +When `minReplicas: 0` is configured (LLAP, TezAM), the cluster scales those +components down to zero pods when HS2 has no active sessions. HS2 itself always +maintains at least 1 replica (`minReplicas >= 1`) so it is always available to +accept connections. + +``` + Scale-to-Zero (Idle Detection) + + 1. HS2 reports hs2_open_sessions = 0 for scaleDownStabilization + → operator scales HS2 to minReplicas (>= 1) + + 2. Operator sees hs2_open_sessions = 0 on next LLAP/TezAM eval + → activation gate fails + → scale LLAP and TezAM to 0 (if minReplicas=0) + + 3. HMS stays at minReplicas=1 (always available) + +``` + +``` + Wake-from-Zero (LLAP/TezAM) + + 1. Beeline connects to HS2 (always running, at least 1 pod) + + 2. HS2 reports hs2_open_sessions > 0 via JMX Exporter + + 3. Operator detects HS2 sessions on next scrape cycle: + - LLAP activation gate passes → scales up from 0 + - TezAM activation gate passes → scales up from 0 + + 4. Query executes once LLAP/TezAM pods are ready + +``` + +**Session protection:** The HS2 Service uses `sessionAffinity: ClientIP` to ensure +beeline clients always reach the same pod. The preStop hook deregisters the pod from +ZooKeeper (preventing new sessions) and waits for `hs2_open_sessions` to drain to 0 +before terminating. The `gracePeriodSeconds` (default 3600s) is a safety cap — the pod +terminates immediately once sessions drain, not after the full grace period. 
+ +**Component-specific behavior:** + +| Component | minReplicas | Scale-to-Zero Trigger | Wake Trigger | +|-----------|-------------|----------------------|--------------| +| **HS2** | 1 | N/A (always running) | N/A | +| **HMS** | 1 | Never (always running) | N/A | +| **LLAP** | 0 | No HS2 sessions targeting this cluster | HS2 has sessions targeting this cluster (`hs2_llap_target_sessions_{name}`) | +| **TezAM** | 0 | No HS2 sessions (activation gate fails) | HS2 has open sessions (next scrape) | + +**Per-cluster LLAP wake:** When multiple LLAP clusters are configured (e.g., `llap0`, `llap1`), +each cluster wakes independently based on the `hs2_llap_target_sessions_{name}` metric. +If HS2 does not expose per-target metrics (older builds), the operator falls back to the generic +`hs2_open_sessions` metric (which wakes all LLAP clusters on any session). + +### Auto-Suspend (Full Cluster Hibernation) + +Auto-suspend goes beyond scale-to-zero — it fully hibernates the **entire** cluster +(including HS2 and HMS) to 0 replicas after a configurable idle timeout. This is +useful for dev/test clusters that should not consume resources when nobody is using +them. + +**Prerequisites:** Auto-suspend requires autoscaling to be enabled on ALL active +components (HS2, LLAP if enabled, TezAM if enabled, and HMS if `includeMetastore=true`). +The operator will not auto-suspend unless it can confirm all components are at their +minimum state. + +**Idle criteria (all must hold simultaneously for `idleTimeoutMinutes`):** + +| Component | Idle Condition | +|-----------|---------------| +| **HS2** | At `minReplicas` with 0 open sessions | +| **HMS** | At `minReplicas` (only checked if `includeMetastore=true`) | +| **LLAP** | At `minReplicas` (default 0) | +| **TezAM** | At `minReplicas` (default 0) | + +**Important:** HS2 can **only** scale to 0 replicas via auto-suspend. Normal +autoscaling always maintains `minReplicas >= 1` for HS2. 
Auto-suspend is the +only mechanism that overrides this to achieve full hibernation. + +``` + Auto-Suspend Flow + + 1. Autoscaling scales all components to their minReplicas + (HS2≥1, HMS≥1, LLAP/TezAM to configured min) + + 2. Operator detects idle state: + - HS2 has 0 open sessions + - HMS at minReplicas (if includeMetastore=true) + - LLAP/TezAM at minReplicas + + 3. Idle timer starts (status: clusterPhase=Idle, idleSince=<timestamp>) + + 4. After idleTimeoutMinutes (default 15): + - ALL components scaled to 0 (HMS excluded if includeMetastore=false) + - spec.suspend set to true (cluster stays suspended until user wakes it) + - Status: clusterPhase=Suspended, suspendedSince=<timestamp> + + 5. To wake: kubectl patch hivecluster hive --type=merge -p '{"spec":{"suspend":false}}' + All components restored to minReplicas + (HS2/HMS ≥1, LLAP/TezAM ≥1 for immediate usability) + +``` + +**Configuration:** + +```yaml +cluster: + autoSuspend: + enabled: true + idleTimeoutMinutes: 15 # minutes idle before full hibernation + includeMetastore: true # set false to keep HMS running during suspend +``` + +**Manual Suspend/Wake Commands:** + +```bash +# Suspend immediately (bypasses idle timer) +kubectl patch hivecluster hive --type=merge -p '{"spec":{"suspend":true}}' + +# Wake cluster (restores to minReplicas) +kubectl patch hivecluster hive --type=merge -p '{"spec":{"suspend":false}}' +``` + +Manual suspend works regardless of whether `autoSuspend.enabled` is true — it +immediately scales all components to 0 without waiting for the idle timeout. +When `includeMetastore: false`, HMS stays running even during manual suspend. 
+ +**Observing cluster state:** + +```bash +# Quick view — printer columns show phase and idle time +kubectl get hivecluster +``` +``` +NAME PHASE IDLE (MIN) AGE +hive Idle 12 2h +``` + +```bash +# After suspend triggers +kubectl get hivecluster +``` +``` +NAME PHASE IDLE (MIN) AGE +hive Suspended 2h +``` + +```bash +# Full status (kubectl get hivecluster hive -o yaml) +``` +```yaml +status: + clusterPhase: Suspended + idleSince: "2026-06-08T10:00:00Z" + idleForMinutes: 15 + suspendedSince: "2026-06-08T10:15:00Z" + conditions: + - type: Suspended + status: "True" + reason: AutoSuspend # or ManualSuspend + message: "Cluster suspended after idle timeout" + lastTransitionTime: "2026-06-08T10:15:00Z" +``` + +When the cluster is running normally: +``` +NAME PHASE IDLE (MIN) AGE +hive Running 2h +``` + +**Full example (autoscaling + auto-suspend):** + +```yaml +cluster: + autoSuspend: + enabled: true + idleTimeoutMinutes: 15 + includeMetastore: false # keep HMS running during suspend + + hiveServer2: + replicas: 10 + autoscaling: + enabled: true + minReplicas: 1 + + metastore: + replicas: 6 + autoscaling: + enabled: true + minReplicas: 1 + + llapClusters: + - name: llap0 + replicas: 8 + autoscaling: + enabled: true + minReplicas: 0 # scales to 0 via normal autoscaling when no sessions target this cluster + + tezAm: + replicas: 10 + autoscaling: + enabled: true + minReplicas: 0 # scales to 0 via normal autoscaling when HS2 idle +``` + +With this configuration, the cluster lifecycle is: +1. Under load → all components scaled up by autoscaler +2. Load drops → autoscaler scales to minReplicas (HS2=1, HMS=1, LLAP clusters=0, TezAM=0) +3. HS2 idle (0 sessions) for 15 minutes → auto-suspend kicks in → HS2, LLAP, TezAM to 0 (HMS stays at minReplicas) +4. `kubectl patch hivecluster hive --type=merge -p '{"spec":{"suspend":false}}'` → wake → HS2=1, each LLAP cluster=1, TezAM=1 +5. 
User connects → autoscaler detects sessions → scales up as needed + +### CPU-Based Scaling (HS2 and HMS) + +In addition to the primary metrics (sessions for HS2, API request rate for HMS), +the operator supports a secondary **CPU-based scaling signal** for HiveServer2 and +Metastore. The final desired replica count is: + +``` +final_desired = max(metric_desired, cpu_desired) +``` + +Either signal can trigger scale-up; neither can force scale-down below what the +other recommends. CPU-based scaling uses the same stabilization windows as metric-based +scaling (no separate CPU stabilization). + +**How it works:** + +1. The operator scrapes `ProcessCpuLoad` from `java.lang:type=OperatingSystem` via JMX + Exporter (exported as `jvm_process_cpu_load`, a 0.0–1.0 fraction) +2. Averages across all pods, converts to percentage (0–100) +3. If avg CPU >= `cpuScaleUpThreshold`: scales up proportionally + (`ceil(avgCpu * currentReplicas / cpuScaleUpThreshold)`) +4. If avg CPU < `cpuScaleDownThreshold`: scales down + (`ceil(avgCpu * currentReplicas / cpuScaleUpThreshold)`, floored at `minReplicas`) +5. Between thresholds: holds current replica count + +**Configuration:** + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.<component>.autoscaling.cpuScaleUpThreshold` | `90` | CPU percentage (0-100) that triggers scale-up. Set to `0` to disable CPU-based scaling. | +| `cluster.<component>.autoscaling.cpuScaleDownThreshold` | `30` | CPU percentage (0-100) below which scale-down is considered. 
| + +**Example:** + +```yaml +cluster: + hiveServer2: + replicas: 10 + resources: + limitsCpu: "2" # Recommended: set CPU limits so ProcessCpuLoad is relative to pod allocation + autoscaling: + enabled: true + cpuScaleUpThreshold: 90 + cpuScaleDownThreshold: 30 + + metastore: + replicas: 6 + resources: + limitsCpu: "2" + autoscaling: + enabled: true + cpuScaleUpThreshold: 90 + cpuScaleDownThreshold: 30 +``` + +**Important: CPU limits and metric accuracy** + +`ProcessCpuLoad` reports CPU usage as a fraction of **available processors**. Without +CPU limits, the JVM sees all node cores (e.g., 8 cores), so even heavy single-pod +load only shows ~12.5%. With `limitsCpu: "2"`, the JVM sees 2 processors and the +metric becomes "% of allocated CPU" — making thresholds meaningful. + +| Pod CPU Limit | JVM sees | 90% threshold means | +|---------------|----------|---------------------| +| None (no limit) | All node cores (e.g., 8) | Using 7.2 of 8 cores — very hard to reach | +| `2` | 2 cores | Using 1.8 of 2 allocated cores | +| `4` | 4 cores | Using 3.6 of 4 allocated cores | + +**Recommendation:** Always set `resources.limitsCpu` when using CPU-based autoscaling. + +**Status output:** + +The operator reports CPU metrics in the HiveCluster status: + +```yaml +status: + hiveServer2: + autoscaling: + currentMetricValue: 5 # total sessions + scaleUpThreshold: 100 + currentCpuPercent: 72.45 # avg ProcessCpuLoad * 100 + cpuScaleUpThreshold: 90 + cpuProposedReplicas: 2 # what CPU alone would recommend + proposedReplicas: 2 + lastScaleTime: "2026-05-31T04:23:07Z" +``` + +**Applicability:** CPU-based scaling only applies to HS2 and HMS. LLAP and TezAM +do not use CPU as a scaling signal (LLAP scales on busy executor slots which already +correlates with CPU; TezAM is demand-based from HS2 session count). 
+ +--- + +### Enabling Autoscaling + +**CLI (with Ozone storage backend):** + +Each component has sensible per-component defaults (see [Configuration Reference](#configuration-reference)). +Only `enabled=true` is needed to turn on autoscaling: + +```bash +helm install hive ./helm/hive-operator \ + --set cluster.database.type=postgres \ + --set cluster.database.url="jdbc:postgresql://postgres-postgresql:5432/metastore" \ + --set cluster.database.driver="org.postgresql.Driver" \ + --set cluster.database.username=hive \ + --set cluster.database.passwordSecretRef.name=hive-db-secret \ + --set cluster.database.passwordSecretRef.key=password \ + --set cluster.database.driverJarUrl="https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" \ + --set cluster.zookeeper.quorum="zookeeper:2181" \ + --set cluster.storage.coreSiteOverrides."fs\.defaultFS"="s3a://hive" \ + --set cluster.storage.coreSiteOverrides."fs\.s3a\.endpoint"="http://ozone-s3g-rest:9878" \ + --set-string cluster.storage.coreSiteOverrides."fs\.s3a\.path\.style\.access"=true \ + --set 'cluster.storage.envVars[0].name=HADOOP_OPTIONAL_TOOLS' \ + --set 'cluster.storage.envVars[0].value=hadoop-aws' \ + --set 'cluster.storage.envVars[1].name=AWS_ACCESS_KEY_ID' \ + --set 'cluster.storage.envVars[1].value=ozone' \ + --set 'cluster.storage.envVars[2].name=AWS_SECRET_ACCESS_KEY' \ + --set 'cluster.storage.envVars[2].value=ozone' \ + --set cluster.hiveServer2.autoscaling.enabled=true \ + --set cluster.metastore.autoscaling.enabled=true \ + --set 'cluster.llapClusters[0].autoscaling.enabled=true' \ + --set cluster.tezAm.autoscaling.enabled=true +``` + +**Values file (for customizing beyond defaults):** + +```yaml +# values-autoscaling.yaml — only override what you need +cluster: + database: + type: postgres + url: "jdbc:postgresql://postgres-postgresql:5432/metastore" + driver: "org.postgresql.Driver" + username: hive + passwordSecretRef: + name: hive-db-secret + key: password + 
driverJarUrl: "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.5/postgresql-42.7.5.jar" + + zookeeper: + quorum: "zookeeper:2181" + + storage: + coreSiteOverrides: + fs.defaultFS: "s3a://hive" + fs.s3a.endpoint: "http://ozone-s3g-rest:9878" + fs.s3a.path.style.access: "true" + envVars: + - name: HADOOP_OPTIONAL_TOOLS + value: "hadoop-aws" + - name: AWS_ACCESS_KEY_ID + value: "ozone" + - name: AWS_SECRET_ACCESS_KEY + value: "ozone" + + hiveServer2: + replicas: 10 # Acts as maxReplicas when autoscaling is enabled + autoscaling: + enabled: true + # minReplicas: 1 # default — always keep at least 1 HS2 running + # scaleUpThreshold: 80 # default — avg open sessions per pod triggering scale-up + # scaleUpStabilizationSeconds: 60 # default — scale-up window + # scaleDownStabilizationSeconds: 600 # default — scale-down window (also acts as cooldown) + # metricsScrapeIntervalSeconds: 10 # default — operator scrape interval (lower = faster reaction) + + metastore: + replicas: 6 # Acts as maxReplicas when autoscaling is enabled + autoscaling: + enabled: true + # minReplicas: 1 # default — always keep at least 1 metastore running + # scaleUpThreshold: 75 # default — API request rate (req/s) triggering scale-up + # scaleUpStabilizationSeconds: 60 # default — scale-up window + # scaleDownStabilizationSeconds: 300 # default — scale-down window (also acts as cooldown) + # gracePeriodSeconds: 60 # default — fast drain (HMS is stateless) + # metricsScrapeIntervalSeconds: 10 # default — operator scrape interval + + llapClusters: + - name: llap0 + replicas: 8 # Acts as maxReplicas when autoscaling is enabled + autoscaling: + enabled: true + # minReplicas: 0 # default — scale to zero when no sessions target this cluster + # scaleUpThreshold: 1 # default — total busy slots (queued+running) triggering scale-up + # scaleUpStabilizationSeconds: 60 # default — scale-up window + # scaleDownStabilizationSeconds: 900 # default — scale-down window (long — scaling down destroys 
cache) + # gracePeriodSeconds: 600 # default — 10 min drain for in-flight fragments + # metricsScrapeIntervalSeconds: 10 # default — operator scrape interval (lower = faster reaction) + + tezAm: + replicas: 10 # Acts as maxReplicas when autoscaling is enabled + autoscaling: + enabled: true + # minReplicas: 0 # default — scale to zero when no HS2 sessions + # scaleUpThreshold: 1 # default — threshold for demand metric (1 = match HS2 pod count) + # scaleUpStabilizationSeconds: 60 # default — HPA scale-up window + # scaleDownStabilizationSeconds: 300 # default — HPA scale-down window + # gracePeriodSeconds: 120 # default — 2 min drain for DAG completion + # metricsScrapeIntervalSeconds: 10 # default — operator scrape interval (lower = faster reaction) +``` + +```bash +helm install hive ./helm/hive-operator -f values-autoscaling.yaml +``` + +When autoscaling is enabled, the operator automatically: +- Deploys the JMX Exporter javaagent (port 9404, `/metrics`) +- Enables `hive.server2.metrics.enabled` / `metastore.metrics.enabled` (JMX reporter) +- Attaches JMX Exporter javaagent (port 9404, `/metrics`) to each pod +- Creates PodDisruptionBudgets (minAvailable: 1) +- Configures preStop lifecycle hooks for graceful drain +- Sets `terminationGracePeriodSeconds` to the configured grace period +- LLAP/TezAM use HS2 metrics as activation gate (only scale when HS2 has sessions) + +**JMX Metrics Scraped by Operator (per component):** + +| Component | Key Metrics | Purpose | +|-----------|---------|---------| +| **HiveServer2** | `hs2_open_sessions`, `jvm_process_cpu_load` | Session count for primary scaling + CPU for secondary scaling signal | +| **Metastore** | `api_*_total`, `jvm_process_cpu_load` | API call counters (operator computes request rate from deltas) + CPU for secondary scaling signal | +| **LLAP** | `hadoop_llapdaemon_executornumqueuedrequests`, `hadoop_llapdaemon_executornumexecutorsconfigured`, `hadoop_llapdaemon_executornumexecutorsavailable` | Total busy slots 
= queued + configured - available | +| **Tez AM** | N/A (scales on HS2 metrics) | TezAM scaling is demand-driven from `hs2_open_sessions` — no TezAM-specific metrics needed | + +### Enabling Autoscaling — Example + +To enable autoscaling for HS2 and Metastore: + +```yaml +cluster: + hiveServer2: + replicas: 4 # max replicas ceiling + autoscaling: + enabled: true + scaleUpThreshold: 1 # scale up when total sessions > 1 + minReplicas: 1 # always keep at least 1 HS2 pod running + + metastore: + replicas: 3 # max replicas ceiling + autoscaling: + enabled: true + minReplicas: 1 # always keep at least 1 running + scaleUpThreshold: 75 # API requests/sec threshold +``` + +> **Note:** LLAP scales on total busy slots (queued + running executors). +> TezAM scales on demand — the number of active HS2 pods multiplied by +> `hive.server2.tez.sessions.per.default.queue` (default 1). + +### Helm Values Reference (Autoscaling) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.<component>.replicas` | `1-2` | Static replica count, or max replicas ceiling when autoscaling is enabled | +| `cluster.<component>.autoscaling.enabled` | `false` | Enable operator-driven autoscaling | +| `cluster.<component>.autoscaling.minReplicas` | `1` (HS2/HMS), `0` (LLAP/TezAM) | Minimum replica count. Set to 0 for scale-to-zero (LLAP, TezAM only; HS2 minimum is 1) | +| `cluster.<component>.autoscaling.scaleUpThreshold` | varies | Metric threshold triggering scale-up | +| `cluster.<component>.autoscaling.scaleUpStabilizationSeconds` | `60` | Stabilization window for scale-up (picks highest recommendation in window) | +| `cluster.<component>.autoscaling.scaleDownStabilizationSeconds` | `300-900` | Stabilization window for scale-down (picks most conservative recommendation in window). Also acts as cooldown between consecutive scale-downs. | +| `cluster.<component>.autoscaling.gracePeriodSeconds` | `3600` | Safety cap: max drain time before forced termination. Pod exits immediately once drain completes. 
| +| `cluster.<component>.autoscaling.metricsScrapeIntervalSeconds` | `10` | How often the operator scrapes JMX metrics from pods. Lower = faster reaction. | +| `cluster.<component>.autoscaling.cpuScaleUpThreshold` | `90` | CPU percentage (0-100) triggering scale-up. Only HS2/HMS. Set to 0 to disable. | +| `cluster.<component>.autoscaling.cpuScaleDownThreshold` | `30` | CPU percentage (0-100) below which scale-down is considered. Only HS2/HMS. | + +--- + ## Connect to HiveServer2 +HiveServer2 runs in **HTTP transport mode** by default (recommended for Kubernetes +environments as it works well with load balancers, ingress controllers, and proxies). + +### Standard Connection (minReplicas >= 1) + +When HS2 always has at least one pod running, connect directly to the service: + ```bash -kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hiveserver2:10000/" +kubectl exec -it deployment/hive-hiveserver2 -- beeline -u "jdbc:hive2://hive-hiveserver2:10001/;transportMode=http;httpPath=cliservice" ``` Or via port-forward: ```bash -kubectl port-forward svc/hive-hiveserver2 10000:10000 -beeline -u "jdbc:hive2://localhost:10000/" +kubectl port-forward svc/hive-hiveserver2 10001:10001 +beeline -u "jdbc:hive2://localhost:10001/;transportMode=http;httpPath=cliservice" ``` +### LLAP/TezAM Scale-to-Zero Behavior + +When LLAP and TezAM are configured with `minReplicas: 0` (the default), they start +with zero pods on a fresh install. The operator automatically scales them up when HS2 +reports open sessions, and scales them back to zero when HS2 is idle. + +Since HS2 always runs at least 1 pod (`minReplicas >= 1`), no special connection +setup is needed — simply connect to HS2 and the operator wakes LLAP/TezAM as needed. + +> **Note:** The operator sets `hive.server2.transport.mode=http`, +> `hive.server2.thrift.http.port=10001`, and +> `hive.server2.thrift.http.path=cliservice` by default. 
The binary Thrift +> port (10000) is still exposed for backward compatibility but HTTP mode +> is the primary transport. To override, use `configOverrides` in the +> HiveServer2 spec. + +> **Metastore HTTP Mode:** The operator configures HMS in HTTP transport mode +> (`metastore.server.thrift.transport.mode=http`) and sets the matching client +> config (`hive.metastore.client.thrift.transport.mode=http`) on HS2 and TezAM. +> HTTP mode makes Metastore connections stateless — each RPC is an independent +> HTTP request, so Metastore pods can scale down safely without breaking active +> connections from HiveServer2. The port remains 9083 (same as binary mode). + --- ## Helm Values Reference @@ -593,33 +1387,75 @@ beeline -u "jdbc:hive2://localhost:10000/" | `cluster.hiveServer2.extraVolumes` | `[]` | Additional volumes for HS2 pods | | `cluster.hiveServer2.extraVolumeMounts` | `[]` | Additional volume mounts for HS2 containers | -### LLAP +### LLAP Clusters + +LLAP is configured as an array (`llapClusters`) to support multi-tenant deployments with +independent scaling. Each entry creates a separate LLAP StatefulSet, Service, ConfigMap, +and a paired TezAM StatefulSet (when `tezAm.enabled: true`). 
| Value | Default | Description | |-------|---------|-------------| -| `cluster.llap.enabled` | `true` | Enable LLAP daemons | -| `cluster.llap.replicas` | `2` | Replica count | -| `cluster.llap.executors` | `1` | Executors per daemon | -| `cluster.llap.memoryMb` | `1024` | Memory per daemon (MB) | -| `cluster.llap.serviceHosts` | `@llap0` | LLAP ZK identity | -| `cluster.llap.resources` | `{}` | CPU/memory | -| `cluster.llap.configOverrides` | `{}` | Extra LLAP config properties | -| `cluster.llap.extraVolumes` | `[]` | Additional volumes for LLAP pods | -| `cluster.llap.extraVolumeMounts` | `[]` | Additional volume mounts for LLAP containers | +| `cluster.llapClusters[].name` | *(required)* | Unique name for this LLAP cluster (e.g., `llap0`) | +| `cluster.llapClusters[].enabled` | `true` | Enable this LLAP cluster | +| `cluster.llapClusters[].replicas` | `2` | Replica count (maxReplicas when autoscaling enabled) | +| `cluster.llapClusters[].executors` | `1` | Executors per daemon | +| `cluster.llapClusters[].memoryMb` | `1024` | Memory per daemon (MB) | +| `cluster.llapClusters[].resources` | `{}` | CPU/memory | +| `cluster.llapClusters[].configOverrides` | `{}` | Extra LLAP config properties | +| `cluster.llapClusters[].extraVolumes` | `[]` | Additional volumes for LLAP pods | +| `cluster.llapClusters[].extraVolumeMounts` | `[]` | Additional volume mounts for LLAP containers | +| `cluster.llapClusters[].autoscaling.enabled` | `false` | Enable per-cluster autoscaling | +| `cluster.llapClusters[].autoscaling.minReplicas` | `0` | Min replicas (0 = scale to zero) | +| `cluster.llapClusters[].autoscaling.scaleUpThreshold` | `1` | Busy-slot threshold for scale-up | + +HS2 routes sessions to LLAP clusters server-side based on user/group identity: + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.llapClusterRouting` | `""` | Routing rules (e.g., `"user:alice=llap1,default=llap0"`) | + +The operator auto-generates per-cluster namespace 
definitions in hive-site.xml. +Clients connect with just their identity — no cluster-specific JDBC URL params needed. ### Tez AM +TezAM is deployed as one StatefulSet per LLAP cluster. The global `tezAm` section +controls shared settings (enabled flag, scratch PVC). Per-LLAP TezAM settings +(replicas, autoscaling) can be overridden in each `llapClusters[].tezAm` entry. + | Value | Default | Description | |-------|---------|-------------| -| `cluster.tezAm.enabled` | `true` | Enable Tez Application Master | -| `cluster.tezAm.replicas` | `2` | Replica count | -| `cluster.tezAm.scratchStorageSize` | `1Gi` | Shared scratch PVC size | -| `cluster.tezAm.scratchStorageClassName` | | StorageClass (must support RWX) | +| `cluster.tezAm.enabled` | `true` | Enable Tez Application Master (one per LLAP cluster) | +| `cluster.tezAm.replicas` | `2` | Default replica count per TezAM (overridable per LLAP cluster) | +| `cluster.tezAm.scratchStorageSize` | `1Gi` | Shared scratch PVC size (single PVC shared by all HS2 and TezAM pods) | +| `cluster.tezAm.scratchStorageClassName` | | StorageClass (must support ReadWriteMany) | | `cluster.tezAm.resources` | `{}` | CPU/memory | | `cluster.tezAm.configOverrides` | `{}` | Extra TezAM config properties | | `cluster.tezAm.extraVolumes` | `[]` | Additional volumes for TezAM pods | | `cluster.tezAm.extraVolumeMounts` | `[]` | Additional volume mounts for TezAM containers | +### Auto-Suspend + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.autoSuspend.enabled` | `false` | Enable full cluster hibernation after idle timeout. Requires autoscaling enabled on all active components (HMS only if `includeMetastore=true`). | +| `cluster.autoSuspend.idleTimeoutMinutes` | `15` | Minutes of idle time (HS2=0 sessions, LLAP/TezAM at minReplicas) before the cluster suspends. | +| `cluster.autoSuspend.includeMetastore` | `true` | Whether HMS participates in auto-suspend. 
When false, HMS stays at minReplicas during suspend and HMS autoscaling is not required. | + +### Autoscaling (per component) + +| Value | Default | Description | +|-------|---------|-------------| +| `cluster.<component>.autoscaling.enabled` | `false` | Enable operator-driven autoscaling for this component | +| `cluster.<component>.autoscaling.minReplicas` | `0` | Floor replica count. 0 enables scale-to-zero (LLAP, TezAM only; HS2 minimum is 1) | +| `cluster.<component>.autoscaling.scaleUpThreshold` | `100` (HS2/HMS), `10` (LLAP) | Metric threshold per pod triggering scale-up (sessions for HS2, API request rate for HMS, busy slots for LLAP). TezAM scales 1:1 with demand (no threshold). | +| `cluster.<component>.autoscaling.scaleUpStabilizationSeconds` | `60` | Stabilization window for scale-up decisions (prevents flapping) | +| `cluster.<component>.autoscaling.scaleDownStabilizationSeconds` | `300-900` | Stabilization window for scale-down decisions (also acts as cooldown between consecutive scale-downs) | +| `cluster.<component>.autoscaling.gracePeriodSeconds` | `3600` | Safety cap (seconds) — pod terminates immediately once drain completes, this is only the upper bound | +| `cluster.<component>.autoscaling.metricsScrapeIntervalSeconds` | `10` | How often the operator polls JMX metrics from pods. Lower = faster reaction time. | +| `cluster.<component>.autoscaling.cpuScaleUpThreshold` | `90` | CPU percentage (0-100) triggering scale-up. Only HS2/HMS. Set to 0 to disable. | +| `cluster.<component>.autoscaling.cpuScaleDownThreshold` | `30` | CPU percentage (0-100) below which scale-down is considered. Only HS2/HMS. 
| + --- ## Upgrade and Uninstall @@ -659,11 +1495,13 @@ helm install hive ./helm/hive-operator -f my-values.yaml ### Remove Everything (including dependencies) ```bash -helm uninstall hive -kubectl delete crd hiveclusters.hive.apache.org -helm uninstall ozone postgres zookeeper --ignore-not-found -kubectl delete pvc data-zookeeper-0 --ignore-not-found -kubectl delete pvc data-postgres-postgresql-0 --ignore-not-found +kubectl delete hivecluster --all -A --wait=false --ignore-not-found +helm uninstall hive --ignore-not-found +kubectl delete crd hiveclusters.hive.apache.org --wait=false --ignore-not-found +helm uninstall ozone --ignore-not-found +helm uninstall postgres --ignore-not-found +helm uninstall zookeeper --ignore-not-found +kubectl delete pvc data-zookeeper-0 data-postgres-postgresql-0 --ignore-not-found kubectl delete secret hive-db-secret --ignore-not-found ``` @@ -738,20 +1576,32 @@ HiveCluster CR v HiveClusterReconciler | - +-- HadoopConfigMapDependent (core-site.xml) - +-- MetastoreConfigMapDependent (metastore-site.xml) - +-- HiveServer2ConfigMapDependent (hive-site.xml + tez-site.xml) - +-- SchemaInitJobDependent (schematool -initOrUpgradeSchema) - +-- MetastoreDeploymentDependent --> MetastoreServiceDependent - +-- HiveServer2DeploymentDependent --> HiveServer2ServiceDependent - +-- LlapStatefulSetDependent --> LlapServiceDependent (optional) - +-- ScratchPvcDependent (shared scratch PVC, optional) - +-- TezAmStatefulSetDependent --> TezAmServiceDependent (optional) + +-- [JOSDK Workflow Dependents] + | +-- HadoopConfigMapDependent (core-site.xml) + | +-- MetastoreConfigMapDependent (metastore-site.xml) + | +-- HiveServer2ConfigMapDependent (hive-site.xml + tez-site.xml) + | +-- SchemaInitJobDependent (schematool -initOrUpgradeSchema) + | +-- MetastoreDeploymentDependent --> MetastoreServiceDependent + | +-- HiveServer2DeploymentDependent --> HiveServer2ServiceDependent + | +-- ScratchPvcDependent (shared scratch PVC for HS2 ↔ TezAM) + | + +-- 
[Imperative] Per-LLAP-Cluster Resources (for each llapClusters[] entry): + +-- LLAP StatefulSet + headless Service + ConfigMap + PDB + +-- TezAM StatefulSet + headless Service + ConfigMap (one TezAM per LLAP cluster) ``` +LLAP clusters and their paired TezAM instances are managed imperatively by the reconciler +(not via JOSDK workflow dependents) because the number of clusters is dynamic — determined +at runtime from the CR spec. Each `llapClusters[]` entry produces: +- **LLAP**: StatefulSet (`{cluster}-{name}`), headless Service, ConfigMap (`llap-daemon-site.xml`), PDB +- **TezAM**: StatefulSet (`{cluster}-tezam-{name}`), headless Service, ConfigMap (`tez-site.xml`) + +All imperative resources are applied via `serverSideApply()`. Removed LLAP clusters (and +their TezAMs) are garbage-collected automatically using label-based discovery. + **Startup order:** 1. ConfigMaps (Hadoop, Metastore, HiveServer2) 2. Schema Init Job [if Metastore enabled] 3. Metastore Deployment + Service [if enabled] -4. HiveServer2 Deployment + Service -5. LLAP + TezAM [if enabled] +4. HiveServer2 Deployment + Service + Shared Scratch PVC +5. 
LLAP clusters + paired TezAM instances [if enabled] diff --git a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml index cc65852d4f35..22ca3c09458f 100644 --- a/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml +++ b/packaging/src/kubernetes/config/samples/hivecluster-full-ha.yaml @@ -44,12 +44,12 @@ spec: configOverrides: hive.server2.enable.doAs: "false" - llap: + llapClusters: + - name: llap0 enabled: true replicas: 2 executors: 1 memoryMb: 1024 - serviceHosts: "@llap0" resources: requestsMemory: "2Gi" limitsMemory: "3Gi" diff --git a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml index 99768633a128..81947c1c4910 100644 --- a/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml +++ b/packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml @@ -13,12 +13,47 @@ spec: singular: hivecluster scope: Namespaced versions: - - name: v1 + - additionalPrinterColumns: + - jsonPath: .status.clusterPhase + name: Phase + priority: 0 + type: string + - jsonPath: .status.idleForMinutes + name: Idle (min) + priority: 0 + type: integer + - jsonPath: .status.suspendedSince + name: Suspended Since + priority: 1 + type: string + name: v1 schema: openAPIV3Schema: properties: spec: properties: + autoSuspend: + description: "Auto-suspend configuration. When enabled and all components\ + \ are idle for the configured timeout, the cluster scales to 0 replicas." + properties: + enabled: + default: false + description: "Whether auto-suspend is enabled. Requires autoscaling\ + \ to be enabled on all active components (HS2, LLAP if enabled,\ + \ TezAM if enabled, and HMS if includeMetastore is true)." 
+ type: boolean + idleTimeoutMinutes: + default: 15 + description: "Minutes of idle time (HS2=0 sessions, LLAP/TezAM\ + \ at minReplicas) before the cluster auto-suspends." + type: integer + includeMetastore: + default: true + description: "Whether Metastore participates in auto-suspend.\ + \ When false, HMS stays at minReplicas during suspend and HMS\ + \ autoscaling is not required for auto-suspend to activate." + type: boolean + type: object envVars: description: "Environment variables injected into all component pods\ \ (e.g., storage credentials, custom JVM options)" @@ -44,6 +79,70 @@ spec: hiveServer2: description: HiveServer2 component configuration properties: + autoscaling: + description: "Autoscaling configuration (operator-driven, no external\ + \ dependencies)" + properties: + cpuScaleDownThreshold: + default: 30 + description: CPU percentage (0-100) below which scale-down + is considered. Only applies to HS2 and HMS. + type: integer + cpuScaleUpThreshold: + default: 90 + description: CPU percentage (0-100) that triggers scale-up. + Only applies to HS2 and HMS. Set to 0 to disable CPU-based + scaling. + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 3600 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated. + The pod terminates immediately once sessions/connections + drain to 0; this value is only the upper safety cap. + type: integer + metricsPort: + default: 9404 + description: Port on which the Prometheus JMX Exporter serves + metrics. The operator scrapes this port on each pod for + autoscaling decisions. + type: integer + metricsScrapeIntervalSeconds: + default: 10 + description: How often (seconds) the operator scrapes JMX + metrics from pods. Lower values make autoscaling react faster. 
+ type: integer + minReplicas: + default: 0 + description: "Minimum number of replicas (floor for scale-down).\ + \ Set to 0 for scale-to-zero (LLAP, TezAM only; HS2 minimum\ + \ is 1)" + type: integer + scaleDownStabilizationSeconds: + default: 600 + description: Stabilization window in seconds for scale-down + decisions. How long metrics must consistently indicate fewer + replicas before scale-down occurs. Also acts as the cooldown + between consecutive scale-downs. + type: integer + scaleUpStabilizationSeconds: + default: 60 + description: Stabilization window in seconds for scale-up + decisions. Picks the highest recommendation within this + window to prevent flapping. + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions per pod for HS2, request rate for HMS, busy slots\ + \ per daemon for LLAP). Not used by TezAM (demand-based:\ + \ 1 TezAM per session)." + type: integer + type: object configOverrides: additionalProperties: type: string @@ -149,92 +248,317 @@ spec: imagePullPolicy: description: "Image pull policy: Always, Never, or IfNotPresent" type: string - llap: - description: LLAP daemon configuration. Enabled by default. - properties: - configOverrides: - additionalProperties: + llapClusterRouting: + description: "Server-side LLAP cluster routing rules. Maps users/groups\ + \ to LLAP cluster names so clients don't need to specify namespace\ + \ configs. Format: user:<name>=<cluster>,group:<name>=<cluster>,default=<cluster>.\ + \ Example: \"user:alice=llap1,group:eng=llap0,default=llap0\"" + type: string + llapClusters: + description: "LLAP compute clusters. Each entry is an independent\ + \ LLAP cluster with its own StatefulSet, autoscaling, and ZooKeeper\ + \ registration. Users select a cluster via hive.llap.daemon.service.hosts=@{name}\ + \ in their session." 
+ items: + properties: + autoscaling: + description: "Autoscaling configuration (operator-driven, no\ + \ external dependencies)" + properties: + cpuScaleDownThreshold: + default: 30 + description: CPU percentage (0-100) below which scale-down + is considered. Only applies to HS2 and HMS. + type: integer + cpuScaleUpThreshold: + default: 90 + description: CPU percentage (0-100) that triggers scale-up. + Only applies to HS2 and HMS. Set to 0 to disable CPU-based + scaling. + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 3600 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated. + The pod terminates immediately once sessions/connections + drain to 0; this value is only the upper safety cap. + type: integer + metricsPort: + default: 9404 + description: Port on which the Prometheus JMX Exporter serves + metrics. The operator scrapes this port on each pod for + autoscaling decisions. + type: integer + metricsScrapeIntervalSeconds: + default: 10 + description: How often (seconds) the operator scrapes JMX + metrics from pods. Lower values make autoscaling react + faster. + type: integer + minReplicas: + default: 0 + description: "Minimum number of replicas (floor for scale-down).\ + \ Set to 0 for scale-to-zero (LLAP, TezAM only; HS2 minimum\ + \ is 1)" + type: integer + scaleDownStabilizationSeconds: + default: 600 + description: Stabilization window in seconds for scale-down + decisions. How long metrics must consistently indicate + fewer replicas before scale-down occurs. Also acts as + the cooldown between consecutive scale-downs. + type: integer + scaleUpStabilizationSeconds: + default: 60 + description: Stabilization window in seconds for scale-up + decisions. Picks the highest recommendation within this + window to prevent flapping. 
+ type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions per pod for HS2, request rate for HMS, busy\ + \ slots per daemon for LLAP). Not used by TezAM (demand-based:\ + \ 1 TezAM per session)." + type: integer + type: object + configOverrides: + additionalProperties: + type: string + description: Additional configuration overrides as key-value + pairs + type: object + enabled: + default: true + description: Whether LLAP is enabled + type: boolean + executors: + default: 1 + description: Number of LLAP executors per daemon + type: integer + extraVolumeMounts: + description: Additional volume mounts for the container + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + extraVolumes: + description: "Additional volumes to attach to the pod (e.g.,\ + \ for keytabs or truststores)" + items: + type: object + type: array + x-kubernetes-preserve-unknown-fields: true + memoryMb: + default: 1024 + description: Memory in MB per LLAP daemon instance + type: integer + name: + description: "Unique name for this LLAP cluster (e.g. llap0,\ + \ llap1). Used as the ZooKeeper registration namespace and\ + \ Kubernetes resource suffix." type: string - description: Additional configuration overrides as key-value pairs - type: object - enabled: - default: true - description: Whether LLAP is enabled - type: boolean - executors: - default: 1 - description: Number of LLAP executors per daemon - type: integer - extraVolumeMounts: - description: Additional volume mounts for the container - items: + readinessProbe: + description: Readiness probe configuration + properties: + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. + type: integer + initialDelaySeconds: + description: Number of seconds after the container has started + before probes are initiated. 
+ type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. + type: integer + timeoutSeconds: + description: Number of seconds after which the probe times + out. + type: integer type: object - type: array - x-kubernetes-preserve-unknown-fields: true - extraVolumes: - description: "Additional volumes to attach to the pod (e.g., for\ - \ keytabs or truststores)" - items: + replicas: + default: 1 + description: Number of replicas + type: integer + resources: + description: Resource requirements for pods + properties: + limitsCpu: + description: "CPU limit (e.g. 2, 1000m)" + type: string + limitsMemory: + description: "Memory limit (e.g. 2Gi, 1024Mi)" + type: string + requestsCpu: + default: 500m + description: "CPU request (e.g. 500m, 1)" + type: string + requestsMemory: + default: 1Gi + description: "Memory request (e.g. 1Gi, 512Mi)" + type: string type: object - type: array - x-kubernetes-preserve-unknown-fields: true - memoryMb: - default: 1024 - description: Memory in MB per LLAP daemon instance - type: integer - readinessProbe: - description: Readiness probe configuration + serviceHosts: + description: "LLAP service hosts identifier for ZooKeeper registration.\ + \ Defaults to @{name} (e.g. @llap0)." + type: string + tezAm: + description: Per-LLAP TezAM configuration. Each LLAP cluster + gets its own TezAM with independent replica count and autoscaling. + properties: + autoscaling: + description: Autoscaling configuration for this LLAP cluster's + TezAM + properties: + cpuScaleDownThreshold: + default: 30 + description: CPU percentage (0-100) below which scale-down + is considered. Only applies to HS2 and HMS. + type: integer + cpuScaleUpThreshold: + default: 90 + description: CPU percentage (0-100) that triggers scale-up. + Only applies to HS2 and HMS. 
Set to 0 to disable CPU-based + scaling. + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this + component + type: boolean + gracePeriodSeconds: + default: 3600 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly + terminated. The pod terminates immediately once sessions/connections + drain to 0; this value is only the upper safety cap. + type: integer + metricsPort: + default: 9404 + description: Port on which the Prometheus JMX Exporter + serves metrics. The operator scrapes this port on + each pod for autoscaling decisions. + type: integer + metricsScrapeIntervalSeconds: + default: 10 + description: How often (seconds) the operator scrapes + JMX metrics from pods. Lower values make autoscaling + react faster. + type: integer + minReplicas: + default: 0 + description: "Minimum number of replicas (floor for\ + \ scale-down). Set to 0 for scale-to-zero (LLAP, TezAM\ + \ only; HS2 minimum is 1)" + type: integer + scaleDownStabilizationSeconds: + default: 600 + description: Stabilization window in seconds for scale-down + decisions. How long metrics must consistently indicate + fewer replicas before scale-down occurs. Also acts + as the cooldown between consecutive scale-downs. + type: integer + scaleUpStabilizationSeconds: + default: 60 + description: Stabilization window in seconds for scale-up + decisions. Picks the highest recommendation within + this window to prevent flapping. + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions per pod for HS2, request rate for HMS,\ + \ busy slots per daemon for LLAP). Not used by TezAM\ + \ (demand-based: 1 TezAM per session)." 
+ type: integer + type: object + replicas: + default: 1 + description: Max number of TezAM replicas for this LLAP + cluster + type: integer + type: object + required: + - name + type: object + x-kubernetes-preserve-unknown-fields: true + type: array + metastore: + description: Metastore component configuration + properties: + autoscaling: + description: "Autoscaling configuration (operator-driven, no external\ + \ dependencies)" properties: - failureThreshold: - description: Minimum consecutive failures for the probe to - be considered failed after having succeeded. + cpuScaleDownThreshold: + default: 30 + description: CPU percentage (0-100) below which scale-down + is considered. Only applies to HS2 and HMS. type: integer - initialDelaySeconds: - description: Number of seconds after the container has started - before probes are initiated. + cpuScaleUpThreshold: + default: 90 + description: CPU percentage (0-100) that triggers scale-up. + Only applies to HS2 and HMS. Set to 0 to disable CPU-based + scaling. type: integer - periodSeconds: - description: How often (in seconds) to perform the probe. + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 3600 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated. + The pod terminates immediately once sessions/connections + drain to 0; this value is only the upper safety cap. type: integer - successThreshold: - description: Minimum consecutive successes for the probe to - be considered successful after having failed. + metricsPort: + default: 9404 + description: Port on which the Prometheus JMX Exporter serves + metrics. The operator scrapes this port on each pod for + autoscaling decisions. type: integer - timeoutSeconds: - description: Number of seconds after which the probe times - out. 
+ metricsScrapeIntervalSeconds: + default: 10 + description: How often (seconds) the operator scrapes JMX + metrics from pods. Lower values make autoscaling react faster. + type: integer + minReplicas: + default: 0 + description: "Minimum number of replicas (floor for scale-down).\ + \ Set to 0 for scale-to-zero (LLAP, TezAM only; HS2 minimum\ + \ is 1)" + type: integer + scaleDownStabilizationSeconds: + default: 600 + description: Stabilization window in seconds for scale-down + decisions. How long metrics must consistently indicate fewer + replicas before scale-down occurs. Also acts as the cooldown + between consecutive scale-downs. + type: integer + scaleUpStabilizationSeconds: + default: 60 + description: Stabilization window in seconds for scale-up + decisions. Picks the highest recommendation within this + window to prevent flapping. + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions per pod for HS2, request rate for HMS, busy slots\ + \ per daemon for LLAP). Not used by TezAM (demand-based:\ + \ 1 TezAM per session)." type: integer type: object - replicas: - default: 1 - description: Number of replicas - type: integer - resources: - description: Resource requirements for pods - properties: - limitsCpu: - description: "CPU limit (e.g. 2, 1000m)" - type: string - limitsMemory: - description: "Memory limit (e.g. 2Gi, 1024Mi)" - type: string - requestsCpu: - default: 500m - description: "CPU request (e.g. 500m, 1)" - type: string - requestsMemory: - default: 1Gi - description: "Memory request (e.g. 
1Gi, 512Mi)" - type: string - type: object - serviceHosts: - description: LLAP service hosts identifier for ZooKeeper registration - type: string - type: object - x-kubernetes-preserve-unknown-fields: true - metastore: - description: Metastore component configuration - properties: configOverrides: additionalProperties: type: string @@ -368,9 +692,77 @@ spec: type: string type: object x-kubernetes-preserve-unknown-fields: true + suspend: + description: "When true, the cluster is immediately suspended (all\ + \ components scaled to 0). Set to false to wake a suspended cluster." + type: boolean tezAm: description: Tez Application Master configuration. Enabled by default. properties: + autoscaling: + description: "Autoscaling configuration (operator-driven, no external\ + \ dependencies)" + properties: + cpuScaleDownThreshold: + default: 30 + description: CPU percentage (0-100) below which scale-down + is considered. Only applies to HS2 and HMS. + type: integer + cpuScaleUpThreshold: + default: 90 + description: CPU percentage (0-100) that triggers scale-up. + Only applies to HS2 and HMS. Set to 0 to disable CPU-based + scaling. + type: integer + enabled: + default: false + description: Whether autoscaling is enabled for this component + type: boolean + gracePeriodSeconds: + default: 3600 + description: Maximum time in seconds to wait for graceful + drain during scale-down before the pod is forcibly terminated. + The pod terminates immediately once sessions/connections + drain to 0; this value is only the upper safety cap. + type: integer + metricsPort: + default: 9404 + description: Port on which the Prometheus JMX Exporter serves + metrics. The operator scrapes this port on each pod for + autoscaling decisions. + type: integer + metricsScrapeIntervalSeconds: + default: 10 + description: How often (seconds) the operator scrapes JMX + metrics from pods. Lower values make autoscaling react faster. 
+ type: integer + minReplicas: + default: 0 + description: "Minimum number of replicas (floor for scale-down).\ + \ Set to 0 for scale-to-zero (LLAP, TezAM only; HS2 minimum\ + \ is 1)" + type: integer + scaleDownStabilizationSeconds: + default: 600 + description: Stabilization window in seconds for scale-down + decisions. How long metrics must consistently indicate fewer + replicas before scale-down occurs. Also acts as the cooldown + between consecutive scale-downs. + type: integer + scaleUpStabilizationSeconds: + default: 60 + description: Stabilization window in seconds for scale-up + decisions. Picks the highest recommendation within this + window to prevent flapping. + type: integer + scaleUpThreshold: + default: 80 + description: "Threshold that triggers scale-up (component-specific:\ + \ sessions per pod for HS2, request rate for HMS, busy slots\ + \ per daemon for LLAP). Not used by TezAM (demand-based:\ + \ 1 TezAM per session)." + type: integer + type: object configOverrides: additionalProperties: type: string @@ -457,6 +849,8 @@ spec: x-kubernetes-preserve-unknown-fields: true status: properties: + clusterPhase: + type: string conditions: items: properties: @@ -476,25 +870,94 @@ spec: type: array hiveServer2: properties: - desiredReplicas: + autoscaling: + properties: + cpuProposedReplicas: + type: integer + cpuScaleUpThreshold: + type: integer + currentCpuPercent: + type: number + currentMetricValue: + type: integer + lastScaleTime: + type: string + proposedReplicas: + type: integer + scaleUpThreshold: + type: integer + type: object + currentReplicas: type: integer - phase: - type: string - readyReplicas: + maxReplicas: type: integer - type: object - llap: - properties: - desiredReplicas: + minReplicas: type: integer phase: type: string readyReplicas: type: integer type: object + idleForMinutes: + type: integer + idleSince: + type: string + llapClusters: + additionalProperties: + properties: + autoscaling: + properties: + cpuProposedReplicas: + type: 
integer + cpuScaleUpThreshold: + type: integer + currentCpuPercent: + type: number + currentMetricValue: + type: integer + lastScaleTime: + type: string + proposedReplicas: + type: integer + scaleUpThreshold: + type: integer + type: object + currentReplicas: + type: integer + maxReplicas: + type: integer + minReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object + type: object metastore: properties: - desiredReplicas: + autoscaling: + properties: + cpuProposedReplicas: + type: integer + cpuScaleUpThreshold: + type: integer + currentCpuPercent: + type: number + currentMetricValue: + type: integer + lastScaleTime: + type: string + proposedReplicas: + type: integer + scaleUpThreshold: + type: integer + type: object + currentReplicas: + type: integer + maxReplicas: + type: integer + minReplicas: type: integer phase: type: string @@ -503,14 +966,39 @@ spec: type: object observedGeneration: type: integer - tezAm: - properties: - desiredReplicas: - type: integer - phase: - type: string - readyReplicas: - type: integer + suspendedSince: + type: string + tezAmClusters: + additionalProperties: + properties: + autoscaling: + properties: + cpuProposedReplicas: + type: integer + cpuScaleUpThreshold: + type: integer + currentCpuPercent: + type: number + currentMetricValue: + type: integer + lastScaleTime: + type: string + proposedReplicas: + type: integer + scaleUpThreshold: + type: integer + type: object + currentReplicas: + type: integer + maxReplicas: + type: integer + minReplicas: + type: integer + phase: + type: string + readyReplicas: + type: integer + type: object type: object type: object type: object diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml index d27e1fea8c6f..3b0eb0e8e40f 100644 --- a/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml +++ 
b/packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml @@ -30,6 +30,10 @@ rules: - apiGroups: ["apps"] resources: ["deployments", "statefulsets"] verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + # Scale subresource for operator-driven autoscaling + - apiGroups: ["apps"] + resources: ["deployments/scale", "statefulsets/scale"] + verbs: ["get", "update", "patch"] # Jobs for schema initialization - apiGroups: ["batch"] resources: ["jobs"] @@ -46,7 +50,11 @@ rules: - apiGroups: [""] resources: ["events"] verbs: ["create", "patch"] - # Pods: read-only for readiness checking + # Pods: read + patch (patch needed for pod-deletion-cost annotation) - apiGroups: [""] resources: ["pods"] - verbs: ["get", "list", "watch"] + verbs: ["get", "list", "watch", "patch"] + # PodDisruptionBudgets for graceful autoscaling + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] diff --git a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml index 091ecefb3cb0..67ec6c168fb9 100644 --- a/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml @@ -67,6 +67,18 @@ spec: extraVolumeMounts: {{- toYaml .Values.cluster.metastore.extraVolumeMounts | nindent 6 }} {{- end }} + {{- if and .Values.cluster.metastore.autoscaling .Values.cluster.metastore.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .Values.cluster.metastore.autoscaling.minReplicas }} + scaleUpThreshold: {{ .Values.cluster.metastore.autoscaling.scaleUpThreshold }} + scaleUpStabilizationSeconds: {{ .Values.cluster.metastore.autoscaling.scaleUpStabilizationSeconds }} + scaleDownStabilizationSeconds: {{ .Values.cluster.metastore.autoscaling.scaleDownStabilizationSeconds }} + gracePeriodSeconds: {{ 
.Values.cluster.metastore.autoscaling.gracePeriodSeconds }} + metricsScrapeIntervalSeconds: {{ .Values.cluster.metastore.autoscaling.metricsScrapeIntervalSeconds | default 10 }} + cpuScaleUpThreshold: {{ .Values.cluster.metastore.autoscaling.cpuScaleUpThreshold | default 90 }} + cpuScaleDownThreshold: {{ .Values.cluster.metastore.autoscaling.cpuScaleDownThreshold | default 30 }} + {{- end }} {{- else }} {{- if .Values.cluster.metastore.externalUri }} externalUri: {{ .Values.cluster.metastore.externalUri | quote }} @@ -96,31 +108,72 @@ spec: extraVolumeMounts: {{- toYaml .Values.cluster.hiveServer2.extraVolumeMounts | nindent 6 }} {{- end }} + {{- if and .Values.cluster.hiveServer2.autoscaling .Values.cluster.hiveServer2.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .Values.cluster.hiveServer2.autoscaling.minReplicas }} + scaleUpThreshold: {{ .Values.cluster.hiveServer2.autoscaling.scaleUpThreshold }} + scaleUpStabilizationSeconds: {{ .Values.cluster.hiveServer2.autoscaling.scaleUpStabilizationSeconds }} + scaleDownStabilizationSeconds: {{ .Values.cluster.hiveServer2.autoscaling.scaleDownStabilizationSeconds }} + gracePeriodSeconds: {{ .Values.cluster.hiveServer2.autoscaling.gracePeriodSeconds }} + metricsScrapeIntervalSeconds: {{ .Values.cluster.hiveServer2.autoscaling.metricsScrapeIntervalSeconds | default 10 }} + cpuScaleUpThreshold: {{ .Values.cluster.hiveServer2.autoscaling.cpuScaleUpThreshold | default 90 }} + cpuScaleDownThreshold: {{ .Values.cluster.hiveServer2.autoscaling.cpuScaleDownThreshold | default 30 }} + {{- end }} + + {{- if .Values.cluster.llapClusterRouting }} + llapClusterRouting: {{ .Values.cluster.llapClusterRouting | quote }} + {{- end }} - llap: - enabled: {{ .Values.cluster.llap.enabled }} - {{- if .Values.cluster.llap.enabled }} - replicas: {{ .Values.cluster.llap.replicas }} - executors: {{ .Values.cluster.llap.executors }} - memoryMb: {{ .Values.cluster.llap.memoryMb }} - serviceHosts: {{ 
.Values.cluster.llap.serviceHosts | quote }} - {{- if .Values.cluster.llap.resources }} + {{- if .Values.cluster.llapClusters }} + llapClusters: + {{- range .Values.cluster.llapClusters }} + - name: {{ .name }} + enabled: {{ .enabled | default true }} + replicas: {{ .replicas | default 2 }} + executors: {{ .executors | default 1 }} + memoryMb: {{ .memoryMb | default 1024 }} + {{- if .resources }} resources: - {{- toYaml .Values.cluster.llap.resources | nindent 6 }} + {{- toYaml .resources | nindent 6 }} {{- end }} - {{- if .Values.cluster.llap.configOverrides }} + {{- if .configOverrides }} configOverrides: - {{- toYaml .Values.cluster.llap.configOverrides | nindent 6 }} + {{- toYaml .configOverrides | nindent 6 }} {{- end }} - {{- if .Values.cluster.llap.extraVolumes }} + {{- if .extraVolumes }} extraVolumes: - {{- toYaml .Values.cluster.llap.extraVolumes | nindent 6 }} + {{- toYaml .extraVolumes | nindent 6 }} {{- end }} - {{- if .Values.cluster.llap.extraVolumeMounts }} + {{- if .extraVolumeMounts }} extraVolumeMounts: - {{- toYaml .Values.cluster.llap.extraVolumeMounts | nindent 6 }} - {{- end }} + {{- toYaml .extraVolumeMounts | nindent 6 }} + {{- end }} + {{- if and .autoscaling .autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .autoscaling.minReplicas }} + scaleUpThreshold: {{ .autoscaling.scaleUpThreshold }} + scaleUpStabilizationSeconds: {{ .autoscaling.scaleUpStabilizationSeconds }} + scaleDownStabilizationSeconds: {{ .autoscaling.scaleDownStabilizationSeconds }} + gracePeriodSeconds: {{ .autoscaling.gracePeriodSeconds }} + metricsScrapeIntervalSeconds: {{ .autoscaling.metricsScrapeIntervalSeconds | default 10 }} + {{- end }} + {{- if .tezAm }} + tezAm: + replicas: {{ .tezAm.replicas | default 1 }} + {{- if and .tezAm.autoscaling .tezAm.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .tezAm.autoscaling.minReplicas }} + scaleUpStabilizationSeconds: {{ .tezAm.autoscaling.scaleUpStabilizationSeconds }} + 
scaleDownStabilizationSeconds: {{ .tezAm.autoscaling.scaleDownStabilizationSeconds }} + gracePeriodSeconds: {{ .tezAm.autoscaling.gracePeriodSeconds }} + metricsScrapeIntervalSeconds: {{ .tezAm.autoscaling.metricsScrapeIntervalSeconds | default 10 }} + {{- end }} {{- end }} + {{- end }} + {{- end }} tezAm: enabled: {{ .Values.cluster.tezAm.enabled }} @@ -146,6 +199,15 @@ spec: extraVolumeMounts: {{- toYaml .Values.cluster.tezAm.extraVolumeMounts | nindent 6 }} {{- end }} + {{- if and .Values.cluster.tezAm.autoscaling .Values.cluster.tezAm.autoscaling.enabled }} + autoscaling: + enabled: true + minReplicas: {{ .Values.cluster.tezAm.autoscaling.minReplicas }} + scaleUpStabilizationSeconds: {{ .Values.cluster.tezAm.autoscaling.scaleUpStabilizationSeconds }} + scaleDownStabilizationSeconds: {{ .Values.cluster.tezAm.autoscaling.scaleDownStabilizationSeconds }} + gracePeriodSeconds: {{ .Values.cluster.tezAm.autoscaling.gracePeriodSeconds }} + metricsScrapeIntervalSeconds: {{ .Values.cluster.tezAm.autoscaling.metricsScrapeIntervalSeconds | default 10 }} + {{- end }} {{- end }} zookeeper: @@ -176,4 +238,15 @@ spec: volumeMounts: {{- toYaml .Values.cluster.storage.volumeMounts | nindent 4 }} {{- end }} + + {{- if and .Values.cluster.autoSuspend .Values.cluster.autoSuspend.enabled }} + autoSuspend: + enabled: true + idleTimeoutMinutes: {{ .Values.cluster.autoSuspend.idleTimeoutMinutes | default 15 }} + {{- if hasKey .Values.cluster.autoSuspend "includeMetastore" }} + includeMetastore: {{ .Values.cluster.autoSuspend.includeMetastore }} + {{- end }} + {{- end }} + + suspend: false {{- end }} diff --git a/packaging/src/kubernetes/helm/hive-operator/values.yaml b/packaging/src/kubernetes/helm/hive-operator/values.yaml index b7d75930c5b2..26f7eeb5af5b 100644 --- a/packaging/src/kubernetes/helm/hive-operator/values.yaml +++ b/packaging/src/kubernetes/helm/hive-operator/values.yaml @@ -101,6 +101,20 @@ cluster: # mountPath: /etc/gcs # readOnly: true + # 
--------------------------------------------------------------------------- + # AUTO-SUSPEND — fully hibernates the cluster after idle timeout + # --------------------------------------------------------------------------- + # When enabled (requires autoscaling on all active components), the operator + # scales the entire cluster to 0 replicas after all components have been idle + # for idleTimeoutMinutes. Use kubectl patch to manually suspend/wake: + # kubectl patch hivecluster hive --type=merge -p '{"spec":{"suspend":true}}' + # kubectl patch hivecluster hive --type=merge -p '{"spec":{"suspend":false}}' + autoSuspend: + enabled: false + idleTimeoutMinutes: 15 + # Set to false to keep HMS running during suspend (HMS autoscaling not required) + includeMetastore: true + # --------------------------------------------------------------------------- # METASTORE — defaults to enabled, 2 replicas (HA) # --------------------------------------------------------------------------- @@ -112,6 +126,19 @@ cluster: configOverrides: {} extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (operator-driven, no external dependencies) + # The operator scrapes JMX Exporter metrics from pods directly. + # When enabled, 'replicas' above acts as the max replica ceiling. 
+ autoscaling: + enabled: false + minReplicas: 1 + scaleUpThreshold: 100 + scaleUpStabilizationSeconds: 60 + scaleDownStabilizationSeconds: 300 + gracePeriodSeconds: 60 + metricsScrapeIntervalSeconds: 10 + cpuScaleUpThreshold: 90 + cpuScaleDownThreshold: 30 # Set to use an external Metastore instead of deploying one: # enabled: false # externalUri: "thrift://external-metastore:9083" @@ -127,20 +154,53 @@ cluster: externalJars: [] extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (operator-driven, no external dependencies) + # When enabled, 'replicas' above acts as the max replica ceiling + autoscaling: + enabled: false + minReplicas: 1 + scaleUpThreshold: 100 + scaleUpStabilizationSeconds: 60 + scaleDownStabilizationSeconds: 600 + gracePeriodSeconds: 300 + metricsScrapeIntervalSeconds: 10 + cpuScaleUpThreshold: 90 + cpuScaleDownThreshold: 30 + + # --------------------------------------------------------------------------- + # LLAP CLUSTER ROUTING — server-side rules that map users/groups to LLAP + # clusters. When set, clients don't need to specify namespace configs. + # Format: user:<user>=<cluster>,group:<group>=<cluster>,default=<cluster> + # Example: "user:alice=llap0,group:eng=llap0,default=llap0" + # --------------------------------------------------------------------------- + llapClusterRouting: "" # --------------------------------------------------------------------------- - # LLAP — enabled by default for full-HA + # LLAP CLUSTERS — each entry is an independent LLAP cluster with its own + # StatefulSet, autoscaling, and ZooKeeper registration.
+ # Users select a cluster via: SET hive.llap.daemon.service.hosts=@llap0; # --------------------------------------------------------------------------- - llap: + llapClusters: + - name: llap0 enabled: true replicas: 2 executors: 1 memoryMb: 1024 - serviceHosts: "@llap0" resources: {} configOverrides: {} extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (operator-driven, no external dependencies) + # minReplicas: 0 enables scale-to-zero — scales up when HS2 has active sessions + # When enabled, 'replicas' above acts as the max replica ceiling + autoscaling: + enabled: false + minReplicas: 0 + scaleUpThreshold: 10 + scaleUpStabilizationSeconds: 60 + scaleDownStabilizationSeconds: 900 + gracePeriodSeconds: 600 + metricsScrapeIntervalSeconds: 10 # --------------------------------------------------------------------------- # TEZ AM — enabled by default for full-HA @@ -154,3 +214,15 @@ cluster: configOverrides: {} extraVolumes: [] extraVolumeMounts: [] + # Autoscaling (operator-driven, no external dependencies) + # minReplicas: 0 enables scale-to-zero — wakes when HS2 receives queries + # When enabled, 'replicas' above acts as the max replica ceiling + # TezAM scales demand-based: max(totalSessions, hs2Pods * sessionsPerQueue) + # No scaleUpThreshold needed — scaling is 1:1 with session demand + autoscaling: + enabled: false + minReplicas: 0 + scaleUpStabilizationSeconds: 60 + scaleDownStabilizationSeconds: 600 + gracePeriodSeconds: 120 + metricsScrapeIntervalSeconds: 10 diff --git a/packaging/src/kubernetes/pom.xml b/packaging/src/kubernetes/pom.xml index 3e3c24792b91..729b5defd6cb 100644 --- a/packaging/src/kubernetes/pom.xml +++ b/packaging/src/kubernetes/pom.xml @@ -26,6 +26,10 @@ Kubernetes operator for managing Apache Hive clusters ../../.. 
+ + 2.0.16 @@ -48,6 +52,10 @@ kubernetes-httpclient-vertx ${fabric8.version} + + org.apache.commons + commons-lang3 + io.github.java-diff-utils java-diff-utils @@ -65,9 +73,14 @@ ${fabric8.version} provided + + org.slf4j + slf4j-api + ${slf4j2.version} + org.apache.logging.log4j - log4j-slf4j-impl + log4j-slf4j2-impl ${log4j2.version} @@ -189,6 +202,7 @@ docker build + --no-cache -t apache/hive:operator-${project.version} . diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java index 55bd3372a40d..d02f08fff038 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/HiveOperatorMain.java @@ -19,7 +19,11 @@ package org.apache.hive.kubernetes.operator; import io.javaoperatorsdk.operator.Operator; +import io.javaoperatorsdk.operator.api.config.ControllerConfiguration; +import io.javaoperatorsdk.operator.api.config.ResolvedControllerConfiguration; +import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.reconciler.HiveClusterReconciler; +import org.apache.hive.kubernetes.operator.reconciler.HiveWorkflowSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,7 +40,16 @@ private HiveOperatorMain() { public static void main(String[] args) { LOG.info("Starting Hive Kubernetes Operator"); Operator operator = new Operator(); - operator.register(new HiveClusterReconciler()); + HiveClusterReconciler reconciler = new HiveClusterReconciler(); + // Get the annotation-derived base config, then inject our programmatic workflow spec. 
+ ControllerConfiguration baseConfig = + operator.getConfigurationService().getConfigurationFor(reconciler); + HiveWorkflowSpec workflowSpec = new HiveWorkflowSpec(); + ((ResolvedControllerConfiguration) baseConfig) + .setWorkflowSpec(workflowSpec); + LOG.info("Registered workflow with {} dependent resource specs", + workflowSpec.getDependentResourceSpecs().size()); + operator.register(reconciler, baseConfig); operator.start(); LOG.info("Hive Kubernetes Operator started successfully"); } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/BackgroundMetricsScraper.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/BackgroundMetricsScraper.java new file mode 100644 index 000000000000..0e13136b07c5 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/BackgroundMetricsScraper.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hive.kubernetes.operator.autoscaling;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Runs periodic metrics scraping in the background so that the JOSDK reconcile
+ * thread is never blocked by HTTP calls to pod JMX exporters.
+ * <p>
+ * Each component gets its own scheduled task that writes results to a shared
+ * {@link MetricsCache}. The reconciler reads from that cache (non-blocking).
+ */
+public class BackgroundMetricsScraper {
+
+  private static final Logger LOG = LoggerFactory.getLogger(BackgroundMetricsScraper.class);
+
+  private static final int SCRAPER_THREAD_POOL_SIZE = 4;
+
+  /**
+   * The parameters a scrape task was scheduled with. Compared on every
+   * {@link #registerOrUpdate} call so that a change to any of them (not only
+   * the interval) causes the task to be rescheduled.
+   */
+  private record Registration(Map<String, String> selector, int metricsPort, int intervalSecs) {}
+
+  private final ScheduledExecutorService scheduler;
+  private final MetricsScraper scraper;
+  private final MetricsCache cache;
+  // Key: "namespace/clusterName/component" → active scrape task
+  private final ConcurrentHashMap<String, ScheduledFuture<?>> activeTasks =
+      new ConcurrentHashMap<>();
+  // Key: same as activeTasks → parameters the active task was scheduled with
+  private final ConcurrentHashMap<String, Registration> registrations =
+      new ConcurrentHashMap<>();
+
+  /**
+   * Creates the scraper with a fixed pool of daemon threads.
+   *
+   * @param scraper performs the actual per-pod metrics scrape
+   * @param cache   receives the result of every successful scrape
+   */
+  public BackgroundMetricsScraper(MetricsScraper scraper, MetricsCache cache) {
+    this.scraper = scraper;
+    this.cache = cache;
+    this.scheduler = Executors.newScheduledThreadPool(SCRAPER_THREAD_POOL_SIZE, r -> {
+      Thread t = new Thread(r, "hive-metrics-scraper");
+      // Daemon threads so a forgotten shutdown() cannot keep the JVM alive.
+      t.setDaemon(true);
+      return t;
+    });
+  }
+
+  /**
+   * Registers (or updates) a periodic scrape task for a component.
+   * Idempotent — the task is only recreated when the selector, the metrics
+   * port or the interval differs from what the active task was scheduled with.
+   *
+   * @param namespace    the Kubernetes namespace
+   * @param clusterName  the HiveCluster name
+   * @param component    the component name (e.g., "hiveserver2")
+   * @param selector     label selector for pod listing
+   * @param metricsPort  the JMX exporter port
+   * @param intervalSecs how often to scrape (from AutoscalingSpec); must be positive
+   * @throws IllegalArgumentException if {@code intervalSecs} is not positive; any
+   *         previously registered task for the component is left running
+   */
+  public void registerOrUpdate(String namespace, String clusterName,
+      String component, Map<String, String> selector,
+      int metricsPort, int intervalSecs) {
+    String key = HiveClusterAutoscaler.cacheKey(namespace, clusterName, component);
+    // Validate before touching any state: scheduleWithFixedDelay rejects a
+    // non-positive delay, and failing after the old task was cancelled would
+    // leave the component without any scrape task.
+    if (intervalSecs <= 0) {
+      throw new IllegalArgumentException(
+          "Metrics scrape interval must be positive, got " + intervalSecs + "s for " + key);
+    }
+
+    Registration requested = new Registration(selector, metricsPort, intervalSecs);
+    if (requested.equals(registrations.get(key))) {
+      return; // Already registered with the same parameters
+    }
+
+    // Drop the old registration first so that a failure while scheduling leaves
+    // a consistent "nothing registered" state and the next call retries.
+    registrations.remove(key);
+    ScheduledFuture<?> oldTask = activeTasks.remove(key);
+    if (oldTask != null) {
+      oldTask.cancel(false);
+    }
+
+    ScheduledFuture<?> future = scheduler.scheduleWithFixedDelay(
+        () -> scrapeAndStore(key, namespace, selector, metricsPort),
+        0, intervalSecs, TimeUnit.SECONDS);
+
+    activeTasks.put(key, future);
+    registrations.put(key, requested);
+    LOG.debug("Registered background scrape for {} (interval={}s)", key, intervalSecs);
+  }
+
+  /**
+   * Unregisters all scrape tasks for a deleted cluster and drops its cached metrics.
+   *
+   * @param namespace   the Kubernetes namespace
+   * @param clusterName the HiveCluster name
+   */
+  public void unregisterCluster(String namespace, String clusterName) {
+    String prefix = namespace + "/" + clusterName + "/";
+    activeTasks.entrySet().removeIf(entry -> {
+      if (entry.getKey().startsWith(prefix)) {
+        entry.getValue().cancel(false);
+        return true;
+      }
+      return false;
+    });
+    registrations.keySet().removeIf(k -> k.startsWith(prefix));
+    cache.removeByPrefix(prefix);
+    LOG.debug("Unregistered background scrape tasks for {}/{}", namespace, clusterName);
+  }
+
+  /**
+   * Shuts down the background scheduler. Called on operator shutdown.
+   */
+  public void shutdown() {
+    scheduler.shutdownNow();
+  }
+
+  /**
+   * Body of one scheduled run: scrape the selected pods and publish the result.
+   * Exceptions are caught here because an exception escaping a periodic task
+   * suppresses all of its subsequent executions.
+   */
+  private void scrapeAndStore(String key, String namespace,
+      Map<String, String> selector, int metricsPort) {
+    try {
+      List<PodMetrics> metrics = scraper.scrape(namespace, selector, metricsPort);
+      cache.put(key, metrics);
+    } catch (Exception e) {
+      // Do not update cache on failure — staleness check handles it
+      LOG.debug("Background scrape failed for {}: {}", key, e.getMessage(), e);
+    }
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/ComponentAutoscaler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/ComponentAutoscaler.java
new file mode 100644
index 000000000000..6824b0e8cb79
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/ComponentAutoscaler.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.autoscaling;
+
+import java.time.Duration;
+import java.util.List;
+
+import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec;
+import org.apache.hive.kubernetes.operator.util.ConfigUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Per-component autoscaler state. Owns the scaling strategy and the
+ * scale-up / scale-down stabilization windows for one component.
+ * <p>
+ * Instances keep mutable state in plain fields and do no synchronization of
+ * their own.
+ */
+public class ComponentAutoscaler {
+
+  /**
+   * Result of an autoscaling evaluation.
+   *
+   * @param rawMetricValue      the metric value last reported by the strategy
+   * @param cpuPercent          average CPU percentage used for the CPU recommendation
+   *                            (0 when CPU scaling did not run or had no data)
+   * @param cpuProposedReplicas replica count recommended by CPU alone (0 when not applicable)
+   * @param proposedReplicas    combined recommendation after clamping to [min, max],
+   *                            before stabilization
+   * @param patchTo             the replica count to apply, or {@code null} if no change
+   */
+  public record EvaluationResult(int rawMetricValue, double cpuPercent,
+      int cpuProposedReplicas, int proposedReplicas, Integer patchTo) {}
+
+  private static final Logger LOG = LoggerFactory.getLogger(ComponentAutoscaler.class);
+
+  private static final String METRIC_CPU_LOAD = "jvm_process_cpu_load";
+
+  private final String component;
+  private final ScalingStrategy strategy;
+  private final boolean cpuScalingApplicable;
+  private StabilizationWindow scaleUpWindow;
+  private StabilizationWindow scaleDownWindow;
+  private int lastScaleUpStabilization = -1;
+  private int lastScaleDownStabilization = -1;
+  // False whenever the current scale-down window has not been seeded yet.
+  private boolean initialized;
+  private double lastCpuPercent;
+
+  /**
+   * @param component the component name; CPU-based scaling is only applied to
+   *                  HiveServer2 and Metastore
+   * @param strategy  the metric-based scaling strategy for this component
+   */
+  public ComponentAutoscaler(String component, ScalingStrategy strategy) {
+    this.component = component;
+    this.strategy = strategy;
+    this.cpuScalingApplicable = ConfigUtils.COMPONENT_HIVESERVER2.equals(component)
+        || ConfigUtils.COMPONENT_METASTORE.equals(component);
+  }
+
+  /** Whether the underlying strategy uses scaleUpThreshold for scaling decisions. */
+  public boolean usesScaleUpThreshold() {
+    return strategy.usesScaleUpThreshold();
+  }
+
+  /**
+   * Evaluate metrics and return the evaluation result containing
+   * raw metric value, proposed replicas, and the actual patch (null if no change).
+   *
+   * @param metrics         the latest per-pod metrics
+   * @param spec            the autoscaling settings of the component
+   * @param currentReplicas the replica count currently in effect
+   * @param maxReplicas     the upper bound for any recommendation
+   * @return the evaluation; {@code patchTo} is {@code null} when replicas should not change
+   */
+  public EvaluationResult evaluate(List<PodMetrics> metrics, AutoscalingSpec spec,
+      int currentReplicas, int maxReplicas) {
+
+    ensureWindows(spec);
+
+    // Seed a fresh scale-down window with currentReplicas.
+    // This prevents immediate scale-down after operator restart (or after the window was
+    // rebuilt for a changed stabilization period) when the window has no history.
+    if (!initialized) {
+      initialized = true;
+      scaleDownWindow.record(currentReplicas);
+      LOG.debug("[{}] Initialized scale-down window with currentReplicas={}", component, currentReplicas);
+    }
+
+    int rawDesired = strategy.computeDesiredReplicas(metrics, spec, maxReplicas);
+    int metricValue = strategy.lastMetricValue();
+
+    // CPU-based scaling: combine with metric-based desired via max()
+    int cpuDesired = computeCpuDesired(metrics, spec, currentReplicas);
+    int combined = Math.max(rawDesired, cpuDesired);
+    int clamped = Math.max(spec.minReplicas(), Math.min(combined, maxReplicas));
+
+    scaleUpWindow.record(clamped);
+    scaleDownWindow.record(clamped);
+
+    int target;
+    if (clamped > currentReplicas) {
+      // Scale up: use stabilized max (highest recommendation in window — don't under-scale)
+      target = scaleUpWindow.stabilizedMax();
+    } else if (clamped < currentReplicas) {
+      // Scale down: use stabilized max (highest/most conservative recommendation in window —
+      // prevents premature scale-down, matches HPA selectPolicy: Max behavior).
+      // The stabilization window duration serves as the cooldown between scale-downs.
+      target = scaleDownWindow.stabilizedMax();
+    } else {
+      target = currentReplicas;
+    }
+
+    // Ensure target is still within bounds
+    target = Math.max(spec.minReplicas(), Math.min(target, maxReplicas));
+
+    if (target == currentReplicas) {
+      return new EvaluationResult(metricValue, lastCpuPercent, cpuDesired, clamped, null);
+    }
+
+    if (target < currentReplicas) {
+      LOG.info("[{}] Scaling down: {} -> {}", component, currentReplicas, target);
+    } else {
+      LOG.info("[{}] Scaling up: {} -> {}", component, currentReplicas, target);
+    }
+    return new EvaluationResult(metricValue, lastCpuPercent, cpuDesired, clamped, target);
+  }
+
+  /**
+   * Compute desired replicas based on CPU utilization.
+   * Returns 0 if CPU scaling is not applicable or no usable CPU data is available.
+   * Also records the averaged CPU percentage in {@code lastCpuPercent}.
+   */
+  private int computeCpuDesired(List<PodMetrics> metrics, AutoscalingSpec spec, int currentReplicas) {
+    if (!cpuScalingApplicable || spec.cpuScaleUpThreshold() <= 0 || metrics.isEmpty()) {
+      lastCpuPercent = 0;
+      return 0;
+    }
+
+    double totalCpu = 0;
+    int count = 0;
+    for (PodMetrics pm : metrics) {
+      Double cpu = pm.metrics().get(METRIC_CPU_LOAD);
+      // Ignore pods without a usable sample. A NaN would poison the average and a
+      // negative value would drag it down and could trigger a spurious scale-down.
+      // NOTE(review): presumably this gauge mirrors the JVM's ProcessCpuLoad, which
+      // reports a negative value while the load is unavailable — confirm against the
+      // exporter configuration.
+      if (cpu == null || cpu.isNaN() || cpu < 0) {
+        continue;
+      }
+      totalCpu += cpu * 100.0;
+      count++;
+    }
+    if (count == 0) {
+      lastCpuPercent = 0;
+      return 0;
+    }
+    double avgCpuPercent = totalCpu / count;
+    lastCpuPercent = avgCpuPercent;
+    LOG.debug("[{}] CPU raw: totalCpu={}, count={}, avg={}%", component, totalCpu, count, avgCpuPercent);
+
+    if (avgCpuPercent >= spec.cpuScaleUpThreshold()) {
+      // Scale up proportionally: how many pods to bring avg below threshold
+      return (int) Math.ceil(avgCpuPercent * currentReplicas / spec.cpuScaleUpThreshold());
+    } else if (avgCpuPercent < spec.cpuScaleDownThreshold()) {
+      // Scale down: current load could fit in fewer pods
+      int desired = (int) Math.ceil(avgCpuPercent * currentReplicas / spec.cpuScaleUpThreshold());
+      return Math.max(desired, spec.minReplicas());
+    }
+    // Between thresholds: hold current
+    return currentReplicas;
+  }
+
+  /**
+   * Creates the stabilization windows on first use and rebuilds a window when its
+   * configured duration changed. A rebuilt scale-down window starts empty, so the
+   * seeding flag is cleared to have {@code evaluate} seed it again.
+   */
+  private void ensureWindows(AutoscalingSpec spec) {
+    if (scaleUpWindow == null || lastScaleUpStabilization != spec.scaleUpStabilizationSeconds()) {
+      scaleUpWindow = new StabilizationWindow(
+          Duration.ofSeconds(spec.scaleUpStabilizationSeconds()));
+      lastScaleUpStabilization = spec.scaleUpStabilizationSeconds();
+    }
+    if (scaleDownWindow == null || lastScaleDownStabilization != spec.scaleDownStabilizationSeconds()) {
+      scaleDownWindow = new StabilizationWindow(
+          Duration.ofSeconds(spec.scaleDownStabilizationSeconds()));
+      lastScaleDownStabilization = spec.scaleDownStabilizationSeconds();
+      initialized = false;
+    }
+  }
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/HiveClusterAutoscaler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/HiveClusterAutoscaler.java
new file mode 100644
index 000000000000..df46ced674c6
--- /dev/null
+++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/HiveClusterAutoscaler.java
@@ -0,0 +1,405 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.time.Duration; +import java.time.Instant; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import io.fabric8.kubernetes.client.KubernetesClient; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.model.status.AutoscalingStatus; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Main autoscaling orchestrator. Evaluates all enabled components and + * returns a map of component → desired replica count for those that need changing. + *

+ * Maintains per-cluster, per-component state (stabilization windows). + */ +public class HiveClusterAutoscaler { + + private static final Logger LOG = LoggerFactory.getLogger(HiveClusterAutoscaler.class); + + /** Result of evaluating all components. */ + public record AutoscalingEvaluation( + Map patches, + Map statuses) {} + + // Shared replica store: the autoscaler writes its desired replicas here so that + // dependent resources can read them (avoids informer cache lag reverting patches). + // Key: "namespace/clusterName/component" → desired replicas + private static final ConcurrentHashMap MANAGED_REPLICAS = + new ConcurrentHashMap<>(); + + /** Builds the cache key used for per-component state maps. */ + static String cacheKey(String namespace, String clusterName, String component) { + return namespace + "/" + clusterName + "/" + component; + } + + /** + * Returns the autoscaler-managed replica count for a component, or null if the + * autoscaler hasn't made a decision yet (e.g., first reconcile before evaluation runs). + */ + public static Integer getManagedReplicas(String namespace, String clusterName, String component) { + return MANAGED_REPLICAS.get(cacheKey(namespace, clusterName, component)); + } + + /** + * Sets the managed replica count for a component. Used by suspend/wake logic + * to override what the autoscaler would normally compute. 
+ */ + public static void setManagedReplicas(String namespace, String clusterName, + String component, int replicas) { + MANAGED_REPLICAS.put(cacheKey(namespace, clusterName, component), replicas); + } + + private record PendingScaleDown(int targetReplicas, Instant annotatedAt) {} + + private final BackgroundMetricsScraper bgScraper; + private final MetricsCache metricsCache; + // Key: "namespace/clusterName/component" + private final ConcurrentHashMap autoscalers = + new ConcurrentHashMap<>(); + private final ConcurrentHashMap lastScaleTimes = + new ConcurrentHashMap<>(); + // Two-phase scale-down: holds deferred scale-down targets while pod-deletion-cost + // annotations propagate (2s delay before applying the actual scale patch). + private final ConcurrentHashMap pendingScaleDowns = + new ConcurrentHashMap<>(); + + public HiveClusterAutoscaler(MetricsScraper scraper, + BackgroundMetricsScraper bgScraper, MetricsCache metricsCache) { + this.bgScraper = bgScraper; + this.metricsCache = metricsCache; + } + + public BackgroundMetricsScraper getBackgroundScraper() { + return bgScraper; + } + + /** + * Removes all in-memory state for a deleted HiveCluster to prevent memory leaks. + */ + public void cleanupCluster(String namespace, String clusterName) { + String prefix = namespace + "/" + clusterName + "/"; + MANAGED_REPLICAS.keySet().removeIf(k -> k.startsWith(prefix)); + autoscalers.keySet().removeIf(k -> k.startsWith(prefix)); + lastScaleTimes.keySet().removeIf(k -> k.startsWith(prefix)); + pendingScaleDowns.keySet().removeIf(k -> k.startsWith(prefix)); + LOG.info("Cleaned up autoscaler state for {}/{}", namespace, clusterName); + } + + /** + * Returns true if there are pending scale-down operations waiting for + * annotation propagation. The reconciler should reschedule sooner (2s) + * when this returns true. 
+ */ + public boolean hasPendingScaleDowns() { + return !pendingScaleDowns.isEmpty(); + } + + /** + * Evaluate all autoscaling-enabled components and return patches and status info. + * + * @param cluster the HiveCluster resource + * @param client the Kubernetes client (for reading current replica counts) + * @return evaluation result with patches and per-component autoscaling statuses + */ + public AutoscalingEvaluation evaluate(HiveCluster cluster, KubernetesClient client) { + Map patches = new HashMap<>(); + Map statuses = new HashMap<>(); + HiveClusterSpec spec = cluster.getSpec(); + String namespace = cluster.getMetadata().getNamespace(); + String clusterName = cluster.getMetadata().getName(); + + // Always register HS2 metrics scraping when LLAP/TezAM autoscaling needs + // the activation gate (hs2_llap_target_sessions_*), even if HS2 itself + // doesn't autoscale. + boolean llapOrTezAmAutoscales = spec.llapClusters().stream().anyMatch( + l -> l.isEnabled() && (l.autoscaling().isEnabled() + || (spec.tezAm().isEnabled() && l.tezAm().autoscaling().isEnabled()))); + boolean scrapeHs2 = spec.hiveServer2().autoscaling().isEnabled() || llapOrTezAmAutoscales; + if (scrapeHs2) { + AutoscalingSpec hs2Auto = spec.hiveServer2().autoscaling(); + Map hs2Selector = Labels.selectorForComponent(cluster, ConfigUtils.COMPONENT_HIVESERVER2); + bgScraper.registerOrUpdate(namespace, clusterName, + ConfigUtils.COMPONENT_HIVESERVER2, hs2Selector, + hs2Auto.metricsPort(), hs2Auto.metricsScrapeIntervalSeconds()); + } + + // HiveServer2 + if (spec.hiveServer2().autoscaling().isEnabled()) { + AutoscalingSpec hs2Auto = spec.hiveServer2().autoscaling(); + String hs2Key = namespace + "/" + clusterName + "/" + ConfigUtils.COMPONENT_HIVESERVER2; + int maxStale = hs2Auto.metricsScrapeIntervalSeconds() * 3; + List hs2Metrics = metricsCache.getOrEmpty(hs2Key, maxStale); + + // Two-phase scale-down: check if a pending scale-down from a prior + // reconcile is ready to be applied (annotations 
have propagated). + PendingScaleDown pending = pendingScaleDowns.get(hs2Key); + if (pending != null) { + if (Duration.between(pending.annotatedAt(), Instant.now()).toSeconds() >= 2) { + patches.put(ConfigUtils.COMPONENT_HIVESERVER2, pending.targetReplicas()); + MANAGED_REPLICAS.put(hs2Key, pending.targetReplicas()); + lastScaleTimes.put(hs2Key, Instant.now().toString()); + pendingScaleDowns.remove(hs2Key); + LOG.info("[hiveserver2] Applying deferred scale-down to {} replicas", pending.targetReplicas()); + } + // Build status even when waiting for pending scale-down + evaluateComponent(cluster, client, namespace, clusterName, + ConfigUtils.COMPONENT_HIVESERVER2, hs2Auto, + spec.hiveServer2().replicas(), new HashMap<>(), statuses, hs2Metrics); + } else { + // Pod deletion cost only applies to Deployments (ReplicaSet controller). + // StatefulSets always scale down by highest ordinal regardless of this + // annotation. LLAP/TezAM graceful drain is handled by preStop hooks. + updateDeploymentPodDeletionCost(client, namespace, hs2Metrics, "hs2_open_sessions"); + + Map hs2Patches = new HashMap<>(); + evaluateComponent(cluster, client, namespace, clusterName, + ConfigUtils.COMPONENT_HIVESERVER2, hs2Auto, + spec.hiveServer2().replicas(), hs2Patches, statuses, hs2Metrics); + + Integer hs2Patch = hs2Patches.get(ConfigUtils.COMPONENT_HIVESERVER2); + int currentReplicas = getCurrentReplicas(client, namespace, clusterName, ConfigUtils.COMPONENT_HIVESERVER2); + if (hs2Patch != null && hs2Patch < currentReplicas) { + // Scale-down: defer to allow deletion-cost annotations to propagate + pendingScaleDowns.put(hs2Key, new PendingScaleDown(hs2Patch, Instant.now())); + LOG.info("[hiveserver2] Deferring scale-down to {} (waiting for deletion-cost propagation)", + hs2Patch); + } else if (hs2Patch != null) { + // Scale-up: apply immediately + patches.put(ConfigUtils.COMPONENT_HIVESERVER2, hs2Patch); + MANAGED_REPLICAS.put(hs2Key, hs2Patch); + } + } + } + + // Metastore + if 
(spec.metastore().isEnabled() && spec.metastore().autoscaling().isEnabled()) { + AutoscalingSpec msAuto = spec.metastore().autoscaling(); + Map msSelector = Labels.selectorForComponent(cluster, ConfigUtils.COMPONENT_METASTORE); + bgScraper.registerOrUpdate(namespace, clusterName, + ConfigUtils.COMPONENT_METASTORE, msSelector, + msAuto.metricsPort(), msAuto.metricsScrapeIntervalSeconds()); + String msKey = namespace + "/" + clusterName + "/" + ConfigUtils.COMPONENT_METASTORE; + List msMetrics = metricsCache.getOrEmpty(msKey, msAuto.metricsScrapeIntervalSeconds() * 3); + evaluateComponent(cluster, client, namespace, clusterName, + ConfigUtils.COMPONENT_METASTORE, msAuto, + spec.metastore().replicas(), patches, statuses, msMetrics); + } + + // LLAP clusters (each evaluated independently) + for (LlapSpec llapSpec : spec.llapClusters()) { + if (!llapSpec.isEnabled() || !llapSpec.autoscaling().isEnabled()) { + continue; + } + String llapComponentKey = ConfigUtils.llapComponentKey(llapSpec.name()); + AutoscalingSpec llapAuto = llapSpec.autoscaling(); + Map llapSelector = Labels.selectorForLlapCluster(cluster, llapSpec.name()); + bgScraper.registerOrUpdate(namespace, clusterName, + llapComponentKey, llapSelector, + llapAuto.metricsPort(), llapAuto.metricsScrapeIntervalSeconds()); + String llapKey = cacheKey(namespace, clusterName, llapComponentKey); + List llapMetrics = metricsCache.getOrEmpty(llapKey, llapAuto.metricsScrapeIntervalSeconds() * 3); + evaluateComponent(cluster, client, namespace, clusterName, + llapComponentKey, llapAuto, + llapSpec.replicas(), patches, statuses, llapMetrics); + } + + // Per-LLAP TezAM (one TezAM per LLAP cluster, each with its own autoscaling config) + if (spec.tezAm().isEnabled()) { + for (LlapSpec llapSpec : spec.llapClusters()) { + if (!llapSpec.isEnabled()) { + continue; + } + LlapSpec.LlapTezAmSpec perLlapTezAm = llapSpec.tezAm(); + if (!perLlapTezAm.autoscaling().isEnabled()) { + continue; + } + AutoscalingSpec tezAuto = 
perLlapTezAm.autoscaling(); + String tezAmComponentKey = ConfigUtils.tezAmComponentKey(llapSpec.name()); + Map tezSelector = Labels.selectorForTezAmCluster(cluster, llapSpec.name()); + bgScraper.registerOrUpdate(namespace, clusterName, + tezAmComponentKey, tezSelector, + tezAuto.metricsPort(), tezAuto.metricsScrapeIntervalSeconds()); + String tezKey = cacheKey(namespace, clusterName, tezAmComponentKey); + List tezMetrics = metricsCache.getOrEmpty(tezKey, tezAuto.metricsScrapeIntervalSeconds() * 3); + evaluateComponent(cluster, client, namespace, clusterName, + tezAmComponentKey, tezAuto, + perLlapTezAm.replicas(), patches, statuses, tezMetrics); + } + } + + return new AutoscalingEvaluation(patches, statuses); + } + + /** + * Returns cached HS2 metrics (used by LLAP/TezAM activation gate). + * Non-blocking — reads from the background-scraper cache. + */ + public List getHs2MetricsFromCache(HiveCluster cluster) { + String namespace = cluster.getMetadata().getNamespace(); + String clusterName = cluster.getMetadata().getName(); + String key = namespace + "/" + clusterName + "/" + ConfigUtils.COMPONENT_HIVESERVER2; + int maxStale = cluster.getSpec().hiveServer2().autoscaling().metricsScrapeIntervalSeconds() * 3; + return metricsCache.getOrEmpty(key, maxStale); + } + + private void evaluateComponent(HiveCluster cluster, KubernetesClient client, + String namespace, String clusterName, String component, + AutoscalingSpec autoscaling, int maxReplicas, + Map patches, Map statuses, + List metrics) { + + int currentReplicas = getCurrentReplicas(client, namespace, clusterName, component); + + String key = cacheKey(namespace, clusterName, component); + + // For LLAP and TezAM, scaling decisions are based on HS2 metrics (activation gate), + // not their own pod metrics. Allow evaluation even with 0 own pods. 
+ boolean usesHs2Activation = component.startsWith(ConfigUtils.COMPONENT_LLAP + "-") + || component.startsWith(ConfigUtils.COMPONENT_TEZAM + "-"); + + if (metrics.isEmpty() && !usesHs2Activation) { + LOG.debug("[{}] No ready pods to scrape, skipping", component); + MANAGED_REPLICAS.put(key, currentReplicas); + return; + } + + ComponentAutoscaler autoscaler = autoscalers.computeIfAbsent(key, + k -> new ComponentAutoscaler(component, createStrategy(component, cluster))); + + ComponentAutoscaler.EvaluationResult result = + autoscaler.evaluate(metrics, autoscaling, currentReplicas, maxReplicas); + + // Build status + if (result.patchTo() != null) { + lastScaleTimes.put(key, Instant.now().toString()); + } + AutoscalingStatus as = new AutoscalingStatus(); + as.setCurrentMetricValue(result.rawMetricValue()); + // Only show scaleUpThreshold for strategies that use it (TezAM is demand-based, no threshold) + if (autoscaler.usesScaleUpThreshold()) { + as.setScaleUpThreshold(autoscaling.scaleUpThreshold()); + } + // CPU metrics (only for HS2 and HMS — LLAP/TezAM don't use CPU-based scaling) + if ((ConfigUtils.COMPONENT_HIVESERVER2.equals(component) || ConfigUtils.COMPONENT_METASTORE.equals(component)) + && autoscaling.cpuScaleUpThreshold() > 0) { + as.setCurrentCpuPercent(result.cpuPercent()); + as.setCpuScaleUpThreshold(autoscaling.cpuScaleUpThreshold()); + as.setCpuProposedReplicas(result.cpuProposedReplicas()); + } + as.setProposedReplicas(result.proposedReplicas()); + as.setLastScaleTime(lastScaleTimes.get(key)); + statuses.put(component, as); + + if (result.patchTo() != null) { + int patchValue = result.patchTo(); + patches.put(component, patchValue); + MANAGED_REPLICAS.put(key, patchValue); + } else { + // No change needed — record current replicas as the managed value + MANAGED_REPLICAS.put(key, currentReplicas); + } + } + + private ScalingStrategy createStrategy(String component, HiveCluster cluster) { + if (component.startsWith(ConfigUtils.COMPONENT_LLAP + "-")) { + 
String llapName = component.substring(ConfigUtils.COMPONENT_LLAP.length() + 1); + return new LlapScalingStrategy(this, cluster, llapName); + } + if (component.startsWith(ConfigUtils.COMPONENT_TEZAM + "-")) { + String llapName = component.substring(ConfigUtils.COMPONENT_TEZAM.length() + 1); + return new TezAmScalingStrategy(this, cluster, llapName); + } + return switch (component) { + case ConfigUtils.COMPONENT_HIVESERVER2 -> new HiveServer2ScalingStrategy(); + case ConfigUtils.COMPONENT_METASTORE -> new MetastoreScalingStrategy(); + default -> throw new IllegalArgumentException("Unknown component: " + component); + }; + } + + private int getCurrentReplicas(KubernetesClient client, String namespace, + String clusterName, String component) { + // Component key → workload name mapping: + // "llap-{name}" → "{cluster}-{name}" + // "tezam-{name}" → "{cluster}-tezam-{name}" + // other → "{cluster}-{component}" + String workloadName; + if (component.startsWith(ConfigUtils.COMPONENT_LLAP + "-")) { + String llapName = component.substring(ConfigUtils.COMPONENT_LLAP.length() + 1); + workloadName = clusterName + "-" + llapName; + } else if (component.startsWith(ConfigUtils.COMPONENT_TEZAM + "-")) { + String llapName = component.substring(ConfigUtils.COMPONENT_TEZAM.length() + 1); + workloadName = clusterName + "-tezam-" + llapName; + } else { + workloadName = clusterName + "-" + component; + } + if (component.startsWith(ConfigUtils.COMPONENT_LLAP + "-") + || component.startsWith(ConfigUtils.COMPONENT_TEZAM + "-")) { + var ss = client.apps().statefulSets() + .inNamespace(namespace).withName(workloadName).get(); + return ss != null && ss.getSpec().getReplicas() != null ? ss.getSpec().getReplicas() : 0; + } else { + var deploy = client.apps().deployments() + .inNamespace(namespace).withName(workloadName).get(); + return deploy != null && deploy.getSpec().getReplicas() != null + ? 
deploy.getSpec().getReplicas() : 0; + } + } + + /** + * Patches each pod's deletion cost annotation based on its active session count. + * Kubernetes uses this during scale-down to kill idle pods first (lower cost = killed first). + *

+ * Only meaningful for Deployments (HS2, Metastore) — the ReplicaSet controller + * respects this annotation. StatefulSets ignore it and always terminate by ordinal. + */ + private void updateDeploymentPodDeletionCost(KubernetesClient client, String namespace, + List metrics, String metricName) { + for (PodMetrics pm : metrics) { + int sessions = pm.metrics().getOrDefault(metricName, 0.0).intValue(); + try { + client.pods().inNamespace(namespace).withName(pm.podName()) + .edit(pod -> { + if (pod.getMetadata().getAnnotations() == null) { + pod.getMetadata().setAnnotations(new java.util.HashMap<>()); + } + pod.getMetadata().getAnnotations() + .put("controller.kubernetes.io/pod-deletion-cost", String.valueOf(sessions)); + return pod; + }); + } catch (Exception e) { + LOG.debug("Failed to update deletion cost for pod {}: {}", pm.podName(), e.getMessage()); + } + } + } + +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/HiveServer2ScalingStrategy.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/HiveServer2ScalingStrategy.java new file mode 100644 index 000000000000..92c826b29af4 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/HiveServer2ScalingStrategy.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.util.List; + +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; + +/** + * Scaling strategy for HiveServer2. + * desired = ceil(sum(hs2_open_sessions across all pods) / scaleUpThreshold) + * Uses sum() so that each session is counted — prevents premature scale-down + * of pods that still have active sessions. + */ +public class HiveServer2ScalingStrategy implements ScalingStrategy { + + static final String METRIC_OPEN_SESSIONS = "hs2_open_sessions"; + + private int lastMetric; + + @Override + public int computeDesiredReplicas(List podMetrics, + AutoscalingSpec autoscaling, int maxReplicas) { + // HS2 is the cluster entry point — scaling to 0 makes the cluster unreachable. + // Enforce floor of 1 regardless of CRD defaults or user misconfiguration. 
+ int safeMinReplicas = Math.max(1, autoscaling.minReplicas()); + + double totalSessions = 0; + for (PodMetrics pm : podMetrics) { + totalSessions += pm.metrics().getOrDefault(METRIC_OPEN_SESSIONS, 0.0); + } + + lastMetric = (int) totalSessions; + + if (totalSessions <= 0) { + return safeMinReplicas; + } + + int threshold = Math.max(1, autoscaling.scaleUpThreshold()); + int desired = (int) Math.ceil(totalSessions / threshold); + return Math.max(desired, safeMinReplicas); + } + + @Override + public int lastMetricValue() { + return lastMetric; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/LlapScalingStrategy.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/LlapScalingStrategy.java new file mode 100644 index 000000000000..1b913492791f --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/LlapScalingStrategy.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.util.List; + +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scaling strategy for LLAP daemons. + * Formula: avg(QueuedRequests + Configured - Available) across all pods. + * This represents average "busy slots + queued" per daemon. + * desired = ceil(avg_busy / scaleUpThreshold) + *

+ * Activation gate: only scale if HS2 has open sessions (prevents zombie scaling). + */ +public class LlapScalingStrategy implements ScalingStrategy { + + private static final Logger LOG = LoggerFactory.getLogger(LlapScalingStrategy.class); + + static final String METRIC_QUEUED = "hadoop_llapdaemon_executornumqueuedrequests"; + static final String METRIC_CONFIGURED = "hadoop_llapdaemon_executornumexecutorsconfigured"; + static final String METRIC_AVAILABLE = "hadoop_llapdaemon_executornumexecutorsavailable"; + static final String METRIC_LLAP_TARGET_PREFIX = "hs2_llap_target_sessions_"; + + private final HiveClusterAutoscaler orchestrator; + private final HiveCluster cluster; + private final String llapName; + private int lastMetric; + + public LlapScalingStrategy(HiveClusterAutoscaler orchestrator, HiveCluster cluster, String llapName) { + this.orchestrator = orchestrator; + this.cluster = cluster; + this.llapName = llapName; + } + + @Override + public int computeDesiredReplicas(List podMetrics, + AutoscalingSpec autoscaling, int maxReplicas) { + + // Activation gate: check if HS2 has any open sessions. + // If scrape returns empty but LLAP has running pods, treat as "unknown" and preserve. + // This prevents spurious scale-to-zero from transient scrape failures after operator restart. 
+ List hs2Metrics = orchestrator.getHs2MetricsFromCache(cluster); + Boolean sessionsDetected = detectHs2Sessions(hs2Metrics); + if (sessionsDetected == null && !podMetrics.isEmpty()) { + // HS2 scrape returned no data but LLAP is running — hold current state + LOG.debug("[llap] HS2 scrape returned no pods; preserving LLAP (has {} running pods)", podMetrics.size()); + lastMetric = 0; + return Math.max(1, autoscaling.minReplicas()); + } + if (sessionsDetected == null || !sessionsDetected) { + LOG.debug("[llap] HS2 has no open sessions, scaling to minReplicas"); + lastMetric = 0; + return autoscaling.minReplicas(); + } + + // HS2 has sessions but LLAP has no pods yet — scale up to at least 1 + if (podMetrics.isEmpty()) { + int minReplica = Math.max(1, autoscaling.minReplicas()); + LOG.debug("[llap] HS2 has sessions but LLAP has 0 pods, scaling to {}", minReplica); + lastMetric = 0; + return minReplica; + } + + // Compute average busy slots across all LLAP pods + double totalBusy = 0; + int podCount = 0; + for (PodMetrics pm : podMetrics) { + double queued = pm.metrics().getOrDefault(METRIC_QUEUED, 0.0); + double configured = pm.metrics().getOrDefault(METRIC_CONFIGURED, 0.0); + double available = pm.metrics().getOrDefault(METRIC_AVAILABLE, 0.0); + double busy = queued + configured - available; + totalBusy += busy; + podCount++; + } + + double avgBusy = totalBusy / podCount; + lastMetric = (int) Math.round(avgBusy); + + if (avgBusy <= 0) { + // HS2 has sessions (passed activation gate above) but executors are idle between queries. + // Keep at least 1 daemon to avoid flapping: scaling to 0 here would cause immediate + // scale-back-up on the next evaluation when the empty-pod path triggers. 
+ return Math.max(1, autoscaling.minReplicas()); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("[llap] avgBusy={}, threshold={}", String.format("%.2f", avgBusy), + autoscaling.scaleUpThreshold()); + } + + int threshold = Math.max(1, autoscaling.scaleUpThreshold()); + int desired = (int) Math.ceil(avgBusy / threshold); + return Math.max(desired, autoscaling.minReplicas()); + } + + @Override + public int lastMetricValue() { + return lastMetric; + } + + /** + * Detect HS2 sessions targeting this specific LLAP cluster. + * First tries the per-target metric (hs2_llap_target_sessions_{llapName}) which is + * available when HS2 has the per-LLAP-cluster session tracking patch. + * Falls back to the generic hs2_open_sessions for backward compatibility only if + * HS2 does NOT expose any per-target metrics at all (i.e. older HS2 image). + * + * @return true if sessions > 0, false if scraped and all 0, null if scrape returned no pods + * (ambiguous — could be transient failure or HS2 genuinely absent) + */ + private Boolean detectHs2Sessions(List hs2Metrics) { + if (hs2Metrics.isEmpty()) { + return null; + } + + // Check if HS2 supports per-target metrics (any metric with the prefix exists). + // If it does, use only the per-target metric for this cluster — a missing metric + // means 0 sessions targeting this cluster (the gauge is registered lazily on first connect). 
+ String targetMetric = METRIC_LLAP_TARGET_PREFIX + llapName; + boolean hs2SupportsTargetMetrics = false; + for (PodMetrics pm : hs2Metrics) { + for (String key : pm.metrics().keySet()) { + if (key.startsWith(METRIC_LLAP_TARGET_PREFIX)) { + hs2SupportsTargetMetrics = true; + break; + } + } + if (hs2SupportsTargetMetrics) { + break; + } + } + + if (hs2SupportsTargetMetrics) { + // HS2 has per-target tracking: check only our specific metric + for (PodMetrics pm : hs2Metrics) { + Double val = pm.metrics().get(targetMetric); + if (val != null && val > 0) { + return true; + } + } + return false; + } + + // Fallback: generic hs2_open_sessions (older HS2 without per-target metrics) + for (PodMetrics pm : hs2Metrics) { + double sessions = pm.metrics().getOrDefault( + HiveServer2ScalingStrategy.METRIC_OPEN_SESSIONS, 0.0); + if (sessions > 0) { + return true; + } + } + return false; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetastoreScalingStrategy.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetastoreScalingStrategy.java new file mode 100644 index 000000000000..fc4b9e30f499 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetastoreScalingStrategy.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scaling strategy for Hive Metastore. + * HMS uses HTTP transport — connections are per-request (stateless), so + * open_connections is always ~0. Instead we compute API request rate: + * rate = (sum(api_*_total) - previous_sum) / elapsed_seconds. + * desired = ceil(rate / scaleUpThreshold) + */ +public class MetastoreScalingStrategy implements ScalingStrategy { + + private static final Logger LOG = LoggerFactory.getLogger(MetastoreScalingStrategy.class); + private static final String API_COUNTER_PREFIX = "api_"; + private static final String API_COUNTER_SUFFIX = "_total"; + + // Previous scrape state for rate computation + private final ConcurrentHashMap previousCounters = new ConcurrentHashMap<>(); + private long previousTimestampMs = 0; + private int lastMetric; + + @Override + public int computeDesiredReplicas(List podMetrics, + AutoscalingSpec autoscaling, int maxReplicas) { + + // Sum all api_*_total counters across all pods + double currentTotal = 0; + for (PodMetrics pm : podMetrics) { + for (Map.Entry entry : pm.metrics().entrySet()) { + String name = entry.getKey(); + if (name.startsWith(API_COUNTER_PREFIX) && name.endsWith(API_COUNTER_SUFFIX)) { + currentTotal += entry.getValue(); + } + } + } + + long now = System.currentTimeMillis(); + 
double rate = 0; + + if (previousTimestampMs > 0) { + double elapsedSeconds = (now - previousTimestampMs) / 1000.0; + if (elapsedSeconds > 0) { + double previousTotal = previousCounters.values().stream() + .mapToDouble(Double::doubleValue).sum(); + double delta = currentTotal - previousTotal; + if (delta < 0) { + // Counter reset (pod restart) — skip this sample + delta = 0; + } + rate = delta / elapsedSeconds; + } + } + + // Store current state for next evaluation + previousCounters.clear(); + previousCounters.put(API_COUNTER_SUFFIX, currentTotal); + previousTimestampMs = now; + + lastMetric = (int) Math.round(rate); + + if (rate <= 0) { + return autoscaling.minReplicas(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("[metastore] API request rate: {}/s, threshold: {}", + String.format("%.2f", rate), autoscaling.scaleUpThreshold()); + } + + int threshold = Math.max(1, autoscaling.scaleUpThreshold()); + int desired = (int) Math.ceil(rate / threshold); + return Math.max(desired, autoscaling.minReplicas()); + } + + @Override + public int lastMetricValue() { + return lastMetric; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetricsCache.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetricsCache.java new file mode 100644 index 000000000000..687ba739de86 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetricsCache.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.time.Instant; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Thread-safe cache for scraped Prometheus metrics from pods. + * Entries become stale after a configurable duration (typically 3x the scrape interval) + * and are treated as absent when read. + */ +public class MetricsCache { + + private record CachedResult(List metrics, Instant scrapedAt) {} + + private final ConcurrentHashMap cache = new ConcurrentHashMap<>(); + + /** + * Stores scraped metrics for a component. + * + * @param key format: "namespace/clusterName/component" + * @param metrics the scraped pod metrics + */ + public void put(String key, List metrics) { + cache.put(key, new CachedResult(metrics, Instant.now())); + } + + /** + * Returns cached metrics if present and not stale, otherwise an empty list. + * + * @param key format: "namespace/clusterName/component" + * @param maxStaleSecs maximum age in seconds before the entry is considered stale + * @return the cached metrics, or an empty list if absent or stale + */ + public List getOrEmpty(String key, int maxStaleSecs) { + CachedResult result = cache.get(key); + if (result == null + || Instant.now().isAfter(result.scrapedAt().plusSeconds(maxStaleSecs))) { + return Collections.emptyList(); + } + return result.metrics(); + } + + /** + * Removes all entries whose key starts with the given prefix. + * Used for cleanup when a HiveCluster is deleted. 
+ */ + public void removeByPrefix(String prefix) { + cache.keySet().removeIf(k -> k.startsWith(prefix)); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetricsScraper.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetricsScraper.java new file mode 100644 index 000000000000..c3aa8aa82ca8 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/MetricsScraper.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; + +import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.client.KubernetesClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scrapes Prometheus-format metrics from JMX Exporter endpoints on pods. 
+ * Uses pod IPs directly (no Service or Prometheus intermediary). + * All pods are scraped concurrently to avoid blocking the reconciler loop. + */ +public class MetricsScraper { + + private static final Logger LOG = LoggerFactory.getLogger(MetricsScraper.class); + private static final Duration TIMEOUT = Duration.ofSeconds(5); + + private final KubernetesClient client; + private final HttpClient httpClient; + + public MetricsScraper(KubernetesClient client) { + this.client = client; + this.httpClient = HttpClient.newBuilder() + .connectTimeout(TIMEOUT) + .build(); + } + + /** + * Scrape metrics from all ready pods matching the given label selector. + * Pods are scraped concurrently — total wall-clock time is bounded by + * a single pod's timeout (5s) regardless of pod count. + * + * @param namespace the namespace to query + * @param selector label selector (e.g., app.kubernetes.io/component=hiveserver2) + * @param metricsPort the port on which the Prometheus JMX Exporter serves metrics + * @return list of per-pod metrics (empty if no pods or all fail) + */ + public List scrape(String namespace, Map selector, int metricsPort) { + List pods; + try { + pods = client.pods() + .inNamespace(namespace) + .withLabels(selector) + .list() + .getItems(); + } catch (Exception e) { + LOG.warn("Failed to list pods in {}/{}: {}", namespace, selector, e.getMessage()); + return Collections.emptyList(); + } + + // Filter to ready pods with IPs + List scrapeable = new ArrayList<>(); + for (Pod pod : pods) { + if (isPodReady(pod) && pod.getStatus().getPodIP() != null + && !pod.getStatus().getPodIP().isEmpty()) { + scrapeable.add(pod); + } + } + + if (scrapeable.isEmpty()) { + return Collections.emptyList(); + } + + // Scrape all pods concurrently + List> futures = new ArrayList<>(scrapeable.size()); + for (Pod pod : scrapeable) { + String podName = pod.getMetadata().getName(); + String podIp = pod.getStatus().getPodIP(); + futures.add(fetchMetricsAsync(podIp, metricsPort) + 
.thenApply(body -> new PodMetrics(podName, PrometheusTextParser.parse(body))) + .exceptionally(ex -> { + LOG.debug("Failed to scrape metrics from pod {}: {}", podName, ex.getMessage()); + return null; + })); + } + + // Wait for all to complete (bounded by TIMEOUT per pod, but all run in parallel) + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); + + List results = new ArrayList<>(); + for (CompletableFuture f : futures) { + PodMetrics pm = f.join(); + if (pm != null) { + results.add(pm); + } + } + return results; + } + + private CompletableFuture fetchMetricsAsync(String podIp, int metricsPort) { + URI uri = URI.create("http://" + podIp + ":" + metricsPort + "/metrics"); + HttpRequest request = HttpRequest.newBuilder() + .uri(uri) + .timeout(TIMEOUT) + .GET() + .build(); + return httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) + .thenApply(response -> { + if (response.statusCode() != 200) { + throw new RuntimeException("HTTP " + response.statusCode() + " from " + uri); + } + return response.body(); + }); + } + + private static boolean isPodReady(Pod pod) { + if (pod.getStatus() == null || pod.getStatus().getConditions() == null) { + return false; + } + return pod.getStatus().getConditions().stream() + .anyMatch(c -> "Ready".equals(c.getType()) && "True".equals(c.getStatus())); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/PodMetrics.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/PodMetrics.java new file mode 100644 index 000000000000..1fe54e98e1b3 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/PodMetrics.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.util.Map; + +/** Metrics scraped from a single pod's JMX Exporter endpoint. */ +public record PodMetrics(String podName, Map metrics) { +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/PrometheusTextParser.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/PrometheusTextParser.java new file mode 100644 index 000000000000..babee17aa77d --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/PrometheusTextParser.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; + +/** + * Parses Prometheus text exposition format (from JMX Exporter /metrics). + * Only extracts metric name → value pairs; labels are stripped. + * For metrics with labels, the full line (name + labels) is used as key. + */ +public final class PrometheusTextParser { + + private PrometheusTextParser() { + } + + /** + * Parse Prometheus text format into metric-name → value map. + * Labels are stripped from keys; duplicate metric names (from multiple + * label sets) are summed. + */ + public static Map parse(String body) { + return doParse(body, false); + } + + /** + * Parse and return per-label-set metrics (preserving labels in key). + * Key format: "metric_name{label=value,...}" + */ + public static Map parseWithLabels(String body) { + return doParse(body, true); + } + + private static Map doParse(String body, boolean keepLabels) { + Map result = new HashMap<>(); + if (StringUtils.isEmpty(body)) { + return result; + } + try (BufferedReader reader = new BufferedReader(new StringReader(body))) { + String line; + while ((line = reader.readLine()) != null) { + if (line.isEmpty() || line.charAt(0) == '#') { + continue; + } + String metricKey; + String valuePart; + int braceStart = line.indexOf('{'); + if (braceStart >= 0) { + int braceEnd = line.indexOf('}', braceStart); + if (braceEnd < 0) { + continue; + } + metricKey = keepLabels ? 
line.substring(0, braceEnd + 1) : line.substring(0, braceStart); + valuePart = line.substring(braceEnd + 1).trim(); + } else { + int spaceIdx = line.indexOf(' '); + if (spaceIdx < 0) { + continue; + } + metricKey = line.substring(0, spaceIdx); + valuePart = line.substring(spaceIdx + 1).trim(); + } + int spaceInValue = valuePart.indexOf(' '); + if (spaceInValue > 0) { + valuePart = valuePart.substring(0, spaceInValue); + } + try { + double value = Double.parseDouble(valuePart); + if (keepLabels) { + result.put(metricKey, value); + } else { + result.merge(metricKey, value, Double::sum); + } + } catch (NumberFormatException e) { + // Skip NaN, +Inf, -Inf, or malformed values + } + } + } catch (IOException e) { + // StringReader does not throw IOException + } + return result; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/ScalingStrategy.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/ScalingStrategy.java new file mode 100644 index 000000000000..0724fa5f3d9b --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/ScalingStrategy.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.util.List; + +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; + +/** Strategy for computing desired replica count from scraped pod metrics. */ +public interface ScalingStrategy { + + /** + * Compute desired replica count based on current pod metrics. + * + * @param podMetrics metrics from all pods of this component + * @param autoscaling the autoscaling configuration + * @param maxReplicas maximum allowed replicas + * @return desired replica count (before stabilization/clamping) + */ + int computeDesiredReplicas(List podMetrics, + AutoscalingSpec autoscaling, int maxReplicas); + + /** + * Returns the raw metric value from the last evaluation (e.g. total sessions, + * request rate, busy slots). Used for status reporting. + */ + default int lastMetricValue() { + return 0; + } + + /** + * Whether this strategy uses scaleUpThreshold from the spec. + * Strategies that are purely demand-based (e.g. TezAM: 1 TezAM per session) + * return false so the threshold is not displayed in status. + */ + default boolean usesScaleUpThreshold() { + return true; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/StabilizationWindow.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/StabilizationWindow.java new file mode 100644 index 000000000000..665b9ac6ef2b --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/StabilizationWindow.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayDeque; +import java.util.Deque; + +/** + * HPA-like stabilization window that smooths scaling decisions. + * Keeps a sliding window of desired replica samples and returns + * the max (for scale-up) or min (for scale-down) over the window. + */ +public class StabilizationWindow { + + private record Sample(Instant timestamp, int value) { + } + + private final Deque samples = new ArrayDeque<>(); + private final Duration window; + + public StabilizationWindow(Duration window) { + this.window = window; + } + + /** Record a new desired replica sample. */ + public void record(int desiredReplicas) { + Instant now = Instant.now(); + evictExpired(now); + samples.addLast(new Sample(now, desiredReplicas)); + } + + /** Returns the maximum value in the window (used for scale-up decisions). */ + public int stabilizedMax() { + evictExpired(Instant.now()); + return samples.stream().mapToInt(Sample::value).max().orElse(0); + } + + /** Returns the minimum value in the window (used for scale-down decisions). */ + public int stabilizedMin() { + evictExpired(Instant.now()); + return samples.stream().mapToInt(Sample::value).min().orElse(0); + } + + /** Returns true if the window has at least one sample. 
*/ + public boolean hasSamples() { + evictExpired(Instant.now()); + return !samples.isEmpty(); + } + + private void evictExpired(Instant now) { + Instant cutoff = now.minus(window); + while (!samples.isEmpty() && samples.peekFirst().timestamp().isBefore(cutoff)) { + samples.pollFirst(); + } + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/TezAmScalingStrategy.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/TezAmScalingStrategy.java new file mode 100644 index 000000000000..bc43905a1b38 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/autoscaling/TezAmScalingStrategy.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.autoscaling; + +import java.util.List; + +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Scaling strategy for per-LLAP TezAM instances. 
+ * Each TezAM follows its paired LLAP cluster's lifecycle: it should be up + * when there are sessions targeting that LLAP cluster, and at 0 otherwise. + *

+ * Uses the per-target session metric from HS2: hs2_llap_target_sessions_{llapName}. + * Falls back to hs2_open_sessions if per-target metrics are not available. + */ +public class TezAmScalingStrategy implements ScalingStrategy { + + private static final Logger LOG = LoggerFactory.getLogger(TezAmScalingStrategy.class); + + private final HiveClusterAutoscaler orchestrator; + private final HiveCluster cluster; + private final String llapName; + private int lastMetric; + + public TezAmScalingStrategy(HiveClusterAutoscaler orchestrator, + HiveCluster cluster, String llapName) { + this.orchestrator = orchestrator; + this.cluster = cluster; + this.llapName = llapName; + } + + @Override + public int computeDesiredReplicas(List podMetrics, + AutoscalingSpec autoscaling, int maxReplicas) { + + List hs2Metrics = orchestrator.getHs2MetricsFromCache(cluster); + + // Activation gate: if HS2 scrape returns no data but TezAM has running pods, + // treat as "unknown" and preserve current state to avoid spurious scale-to-zero. + if (hs2Metrics.isEmpty() && !podMetrics.isEmpty()) { + LOG.debug("[tezam-{}] HS2 scrape returned no pods; preserving TezAM", llapName); + lastMetric = 0; + return Math.max(1, autoscaling.minReplicas()); + } + + // Use per-LLAP target sessions metric (same logic as LlapScalingStrategy). 
+ String targetMetric = "hs2_llap_target_sessions_" + llapName; + boolean anyPerTargetMetricExists = false; + double targetSessions = 0; + + for (PodMetrics pm : hs2Metrics) { + // Check if HS2 exposes ANY per-target metric (feature support check) + for (String key : pm.metrics().keySet()) { + if (key.startsWith("hs2_llap_target_sessions_")) { + anyPerTargetMetricExists = true; + break; + } + } + targetSessions += pm.metrics().getOrDefault(targetMetric, 0.0); + } + + if (!anyPerTargetMetricExists && !hs2Metrics.isEmpty()) { + // HS2 doesn't support per-target metrics — fall back to total sessions + double totalSessions = 0; + for (PodMetrics pm : hs2Metrics) { + totalSessions += pm.metrics().getOrDefault( + HiveServer2ScalingStrategy.METRIC_OPEN_SESSIONS, 0.0); + } + targetSessions = totalSessions; + } + + if (targetSessions <= 0) { + LOG.debug("[tezam-{}] No sessions targeting this cluster, scaling to minReplicas", llapName); + lastMetric = 0; + return autoscaling.minReplicas(); + } + + lastMetric = (int) targetSessions; + + // TezAM desired: at least 1 when there are sessions, capped at maxReplicas + int desired = (int) Math.ceil(targetSessions); + desired = Math.min(desired, maxReplicas); + + LOG.debug("[tezam-{}] targetSessions={}, desired={}", llapName, targetSessions, desired); + + return Math.max(desired, autoscaling.minReplicas()); + } + + @Override + public int lastMetricValue() { + return lastMetric; + } + + @Override + public boolean usesScaleUpThreshold() { + return false; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java deleted file mode 100644 index 6c0f9308dbc1..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HadoopConfigMapDependent.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the Hadoop core-site.xml ConfigMap for filesystem configuration. 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=hadoop-config," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class HadoopConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "hadoop-config"; - - public HadoopConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - Map props = - HiveConfigBuilder.getHadoopCoreSite(hiveCluster.getSpec()); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("core-site.xml", HadoopXmlBuilder.buildXml(props)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. */ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-hadoop-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveConfigMapDependent.java new file mode 100644 index 000000000000..411e31cb5a0d --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveConfigMapDependent.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.kubernetes.operator.dependent;
+
+import io.fabric8.kubernetes.api.model.ConfigMap;
+import io.fabric8.kubernetes.api.model.ConfigMapBuilder;
+import io.javaoperatorsdk.operator.api.reconciler.Context;
+import io.javaoperatorsdk.operator.api.config.informer.Informer;
+import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent;
+
+import org.apache.hive.kubernetes.operator.model.HiveCluster;
+import org.apache.hive.kubernetes.operator.model.HiveClusterSpec;
+import org.apache.hive.kubernetes.operator.util.ConfigUtils;
+import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder;
+import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder;
+import org.apache.hive.kubernetes.operator.util.Labels;
+
+/**
+ * Unified ConfigMap dependent resource for all Hive component configurations.
+ * Subclassed per component to define the specific XML data and label selector.
+ * Every ConfigMap built here is named "{cluster name}-{suffix}", placed in the
+ * cluster's namespace and labelled via {@code Labels.forComponent}.
+ */
+// NOTE(review): the generic type arguments on HiveDependentResource and Context appear
+// to be missing in this rendering of the patch — confirm against the real file.
+public abstract class HiveConfigMapDependent extends HiveDependentResource {
+
+  // Component label value passed to Labels.forComponent
+  private final String component;
+  // Appended to the cluster name (after a '-') to form the ConfigMap name
+  private final String suffix;
+
+  /**
+   * @param component component label value for the ConfigMap
+   * @param suffix name suffix of the ConfigMap
+   */
+  protected HiveConfigMapDependent(String component, String suffix) {
+    super(ConfigMap.class);
+    this.component = component;
+    this.suffix = suffix;
+  }
+
+  /** Returns "{cluster name}-{suffix}", the same name that {@link #desired} assigns. */
+  @Override
+  protected String getSecondaryResourceName(HiveCluster primary, Context context) {
+    return primary.getMetadata().getName() + "-" + suffix;
+  }
+
+  /** Builds the ConfigMap metadata, then lets the subclass add its data entries. */
+  @Override
+  protected ConfigMap desired(HiveCluster hiveCluster, Context context) {
+    ConfigMapBuilder builder =
+        new ConfigMapBuilder().withNewMetadata().withName(hiveCluster.getMetadata().getName() + "-" + suffix)
+            .withNamespace(hiveCluster.getMetadata().getNamespace())
+            .withLabels(Labels.forComponent(hiveCluster, component)).endMetadata();
+    addData(builder, hiveCluster);
+    return builder.build();
+  }
+
+  /**
+   * Subclasses add their specific XML data entries.
+   *
+   * @param builder builder whose metadata has already been populated
+   * @param hiveCluster the cluster whose spec the data is generated from
+   */
+  protected abstract void addData(ConfigMapBuilder builder, HiveCluster hiveCluster);
+
+  /**
+   * Hadoop core-site.xml ConfigMap for filesystem configuration.
+   */
+  @KubernetesDependent(informer = @Informer(labelSelector = "app.kubernetes.io/component=hadoop-config,"
+      + "app.kubernetes.io/managed-by=hive-kubernetes-operator"))
+  public static class Hadoop extends HiveConfigMapDependent {
+    public Hadoop() {
+      super("hadoop-config", "hadoop-config");
+    }
+
+    @Override
+    protected void addData(ConfigMapBuilder builder, HiveCluster hiveCluster) {
+      builder.addToData("core-site.xml",
+          HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(hiveCluster.getSpec())));
+    }
+
+    /** Returns the ConfigMap name for this cluster; must stay in sync with the suffix above. */
+    public static String resourceName(HiveCluster hiveCluster) {
+      return hiveCluster.getMetadata().getName() + "-hadoop-config";
+    }
+  }
+
+  /**
+   * Metastore metastore-site.xml ConfigMap.
+   */
+  @KubernetesDependent(informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore,"
+      + "app.kubernetes.io/managed-by=hive-kubernetes-operator"))
+  public static class Metastore extends HiveConfigMapDependent {
+    public Metastore() {
+      super(ConfigUtils.COMPONENT_METASTORE, "metastore-config");
+    }
+
+    @Override
+    protected void addData(ConfigMapBuilder builder, HiveCluster hiveCluster) {
+      builder.addToData("metastore-site.xml",
+          HadoopXmlBuilder.buildXml(HiveConfigBuilder.getMetastoreSite(hiveCluster.getSpec())));
+    }
+
+    /** Returns the ConfigMap name for this cluster; must stay in sync with the suffix above. */
+    public static String resourceName(HiveCluster hiveCluster) {
+      return hiveCluster.getMetadata().getName() + "-metastore-config";
+    }
+  }
+
+  /**
+   * HiveServer2 hive-site.xml + tez-site.xml ConfigMap.
+   */
+  @KubernetesDependent(informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2,"
+      + "app.kubernetes.io/managed-by=hive-kubernetes-operator"))
+  public static class HiveServer2 extends HiveConfigMapDependent {
+    public HiveServer2() {
+      super(ConfigUtils.COMPONENT_HIVESERVER2, "hiveserver2-config");
+    }
+
+    @Override
+    protected void addData(ConfigMapBuilder builder, HiveCluster hiveCluster) {
+      HiveClusterSpec spec = hiveCluster.getSpec();
+      builder.addToData("hive-site.xml",
+          HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec)));
+      builder.addToData("tez-site.xml", HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)));
+    }
+
+    /** Returns the ConfigMap name for this cluster; must stay in sync with the suffix above. */
+    public static String resourceName(HiveCluster hiveCluster) {
+      return hiveCluster.getMetadata().getName() + "-hiveserver2-config";
+    }
+  }
+
+}
diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java
index cc2eb0de6de0..2315b455d760 100644
--- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java
+++ 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveDependentResource.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import io.fabric8.kubernetes.api.model.AffinityBuilder; import io.fabric8.kubernetes.api.model.Container; import io.fabric8.kubernetes.api.model.ContainerBuilder; @@ -44,12 +45,15 @@ import io.javaoperatorsdk.operator.api.reconciler.Context; import io.javaoperatorsdk.operator.processing.dependent.Matcher; import io.javaoperatorsdk.operator.processing.dependent.kubernetes.CRUDKubernetesDependentResource; +import org.apache.hive.kubernetes.operator.autoscaling.HiveClusterAutoscaler; import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; import org.apache.hive.kubernetes.operator.model.spec.ResourceRequirementsSpec; import org.apache.hive.kubernetes.operator.model.spec.SecretKeyRef; import org.apache.hive.kubernetes.operator.model.spec.ProbeSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,32 +82,37 @@ protected HiveDependentResource(Class resourceType) { super(resourceType); } + /** - * Catches 409 AlreadyExists during resource creation caused by - * informer lag — the resource exists on the API server but - * the informer cache hasn't indexed it yet, so JOSDK calls - * create directly. + * Returns the expected Kubernetes resource name for this dependent. + * Used to disambiguate when multiple dependents share the same resource + * type (e.g., multiple ConfigMap or Service dependents). Subclasses that + * share a resource type MUST override this method. + * + * @throws IllegalStateException if not overridden and disambiguation is needed */ - @Override - protected R handleCreate(R desired, P primary, Context

context) { - try { - return super.handleCreate(desired, primary, context); - } catch (KubernetesClientException e) { - if (e.getCode() == 409) { - LOG.info("Resource {} already exists (informer lag), " - + "will reconcile on next event", - desired.getMetadata().getName()); - return desired; - } - throw e; - } + protected String getSecondaryResourceName(P primary, Context

context) { + throw new IllegalStateException( + getClass().getSimpleName() + " must override getSecondaryResourceName() " + + "when multiple dependents share the same resource type"); } @Override public Optional getSecondaryResource(P primary, Context

context) { return eventSource() - .flatMap(es -> es.getSecondaryResource(primary)); + .flatMap(es -> { + Set resources = es.getSecondaryResources(primary); + if (resources.isEmpty()) { + return Optional.empty(); + } + String expectedName = getSecondaryResourceName(primary, + context); + return resources.stream() + .filter(r -> expectedName.equals( + r.getMetadata().getName())) + .findFirst(); + }); } /** @@ -125,6 +134,209 @@ public Matcher.Result match(R actualResource, R desired, return super.match(actualResource, desired, primary, context); } + @Override + protected R handleCreate(R desired, P primary, Context

context) { + try { + return super.handleCreate(desired, primary, context); + } catch (KubernetesClientException e) { + if (e.getCode() == 409) { + LOG.info("Resource {} already exists (informer lag), " + + "will reconcile on next event", + desired.getMetadata().getName()); + return desired; + } + throw e; + } + } + + /** + * Resolves the replica count to set in the desired workload spec. + *

<p> + * Always returns an explicit value — never null. Returning null would cause + * JOSDK/SSA to omit spec.replicas, and Kubernetes would default it to 1. + * <p>
+ * When autoscaling is enabled: + * - On CREATE: returns initialReplicas (minReplicas for the component) + * - On UPDATE: returns the autoscaler's managed value, or falls back to + * the current actual replicas from the informer cache. + * <p>
+ * When autoscaling is disabled: returns staticReplicas (the spec value). + */ + protected Integer resolveReplicaCount(P primary, Context

context, + AutoscalingSpec autoscaling, int staticReplicas, int initialReplicas) { + // Suspended cluster → 0 replicas (dependent resources natively respect suspend). + // Exception: HMS stays running if includeMetastore=false in autoSuspend config. + if (primary instanceof HiveCluster hc && hc.getSpec().suspend()) { + boolean isMetastore = ConfigUtils.COMPONENT_METASTORE.equals(getComponentName()); + if (!isMetastore || hc.getSpec().autoSuspend().includeMetastore()) { + return 0; + } + } + if (autoscaling == null || !autoscaling.isEnabled()) { + return staticReplicas; + } + Optional existing = getSecondaryResource(primary, context); + if (existing.isPresent()) { + // Check if the autoscaler has made a decision during this operator's lifecycle + Integer managed = HiveClusterAutoscaler.getManagedReplicas( + primary.getMetadata().getNamespace(), + primary.getMetadata().getName(), + getComponentName()); + if (managed != null) { + return managed; + } + // Fallback: operator restarted and MANAGED_REPLICAS is empty — read current value + R resource = existing.get(); + if (resource instanceof io.fabric8.kubernetes.api.model.apps.Deployment d) { + return d.getSpec() != null && d.getSpec().getReplicas() != null + ? d.getSpec().getReplicas() : initialReplicas; + } + if (resource instanceof io.fabric8.kubernetes.api.model.apps.StatefulSet s) { + return s.getSpec() != null && s.getSpec().getReplicas() != null + ? s.getSpec().getReplicas() : initialReplicas; + } + return initialReplicas; + } + // First creation: start at minReplicas. + return initialReplicas; + } + + + /** + * Returns the component name for this dependent (used for autoscaler replica lookup). + * Subclasses should override if they manage a workload with autoscaling. 
+ */ + protected String getComponentName() { + return null; + } + + /** + * Builds a preStop drain script that polls a single Prometheus metric + * (from the JMX Exporter at localhost:9404/metrics) until the value + * reaches zero, then exits to allow graceful pod termination. + * + * @param startupMessage logged at the start (e.g. "Waiting for open connections to drain") + * @param metricName Prometheus metric name (used in grep and log messages) + * @param varName shell variable name for the extracted value (e.g. "CONNS") + * @param idleMessage logged when idle condition is met (e.g. "All connections drained. Shutting down.") + * @param sleepSeconds polling interval in seconds + * @param maxRetries max consecutive curl failures before giving up + * @param prefixCommands optional commands to run before the polling loop (may be null) + */ + protected static String buildDrainScript( + String startupMessage, String metricName, String varName, + String idleMessage, int sleepSeconds, int maxRetries, + List prefixCommands, int metricsPort) { + List lines = new ArrayList<>(); + lines.add("#!/bin/bash"); + if (prefixCommands != null) { + lines.addAll(prefixCommands); + } + lines.add("echo '[preStop] " + startupMessage + + " (polling localhost:" + metricsPort + "/metrics)...'"); + lines.add("RETRIES=0"); + lines.add("while true; do"); + lines.add(" RESPONSE=$(curl -sf http://localhost:" + metricsPort + "/metrics)"); + lines.add(" if [ $? -ne 0 ]; then"); + lines.add(" RETRIES=$((RETRIES+1))"); + lines.add(" echo \"[preStop] ERROR: JMX Exporter unreachable on port " + metricsPort + " (attempt $RETRIES)\""); + lines.add(" if [ $RETRIES -ge " + maxRetries + " ]; then"); + lines.add(" echo '[preStop] JMX Exporter not responding after " + + (maxRetries * sleepSeconds) + "s. 
Proceeding with shutdown.'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" sleep " + sleepSeconds + "; continue"); + lines.add(" fi"); + lines.add(" " + varName + "=$(echo \"$RESPONSE\" | grep '^" + + metricName + " ' | awk '{print $2}')"); + lines.add(" if [ -z \"$" + varName + "\" ]; then"); + lines.add(" echo '[preStop] WARNING: " + metricName + + " metric not found. JMX Exporter may not be configured.'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" if [ \"${" + varName + "%.*}\" -le 0 ] 2>/dev/null; then"); + lines.add(" echo '[preStop] " + idleMessage + "'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" echo \"[preStop] " + metricName + "=$" + varName + " - waiting...\""); + lines.add(" RETRIES=0"); + lines.add(" sleep " + sleepSeconds); + lines.add("done"); + // Send SIGTERM directly to the Java process. Shell entrypoint scripts + // (PID 1) often don't forward signals, so K8s SIGTERM never reaches + // the JVM — causing a full grace-period wait before SIGKILL. + // Use 'java' pattern to avoid matching this script itself. + lines.add("echo '[preStop] Sending SIGTERM to Java process...'"); + lines.add("pkill -f 'java.*org.apache' || true"); + lines.add("exit 0"); + return String.join("\n", lines); + } + + /** + * Builds a preStop drain script that polls two Prometheus metrics and + * waits until available >= total (all executors idle). Used by LLAP. + * + * @param startupMessage logged at the start + * @param metricGrepA grep pattern for the first metric (e.g. includes trailing '{') + * @param varNameA shell variable for the first metric value (e.g. "AVAILABLE") + * @param metricGrepB grep pattern for the second metric + * @param varNameB shell variable for the second metric value (e.g. 
"TOTAL") + * @param notFoundWarning warning message when metrics are not found + * @param idleMessage logged when idle condition is met + * @param waitingFormat format for waiting log (with shell variable references) + * @param sleepSeconds polling interval in seconds + * @param maxRetries max consecutive curl failures before giving up + */ + protected static String buildDualMetricDrainScript( + String startupMessage, + String metricGrepA, String varNameA, + String metricGrepB, String varNameB, + String notFoundWarning, String idleMessage, + String waitingFormat, int sleepSeconds, int maxRetries, + int metricsPort) { + List lines = new ArrayList<>(); + lines.add("#!/bin/bash"); + lines.add("echo '[preStop] " + startupMessage + + " (polling localhost:" + metricsPort + "/metrics)...'"); + lines.add("RETRIES=0"); + lines.add("while true; do"); + lines.add(" RESPONSE=$(curl -sf http://localhost:" + metricsPort + "/metrics)"); + lines.add(" if [ $? -ne 0 ]; then"); + lines.add(" RETRIES=$((RETRIES+1))"); + lines.add(" echo \"[preStop] ERROR: JMX Exporter unreachable on port " + metricsPort + " (attempt $RETRIES)\""); + lines.add(" if [ $RETRIES -ge " + maxRetries + " ]; then"); + lines.add(" echo '[preStop] JMX Exporter not responding after " + + (maxRetries * sleepSeconds) + "s. 
Proceeding with shutdown.'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" sleep " + sleepSeconds + "; continue"); + lines.add(" fi"); + lines.add(" " + varNameA + "=$(echo \"$RESPONSE\" | grep '^" + + metricGrepA + "' | awk '{print $2}')"); + lines.add(" " + varNameB + "=$(echo \"$RESPONSE\" | grep '^" + + metricGrepB + "' | awk '{print $2}')"); + lines.add(" if [ -z \"$" + varNameA + "\" ] || [ -z \"$" + varNameB + "\" ]; then"); + lines.add(" echo '[preStop] WARNING: " + notFoundWarning + "'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" if [ \"${" + varNameA + "%.*}\" -ge \"${" + varNameB + "%.*}\" ] 2>/dev/null; then"); + lines.add(" echo '[preStop] " + idleMessage + "'"); + lines.add(" break"); + lines.add(" fi"); + lines.add(" echo \"[preStop] " + waitingFormat + "\""); + lines.add(" RETRIES=0"); + lines.add(" sleep " + sleepSeconds); + lines.add("done"); + // Send SIGTERM directly to the Java process. Shell entrypoint scripts + // (PID 1) often don't forward signals, so K8s SIGTERM never reaches + // the JVM — causing a full grace-period wait before SIGKILL. + // Use 'java' pattern to avoid matching this script itself. + lines.add("echo '[preStop] Sending SIGTERM to Java process...'"); + lines.add("pkill -f 'java.*org.apache' || true"); + lines.add("exit 0"); + return String.join("\n", lines); + } + /** * Computes a SHA-256 hash of the given input strings. * Used to annotate pod templates so that config changes trigger rolling updates. @@ -235,8 +447,8 @@ protected static void buildMetastoreVolumes( .withMountPath(CONF_MOUNT_PATH).build()); volumes.add(buildProjectedConfigVolume("hive-config", - MetastoreConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); + HiveConfigMapDependent.Metastore.resourceName(hiveCluster), + HiveConfigMapDependent.Hadoop.resourceName(hiveCluster))); } /** Builds Kubernetes ResourceRequirements from the operator's spec. 
*/ @@ -422,4 +634,237 @@ protected static Probe buildTcpProbe(int port, ProbeSpec spec, int defaultInitia return builder.build(); } + /** + * Applies the autoscaling lifecycle to a workload's pod template: sets a preStop + * exec lifecycle hook, terminationGracePeriodSeconds, and Prometheus scrape annotations. + * + * @param podSpec the pod spec of the workload (Deployment or StatefulSet) + * @param podMetadata the pod template metadata (for annotations) + * @param preStopScript the shell script to run in the preStop hook + * @param gracePeriodSeconds termination grace period + */ + protected static void applyAutoscalingLifecycle( + io.fabric8.kubernetes.api.model.PodSpec podSpec, + io.fabric8.kubernetes.api.model.ObjectMeta podMetadata, + String preStopScript, int gracePeriodSeconds, + int metricsScrapeIntervalSeconds) { + io.fabric8.kubernetes.api.model.Lifecycle lifecycle = + new io.fabric8.kubernetes.api.model.LifecycleBuilder() + .withNewPreStop() + .withNewExec() + .withCommand("/bin/bash", "-c", preStopScript) + .endExec() + .endPreStop() + .build(); + podSpec.getContainers().get(0).setLifecycle(lifecycle); + podSpec.setTerminationGracePeriodSeconds((long) gracePeriodSeconds); + applyPrometheusScrapeAnnotations(podMetadata, metricsScrapeIntervalSeconds); + } + + /** + * Adds Prometheus scrape annotations to a pod template so that + * the JMX Exporter metrics endpoint is discovered by Prometheus. 
+ */ + private static void applyPrometheusScrapeAnnotations( + io.fabric8.kubernetes.api.model.ObjectMeta podMetadata, + int scrapeIntervalSeconds) { + if (podMetadata.getAnnotations() == null) { + podMetadata.setAnnotations(new java.util.HashMap<>()); + } + podMetadata.getAnnotations().put("prometheus.io/scrape", "true"); + podMetadata.getAnnotations().put("prometheus.io/port", + String.valueOf(ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT)); + podMetadata.getAnnotations().put("prometheus.io/path", "/metrics"); + podMetadata.getAnnotations().put("prometheus.io/scrape-interval", + scrapeIntervalSeconds + "s"); + } + + /** + * Appends user-provided volumes and volume mounts to a workload's pod template. + * Handles both global (spec-level) and component-specific extras. + * + * @param podSpec the pod spec + * @param globalVolumes spec.volumes() (may be null) + * @param globalVolumeMounts spec.volumeMounts() (may be null) + * @param extraVolumes component-specific extraVolumes (may be null) + * @param extraVolumeMounts component-specific extraVolumeMounts (may be null) + */ + protected static void appendUserVolumes( + io.fabric8.kubernetes.api.model.PodSpec podSpec, + List globalVolumes, + List globalVolumeMounts, + List extraVolumes, + List extraVolumeMounts) { + if (globalVolumes != null) { + podSpec.getVolumes().addAll(globalVolumes); + } + if (globalVolumeMounts != null) { + podSpec.getContainers().get(0).getVolumeMounts().addAll(globalVolumeMounts); + } + if (extraVolumes != null) { + podSpec.getVolumes().addAll(extraVolumes); + } + if (extraVolumeMounts != null) { + podSpec.getContainers().get(0).getVolumeMounts().addAll(extraVolumeMounts); + } + } + + /** Path where the JMX Exporter agent JAR is stored inside the pod. 
*/ + protected static final String JMX_EXPORTER_DIR = "/opt/jmx-exporter"; + protected static final String JMX_EXPORTER_JAR = JMX_EXPORTER_DIR + "/jmx_prometheus_javaagent.jar"; + protected static final String JMX_EXPORTER_CONFIG = JMX_EXPORTER_DIR + "/config.yaml"; + + /** + * Adds the Prometheus JMX Exporter agent infrastructure to a pod spec when + * autoscaling is enabled. This includes: + *

<ul> + * <li>An emptyDir volume for the JMX exporter JAR and config</li> + * <li>An init container that downloads the agent JAR and writes a config file</li> + * <li>A volume mount on the main container</li> + * <li>A container port named "metrics" for the metrics endpoint ({@code metricsPort}, e.g. 9404)</li> + * <li>The javaagent JVM argument appended to SERVICE_OPTS (LLAP_DAEMON_OPTS for LLAP)</li> + * </ul>
+ * + * @param image the container image (used for the init container) + * @param component the Hive component name (for JMX bean pattern matching) + * @param initContainers list to add the download init container to + * @param volumeMounts list to add the jmx-exporter mount to (main container) + * @param volumes list to add the emptyDir volume to + * @param envVars list of env vars — SERVICE_OPTS will be updated with the javaagent flag + * @param ports list to add the metrics port to + */ + protected static void addJmxExporter( + String image, String component, int metricsPort, + List initContainers, + List volumeMounts, + List volumes, + List envVars, + List ports) { + + // Volume for the JMX exporter JAR + config + volumes.add(new VolumeBuilder() + .withName("jmx-exporter") + .withNewEmptyDir().endEmptyDir().build()); + VolumeMount exporterMount = new VolumeMountBuilder() + .withName("jmx-exporter") + .withMountPath(JMX_EXPORTER_DIR).build(); + volumeMounts.add(exporterMount); + + // JMX exporter config: export all beans in a catch-all pattern + // The agent exposes metrics in Prometheus text format at /metrics + String jmxConfig = buildJmxExporterConfig(component); + + // Init container: download JAR + write config + String downloadCmd = String.format( + "wget -q --tries=3 --waitretry=5 -O %s '%s' && " + + "cat > %s << 'JMXEOF'\n%s\nJMXEOF", + JMX_EXPORTER_JAR, ConfigUtils.JMX_EXPORTER_JAR_URL, + JMX_EXPORTER_CONFIG, jmxConfig); + initContainers.add(new ContainerBuilder() + .withName("jmx-exporter-init") + .withImage(image) + .withCommand("/bin/bash", "-c", downloadCmd) + .withVolumeMounts(exporterMount) + .build()); + + // Expose the metrics port + ports.add(new io.fabric8.kubernetes.api.model.ContainerPortBuilder() + .withName("metrics") + .withContainerPort(metricsPort) + .withProtocol("TCP").build()); + + // Add javaagent flag to the appropriate JVM opts env var. + // LLAP uses LLAP_DAEMON_OPTS (its startup script ignores SERVICE_OPTS). 
+ String agentArg = String.format("-javaagent:%s=%d:%s", + JMX_EXPORTER_JAR, metricsPort, JMX_EXPORTER_CONFIG); + String optsEnvVar = ConfigUtils.COMPONENT_LLAP.equals(component) ? "LLAP_DAEMON_OPTS" : "SERVICE_OPTS"; + boolean found = false; + for (int i = 0; i < envVars.size(); i++) { + if (optsEnvVar.equals(envVars.get(i).getName())) { + String existing = envVars.get(i).getValue(); + envVars.set(i, new EnvVar(optsEnvVar, + existing + " " + agentArg, null)); + found = true; + break; + } + } + if (!found) { + envVars.add(new EnvVar(optsEnvVar, agentArg, null)); + } + } + + /** + * Builds the JMX Exporter YAML config for a Hive component. + * Uses broad patterns to export all Hive/Hadoop metrics relevant to autoscaling. + */ + private static String buildJmxExporterConfig(String component) { + StringBuilder sb = new StringBuilder(); + sb.append("lowercaseOutputName: true\n"); + sb.append("lowercaseOutputLabelNames: true\n"); + sb.append("rules:\n"); + + switch (component) { + case ConfigUtils.COMPONENT_HIVESERVER2: + // HS2 session and operation metrics + sb.append("- pattern: 'metrics<>Value'\n"); + sb.append(" name: hs2_$1\n"); + sb.append(" type: GAUGE\n"); + sb.append("- pattern: 'metrics<>Count'\n"); + sb.append(" name: hs2_active_calls_$1\n"); + sb.append(" type: GAUGE\n"); + // Tez session pool metrics (pending tasks, backlog ratio, running tasks) + sb.append("- pattern: 'metrics<>Value'\n"); + sb.append(" name: tez_session_$1\n"); + sb.append(" type: GAUGE\n"); + // JVM CPU usage for CPU-based autoscaling + sb.append("- pattern: 'java.lang<>ProcessCpuLoad'\n"); + sb.append(" name: jvm_process_cpu_load\n"); + sb.append(" type: GAUGE\n"); + break; + case ConfigUtils.COMPONENT_METASTORE: + // HMS API call metrics + sb.append("- pattern: 'metrics<>Count'\n"); + sb.append(" name: api_$1_total\n"); + sb.append(" type: COUNTER\n"); + sb.append("- pattern: 'metrics<>Count'\n"); + sb.append(" name: hive_metastore_open_connections\n"); + sb.append(" type: GAUGE\n"); + 
// JVM CPU usage for CPU-based autoscaling + sb.append("- pattern: 'java.lang<>ProcessCpuLoad'\n"); + sb.append(" name: jvm_process_cpu_load\n"); + sb.append(" type: GAUGE\n"); + break; + case ConfigUtils.COMPONENT_LLAP: + // Only export the executor metrics the autoscaler and drain script need. + // A wildcard '.*' pattern serializes 600+ metrics every scrape interval, + // causing CPU spikes and GC pressure on the LLAP JVM. + // Internal format: Hadoop><>Attribute + // Separate rules per attribute — JMX Exporter 1.x caches per-bean, not per-attribute. + String llapBean = "Hadoop<>"; + sb.append("- pattern: '").append(llapBean).append("ExecutorNumQueuedRequests'\n"); + sb.append(" name: hadoop_llapdaemon_executornumqueuedrequests\n"); + sb.append(" type: GAUGE\n"); + sb.append("- pattern: '").append(llapBean).append("ExecutorNumExecutorsConfigured'\n"); + sb.append(" name: hadoop_llapdaemon_executornumexecutorsconfigured\n"); + sb.append(" type: GAUGE\n"); + sb.append("- pattern: '").append(llapBean).append("ExecutorNumExecutorsAvailable'\n"); + sb.append(" name: hadoop_llapdaemon_executornumexecutorsavailable\n"); + sb.append(" type: GAUGE\n"); + sb.append("- pattern: '").append(llapBean).append("ExecutorNumExecutors'\n"); + sb.append(" name: hadoop_llapdaemon_executornumexecutors\n"); + sb.append(" type: GAUGE\n"); + break; + case ConfigUtils.COMPONENT_TEZAM: + // TezAM DAG execution metrics + sb.append("- pattern: 'Hadoop<>(.+)'\n"); + sb.append(" name: tez_am_$1\n"); + sb.append(" type: GAUGE\n"); + break; + default: + sb.append("- pattern: '.*'\n"); + break; + } + return sb.toString(); + } + } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HivePdbDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HivePdbDependent.java new file mode 100644 index 000000000000..2deac1aa7740 --- /dev/null +++ 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HivePdbDependent.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * PodDisruptionBudget dependent resource for workflow-managed Hive components. + * Uses maxUnavailable=1 to allow at most one pod to be disrupted at a time + * while still permitting node drains when replicas=1. + *

+ * Subclassed per component (HS2, Metastore) to satisfy JOSDK's requirement + * for distinct no-arg-constructible classes in the workflow. + * LLAP and TezAM PDBs are managed imperatively via {@link LlapResourceBuilder}. + */ +public abstract class HivePdbDependent + extends HiveDependentResource { + + private final String component; + + protected HivePdbDependent(String component) { + super(PodDisruptionBudget.class); + this.component = component; + } + + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return primary.getMetadata().getName() + "-" + component + "-pdb"; + } + + @Override + protected PodDisruptionBudget desired(HiveCluster hiveCluster, + Context context) { + return new PodDisruptionBudgetBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-" + component + "-pdb") + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, component)) + .endMetadata() + .withNewSpec() + .withMaxUnavailable(new IntOrString(1)) + .withNewSelector() + .withMatchLabels(Labels.selectorForComponent(hiveCluster, component)) + .endSelector() + .endSpec() + .build(); + } + + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class HiveServer2 extends HivePdbDependent { + public HiveServer2() { + super(ConfigUtils.COMPONENT_HIVESERVER2); + } + } + + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class Metastore extends HivePdbDependent { + public Metastore() { + super(ConfigUtils.COMPONENT_METASTORE); + } + } + +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java deleted file mode 100644 index 9bb0597cc960..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ConfigMapDependent.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the hive-site.xml ConfigMap for HiveServer2. 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class HiveServer2ConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "hiveserver2"; - - public HiveServer2ConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - HiveClusterSpec spec = hiveCluster.getSpec(); - - Map props = - HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec); - Map tezProps = HiveConfigBuilder.getTezSite(spec); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("hive-site.xml", HadoopXmlBuilder.buildXml(props)) - .addToData("tez-site.xml", HadoopXmlBuilder.buildXml(tezProps)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. 
*/ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-hiveserver2-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java index ccb3048dea98..3afb0af118d2 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2DeploymentDependent.java @@ -34,6 +34,7 @@ import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; @@ -48,13 +49,24 @@ public class HiveServer2DeploymentDependent extends HiveDependentResource { - public static final String COMPONENT = "hiveserver2"; + public static final String COMPONENT = ConfigUtils.COMPONENT_HIVESERVER2; private static final String SCRATCH_MOUNT_PATH = "/opt/hive/scratch"; public HiveServer2DeploymentDependent() { super(Deployment.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + + @Override + protected String getComponentName() { + return COMPONENT; + } + @Override protected Deployment desired(HiveCluster hiveCluster, Context context) { @@ -64,7 +76,7 @@ protected Deployment desired(HiveCluster hiveCluster, Labels.selectorForComponent(hiveCluster, COMPONENT); List envVars = new 
ArrayList<>(); - envVars.add(new EnvVar("SERVICE_NAME", "hiveserver2", null)); + envVars.add(new EnvVar("SERVICE_NAME", COMPONENT, null)); envVars.add(new EnvVar("IS_RESUME", "true", null)); envVars.add(new EnvVar("TEZ_AM_EXTERNAL_ID", "tez-session-hs2", null)); @@ -85,10 +97,11 @@ protected Deployment desired(HiveCluster hiveCluster, spec.zookeeper().quorum(), null)); } - if (spec.llap().isEnabled()) { - envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", - spec.llap().serviceHosts(), null)); - } + spec.llapClusters().stream() + .filter(l -> l.isEnabled()) + .findFirst() + .ifPresent(llap -> envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", + llap.serviceHosts(), null))); int metastorePort = ConfigUtils.getInt( spec.metastore().configOverrides(), @@ -105,14 +118,17 @@ protected Deployment desired(HiveCluster hiveCluster, .append(ConfigUtils.HIVE_METASTORE_URIS_KEY) .append("=").append(metastoreUri); } - if (spec.llap().isEnabled()) { - serviceOpts.append(" -D") - .append(ConfigUtils.HIVE_EXECUTION_MODE_KEY) - .append("=llap"); - serviceOpts.append(" -D") - .append(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY) - .append("=").append(spec.llap().serviceHosts()); - } + spec.llapClusters().stream() + .filter(l -> l.isEnabled()) + .findFirst() + .ifPresent(llap -> { + serviceOpts.append(" -D") + .append(ConfigUtils.HIVE_EXECUTION_MODE_KEY) + .append("=llap"); + serviceOpts.append(" -D") + .append(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY) + .append("=").append(llap.serviceHosts()); + }); if (spec.tezAm().isEnabled()) { serviceOpts.append(" -D") .append(ConfigUtils.HIVE_ZOOKEEPER_QUORUM_KEY) @@ -125,21 +141,28 @@ protected Deployment desired(HiveCluster hiveCluster, hs2.configOverrides(), ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); + int hs2HttpPort = ConfigUtils.getInt( + hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT); 
int hs2WebUiPort = ConfigUtils.getInt( hs2.configOverrides(), ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); - List ports = List.of( - new ContainerPortBuilder() - .withName("thrift") - .withContainerPort(hs2ThriftPort).build(), - new ContainerPortBuilder() - .withName("webui") - .withContainerPort(hs2WebUiPort).build() - ); + List ports = new ArrayList<>(); + ports.add(new ContainerPortBuilder() + .withName("thrift") + .withContainerPort(hs2ThriftPort).withProtocol("TCP").build()); + ports.add(new ContainerPortBuilder() + .withName("http") + .withContainerPort(hs2HttpPort).withProtocol("TCP").build()); + ports.add(new ContainerPortBuilder() + .withName("webui") + .withContainerPort(hs2WebUiPort).withProtocol("TCP").build()); - Probe readinessProbe = buildTcpProbe(hs2ThriftPort, hs2.readinessProbe(), 15, 10, 3); - Probe livenessProbe = buildTcpProbe(hs2ThriftPort, hs2.livenessProbe(), 120, 30, 10); + // Probes target the HTTP transport port (default mode) + Probe readinessProbe = buildTcpProbe(hs2HttpPort, hs2.readinessProbe(), 15, 10, 3); + Probe livenessProbe = buildTcpProbe(hs2HttpPort, hs2.livenessProbe(), 120, 30, 10); boolean tezAmEnabled = spec.tezAm().isEnabled(); @@ -155,8 +178,8 @@ protected Deployment desired(HiveCluster hiveCluster, List volumes = new ArrayList<>(); volumes.add(buildProjectedConfigVolume("hive-config", - HiveServer2ConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); + HiveConfigMapDependent.HiveServer2.resourceName(hiveCluster), + HiveConfigMapDependent.Hadoop.resourceName(hiveCluster))); if (tezAmEnabled) { volumeMounts.add( @@ -166,8 +189,7 @@ protected Deployment desired(HiveCluster hiveCluster, volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder() .withName("scratch") .withNewPersistentVolumeClaim() - .withClaimName(ScratchPvcDependent - .resourceName(hiveCluster)) + .withClaimName(ScratchPvcDependent.resourceName(hiveCluster)) 
.endPersistentVolumeClaim() .build()); } @@ -185,6 +207,17 @@ protected Deployment desired(HiveCluster hiveCluster, replaceConfMountWithSubPaths(volumeMounts, "hive-config", "hive-site.xml", "tez-site.xml", "core-site.xml"); + // Add Prometheus JMX Exporter when HS2 autoscaling is enabled, or when + // LLAP/TezAM autoscaling needs the HS2 activation gate metric. + AutoscalingSpec autoscaling = hs2.autoscaling(); + boolean llapOrTezAmAutoscales = spec.llapClusters().stream().anyMatch( + l -> l.isEnabled() && (l.autoscaling().isEnabled() + || (spec.tezAm().isEnabled() && l.tezAm().autoscaling().isEnabled()))); + if (autoscaling.isEnabled() || llapOrTezAmAutoscales) { + addJmxExporter(spec.image(), COMPONENT, autoscaling.metricsPort(), + initContainers, volumeMounts, volumes, envVars, ports); + } + // Pre-compute config hash for the pod template annotation. // This ensures the Deployment is created with the correct hash // from the start (single ReplicaSet) and triggers rolling @@ -194,6 +227,12 @@ protected Deployment desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + AutoscalingSpec hs2Autoscaling = hs2.autoscaling(); + int initialReplicas = hs2Autoscaling != null && hs2Autoscaling.isEnabled() + ? 
Math.max(1, hs2Autoscaling.minReplicas()) : hs2.replicas(); + Integer replicas = resolveReplicaCount( + hiveCluster, context, hs2Autoscaling, hs2.replicas(), initialReplicas); + Deployment deployment = new DeploymentBuilder() .withNewMetadata() .withName(resourceName(hiveCluster)) @@ -201,20 +240,20 @@ protected Deployment desired(HiveCluster hiveCluster, .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) .endMetadata() .withNewSpec() - .withReplicas(hs2.replicas()) + .withReplicas(replicas) .withNewSelector() .withMatchLabels(selectorLabels) .endSelector() .withNewTemplate() .withNewMetadata() .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .addToAnnotations("kubectl.kubernetes.io/default-container", "hiveserver2") + .addToAnnotations("kubectl.kubernetes.io/default-container", COMPONENT) .addToAnnotations("hive.apache.org/config-hash", configHash) .endMetadata() .withNewSpec() .withInitContainers(initContainers) .addNewContainer() - .withName("hiveserver2") + .withName(COMPONENT) .withImage(spec.image()) .withImagePullPolicy(spec.imagePullPolicy()) .withEnv(envVars) @@ -233,21 +272,28 @@ protected Deployment desired(HiveCluster hiveCluster, applySpreadAffinityIfAbsent( deployment.getSpec().getTemplate().getSpec(), selectorLabels); - if (spec.volumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (hs2.extraVolumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(hs2.extraVolumes()); - } - if (hs2.extraVolumeMounts() != null) { - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(hs2.extraVolumeMounts()); + // Graceful scale-down: deregister from ZK, then poll JMX Exporter for sessions. 
+ if (autoscaling.isEnabled()) { + List zkDeregister = List.of( + "echo '[preStop] Deregistering HiveServer2 from ZooKeeper...'", + "hive --service hiveserver2 --deregister $(hive --service version 2>/dev/null | head -1 || echo '4.0.0')" + + " || echo '[preStop] WARNING: ZK deregister failed'"); + String preStopScript = buildDrainScript( + "Waiting for open sessions to drain", + "hs2_open_sessions", "SESSIONS", + "All sessions drained. Shutting down.", + 5, 6, zkDeregister, autoscaling.metricsPort()); + applyAutoscalingLifecycle( + deployment.getSpec().getTemplate().getSpec(), + deployment.getSpec().getTemplate().getMetadata(), + preStopScript, autoscaling.gracePeriodSeconds(), + autoscaling.metricsScrapeIntervalSeconds()); } + appendUserVolumes(deployment.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + hs2.extraVolumes(), hs2.extraVolumeMounts()); + return deployment; } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java deleted file mode 100644 index a9707ac0dfa6..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServer2ServiceDependent.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; -import org.apache.hive.kubernetes.operator.util.ConfigUtils; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the Kubernetes Service for HiveServer2 (Thrift and WebUI ports). 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class HiveServer2ServiceDependent - extends HiveDependentResource { - - public HiveServer2ServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - HiveServer2Spec hs2 = hiveCluster.getSpec().hiveServer2(); - int thriftPort = ConfigUtils.getInt(hs2.configOverrides(), - ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, - null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); - int webUiPort = ConfigUtils.getInt(hs2.configOverrides(), - ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, - null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); - - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-hiveserver2") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - HiveServer2DeploymentDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withType(hs2.serviceType()) - .withSelector(Labels.selectorForComponent(hiveCluster, - HiveServer2DeploymentDependent.COMPONENT)) - .addNewPort() - .withName("thrift") - .withPort(thriftPort) - .withTargetPort(new IntOrString(thriftPort)) - .endPort() - .addNewPort() - .withName("webui") - .withPort(webUiPort) - .withTargetPort(new IntOrString(webUiPort)) - .endPort() - .endSpec() - .build(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java new file mode 100644 index 000000000000..50d4dd8201dc --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/HiveServiceDependent.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.config.informer.Informer; +import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Unified Kubernetes Service dependent for all Hive components. + * Subclassed per component to define component-specific service type and ports. 
+ */ +public abstract class HiveServiceDependent + extends HiveDependentResource { + + private final String component; + + protected HiveServiceDependent(String component) { + super(Service.class); + this.component = component; + } + + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return primary.getMetadata().getName() + "-" + component; + } + + @Override + protected Service desired(HiveCluster hiveCluster, + Context context) { + ServiceBuilder builder = new ServiceBuilder() + .withNewMetadata() + .withName(hiveCluster.getMetadata().getName() + "-" + component) + .withNamespace(hiveCluster.getMetadata().getNamespace()) + .withLabels(Labels.forComponent(hiveCluster, component)) + .endMetadata() + .withNewSpec() + .withSelector(Labels.selectorForComponent(hiveCluster, component)) + .endSpec(); + customizeSpec(builder, hiveCluster); + return builder.build(); + } + + /** Subclasses override to set service type and add ports. */ + protected abstract void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster); + + /** HiveServer2 Service: configurable type, thrift + http + webui ports. 
*/ + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=hiveserver2," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class HiveServer2 extends HiveServiceDependent { + public HiveServer2() { + super(ConfigUtils.COMPONENT_HIVESERVER2); + } + + @Override + protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + var hs2 = hiveCluster.getSpec().hiveServer2(); + int thriftPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_PORT_DEFAULT); + int httpPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT); + int webUiPort = ConfigUtils.getInt(hs2.configOverrides(), + ConfigUtils.HIVE_SERVER2_WEBUI_PORT_KEY, + null, ConfigUtils.HIVE_SERVER2_WEBUI_PORT_DEFAULT); + builder.editSpec() + .withType(hs2.serviceType()) + .withSessionAffinity("ClientIP") + .addNewPort().withName("thrift").withProtocol("TCP") + .withPort(thriftPort).withTargetPort(new IntOrString(thriftPort)).endPort() + .addNewPort().withName("http").withProtocol("TCP") + .withPort(httpPort).withTargetPort(new IntOrString(httpPort)).endPort() + .addNewPort().withName("webui").withProtocol("TCP") + .withPort(webUiPort).withTargetPort(new IntOrString(webUiPort)).endPort() + .endSpec(); + } + } + + /** Metastore Service: ClusterIP, thrift + rest ports. 
*/ + @KubernetesDependent( + informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," + + "app.kubernetes.io/managed-by=hive-kubernetes-operator") + ) + public static class Metastore extends HiveServiceDependent { + public Metastore() { + super(ConfigUtils.COMPONENT_METASTORE); + } + + @Override + protected void customizeSpec(ServiceBuilder builder, HiveCluster hiveCluster) { + var overrides = hiveCluster.getSpec().metastore().configOverrides(); + int thriftPort = ConfigUtils.getInt(overrides, + ConfigUtils.METASTORE_THRIFT_PORT_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, + ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); + int restPort = ConfigUtils.getInt(overrides, + ConfigUtils.METASTORE_REST_HTTP_PORT_KEY, + null, ConfigUtils.METASTORE_REST_HTTP_PORT_DEFAULT); + builder.editSpec() + .withType("ClusterIP") + .addNewPort().withName("thrift").withProtocol("TCP") + .withPort(thriftPort).withTargetPort(new IntOrString(thriftPort)).endPort() + .addNewPort().withName("rest").withProtocol("TCP") + .withPort(restPort).withTargetPort(new IntOrString(restPort)).endPort() + .endSpec(); + } + } + +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java deleted file mode 100644 index 2ad6955dadb8..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapConfigMapDependent.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the llap-daemon-site.xml ConfigMap for LLAP daemons. 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class LlapConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "llap"; - - public LlapConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - Map props = - HiveConfigBuilder.getLlapDaemonSite(hiveCluster.getSpec()); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("llap-daemon-site.xml", - HadoopXmlBuilder.buildXml(props)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. */ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-llap-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapResourceBuilder.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapResourceBuilder.java new file mode 100644 index 000000000000..504901197168 --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapResourceBuilder.java @@ -0,0 +1,523 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.dependent; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import io.fabric8.kubernetes.api.model.ConfigMap; +import io.fabric8.kubernetes.api.model.ConfigMapBuilder; +import io.fabric8.kubernetes.api.model.Container; +import io.fabric8.kubernetes.api.model.ContainerPort; +import io.fabric8.kubernetes.api.model.ContainerPortBuilder; +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.OwnerReference; +import io.fabric8.kubernetes.api.model.OwnerReferenceBuilder; +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.Probe; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.fabric8.kubernetes.api.model.Volume; +import io.fabric8.kubernetes.api.model.VolumeMount; +import io.fabric8.kubernetes.api.model.apps.StatefulSet; +import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudget; +import io.fabric8.kubernetes.api.model.policy.v1.PodDisruptionBudgetBuilder; +import io.javaoperatorsdk.operator.api.reconciler.Context; +import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import 
org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; +import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; +import org.apache.hive.kubernetes.operator.util.Labels; + +/** + * Static builder methods for LLAP Kubernetes resources. + * Used by the reconciler to imperatively manage multiple LLAP clusters. + *

+ * Extends {@link HiveDependentResource} solely to access protected helper methods + * (buildTcpProbe, addExternalJars, addJmxExporter, etc.). + */ +public class LlapResourceBuilder + extends HiveDependentResource { + + private static final LlapResourceBuilder INSTANCE = new LlapResourceBuilder(); + private static final String TEZAM_INFIX = "-tezam-"; + private static final String HIVE_CONFIG_VOLUME = "hive-config"; + private static final String LLAP_CONFIG_VOLUME = "llap-config"; + + LlapResourceBuilder() { + super(StatefulSet.class); + } + + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + throw new UnsupportedOperationException("LlapResourceBuilder is not a managed dependent"); + } + + @Override + protected StatefulSet desired(HiveCluster hiveCluster, + Context context) { + throw new UnsupportedOperationException("LlapResourceBuilder is not a managed dependent"); + } + + /** Creates an OwnerReference pointing to the HiveCluster CR for garbage collection. */ + private static OwnerReference ownerRef(HiveCluster hc) { + return new OwnerReferenceBuilder() + .withApiVersion(hc.getApiVersion()) + .withKind(hc.getKind()) + .withName(hc.getMetadata().getName()) + .withUid(hc.getMetadata().getUid()) + .withController(true) + .withBlockOwnerDeletion(true) + .build(); + } + + /** Resource name for a specific LLAP cluster: {clusterName}-{llapName}. */ + public static String resourceName(HiveCluster hc, LlapSpec llap) { + return hc.getMetadata().getName() + "-" + llap.name(); + } + + /** ConfigMap name for a specific LLAP cluster. */ + public static String configMapName(HiveCluster hc, LlapSpec llap) { + return hc.getMetadata().getName() + "-" + llap.name() + "-config"; + } + + /** PDB name for a specific LLAP cluster. */ + public static String pdbName(HiveCluster hc, LlapSpec llap) { + return hc.getMetadata().getName() + "-" + llap.name() + "-pdb"; + } + + /** Builds the StatefulSet for a specific LLAP cluster. 
*/ + public static StatefulSet buildStatefulSet(HiveCluster hc, LlapSpec llap, Integer replicas) { + return INSTANCE.doBuildStatefulSet(hc, llap, replicas); + } + + /** Builds the headless Service for a specific LLAP cluster. */ + public static Service buildService(HiveCluster hc, LlapSpec llap) { + String ns = hc.getMetadata().getNamespace(); + String name = resourceName(hc, llap); + Map labels = Labels.forLlapCluster(hc, llap.name()); + Map selector = Labels.selectorForLlapCluster(hc, llap.name()); + + int managementPort = ConfigUtils.getInt(llap.configOverrides(), + ConfigUtils.HIVE_LLAP_MANAGEMENT_RPC_PORT_KEY, null, + ConfigUtils.HIVE_LLAP_MANAGEMENT_RPC_PORT_DEFAULT); + int shufflePort = ConfigUtils.getInt(llap.configOverrides(), + ConfigUtils.HIVE_LLAP_DAEMON_SHUFFLE_PORT_KEY, null, + ConfigUtils.HIVE_LLAP_DAEMON_SHUFFLE_PORT_DEFAULT); + int webPort = ConfigUtils.getInt(llap.configOverrides(), + ConfigUtils.HIVE_LLAP_DAEMON_WEB_PORT_KEY, null, + ConfigUtils.HIVE_LLAP_DAEMON_WEB_PORT_DEFAULT); + + return new ServiceBuilder() + .withNewMetadata() + .withName(name) + .withNamespace(ns) + .withLabels(labels) + .withOwnerReferences(ownerRef(hc)) + .endMetadata() + .withNewSpec() + .withClusterIP("None") + .withSelector(selector) + .addNewPort().withName("management").withProtocol("TCP") + .withPort(managementPort) + .withTargetPort(new IntOrString(managementPort)).endPort() + .addNewPort().withName("shuffle").withProtocol("TCP") + .withPort(shufflePort) + .withTargetPort(new IntOrString(shufflePort)).endPort() + .addNewPort().withName("web").withProtocol("TCP") + .withPort(webPort) + .withTargetPort(new IntOrString(webPort)).endPort() + .endSpec() + .build(); + } + + /** Builds the ConfigMap for a specific LLAP cluster. 
*/ + public static ConfigMap buildConfigMap(HiveCluster hc, LlapSpec llap) { + HiveClusterSpec spec = hc.getSpec(); + Map labels = Labels.forLlapCluster(hc, llap.name()); + Map llapDaemonSite = HiveConfigBuilder.getLlapDaemonSite(spec, llap); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(configMapName(hc, llap)) + .withNamespace(hc.getMetadata().getNamespace()) + .withLabels(labels) + .withOwnerReferences(ownerRef(hc)) + .endMetadata() + .addToData("llap-daemon-site.xml", HadoopXmlBuilder.buildXml(llapDaemonSite)) + .build(); + } + + /** Builds the PodDisruptionBudget for a specific LLAP cluster. */ + public static PodDisruptionBudget buildPdb(HiveCluster hc, LlapSpec llap) { + Map labels = Labels.forLlapCluster(hc, llap.name()); + Map selector = Labels.selectorForLlapCluster(hc, llap.name()); + + return new PodDisruptionBudgetBuilder() + .withNewMetadata() + .withName(pdbName(hc, llap)) + .withNamespace(hc.getMetadata().getNamespace()) + .withLabels(labels) + .withOwnerReferences(ownerRef(hc)) + .endMetadata() + .withNewSpec() + .withMaxUnavailable(new IntOrString(1)) + .withNewSelector() + .withMatchLabels(selector) + .endSelector() + .endSpec() + .build(); + } + + // --- TezAM resource builders (one TezAM per LLAP cluster) --- + + /** TezAM StatefulSet name for a specific LLAP cluster. */ + public static String tezAmResourceName(HiveCluster hc, LlapSpec llap) { + return hc.getMetadata().getName() + TEZAM_INFIX + llap.name(); + } + + /** TezAM ConfigMap name for a specific LLAP cluster. */ + public static String tezAmConfigMapName(HiveCluster hc, LlapSpec llap) { + return hc.getMetadata().getName() + TEZAM_INFIX + llap.name() + "-config"; + } + + /** TezAM PDB name for a specific LLAP cluster. */ + public static String tezAmPdbName(HiveCluster hc, LlapSpec llap) { + return hc.getMetadata().getName() + TEZAM_INFIX + llap.name() + "-pdb"; + } + + /** Builds the PodDisruptionBudget for a per-LLAP-cluster TezAM. 
*/ + public static PodDisruptionBudget buildTezAmPdb(HiveCluster hc, LlapSpec llap) { + Map labels = Labels.forTezAmCluster(hc, llap.name()); + Map selector = Labels.selectorForTezAmCluster(hc, llap.name()); + + return new PodDisruptionBudgetBuilder() + .withNewMetadata() + .withName(tezAmPdbName(hc, llap)) + .withNamespace(hc.getMetadata().getNamespace()) + .withLabels(labels) + .withOwnerReferences(ownerRef(hc)) + .endMetadata() + .withNewSpec() + .withMaxUnavailable(new IntOrString(1)) + .withNewSelector() + .withMatchLabels(selector) + .endSelector() + .endSpec() + .build(); + } + + /** Builds the TezAM StatefulSet for a specific LLAP cluster. */ + public static StatefulSet buildTezAmStatefulSet(HiveCluster hc, LlapSpec llap, Integer replicas) { + return INSTANCE.doBuildTezAmStatefulSet(hc, llap, replicas); + } + + /** Builds the headless Service for a TezAM cluster. */ + public static Service buildTezAmService(HiveCluster hc, LlapSpec llap) { + String ns = hc.getMetadata().getNamespace(); + String name = tezAmResourceName(hc, llap); + Map labels = Labels.forTezAmCluster(hc, llap.name()); + Map selector = Labels.selectorForTezAmCluster(hc, llap.name()); + + return new ServiceBuilder() + .withNewMetadata() + .withName(name) + .withNamespace(ns) + .withLabels(labels) + .withOwnerReferences(ownerRef(hc)) + .endMetadata() + .withNewSpec() + .withClusterIP("None") + .withSelector(selector) + .endSpec() + .build(); + } + + /** Builds the ConfigMap for a per-LLAP-cluster TezAM (tez-site.xml). 
*/ + public static ConfigMap buildTezAmConfigMap(HiveCluster hc, LlapSpec llap) { + HiveClusterSpec spec = hc.getSpec(); + Map labels = Labels.forTezAmCluster(hc, llap.name()); + Map tezSite = HiveConfigBuilder.getTezSite(spec, llap); + + return new ConfigMapBuilder() + .withNewMetadata() + .withName(tezAmConfigMapName(hc, llap)) + .withNamespace(hc.getMetadata().getNamespace()) + .withLabels(labels) + .withOwnerReferences(ownerRef(hc)) + .endMetadata() + .addToData("tez-site.xml", HadoopXmlBuilder.buildXml(tezSite)) + .build(); + } + + // --- Private instance methods that use protected helpers from HiveDependentResource --- + + private StatefulSet doBuildTezAmStatefulSet(HiveCluster hiveCluster, LlapSpec llap, + Integer replicas) { + HiveClusterSpec spec = hiveCluster.getSpec(); + String ns = hiveCluster.getMetadata().getNamespace(); + String ssName = tezAmResourceName(hiveCluster, llap); + Map allLabels = Labels.forTezAmCluster(hiveCluster, llap.name()); + Map selectorLabels = Labels.selectorForTezAmCluster(hiveCluster, llap.name()); + + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("SERVICE_NAME", ConfigUtils.COMPONENT_TEZAM, null)); + envVars.add(new EnvVar("IS_RESUME", "true", null)); + envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", + spec.zookeeper().quorum(), null)); + envVars.add(new EnvVar("TEZ_FRAMEWORK_MODE", "STANDALONE_ZOOKEEPER", null)); + envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", + llap.serviceHosts(), null)); + + if (spec.envVars() != null) { + envVars.addAll(spec.envVars()); + } + + List volumeMounts = new ArrayList<>(); + volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName(HIVE_CONFIG_VOLUME) + .withMountPath(CONF_MOUNT_PATH).build()); + volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName("scratch") + .withMountPath("/opt/hive/scratch").build()); + + List volumes = new ArrayList<>(); + // Projected volume: hive-site.xml from HS2 CM, tez-site.xml from 
per-LLAP CM, core-site.xml from Hadoop CM + String hs2CmName = HiveConfigMapDependent.HiveServer2.resourceName(hiveCluster); + String hadoopCmName = HiveConfigMapDependent.Hadoop.resourceName(hiveCluster); + String tezAmCmName = tezAmConfigMapName(hiveCluster, llap); + volumes.add(buildProjectedConfigVolume(HIVE_CONFIG_VOLUME, hs2CmName, tezAmCmName, hadoopCmName)); + volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder() + .withName("scratch") + .withNewPersistentVolumeClaim() + .withClaimName(ScratchPvcDependent.resourceName(hiveCluster)) + .endPersistentVolumeClaim() + .build()); + + List ports = new ArrayList<>(); + List initContainers = new ArrayList<>(); + addExternalJars(spec.image(), spec.externalJars(), + initContainers, volumeMounts, volumes, envVars); + replaceConfMountWithSubPaths(volumeMounts, HIVE_CONFIG_VOLUME, + "hive-site.xml", "tez-site.xml", "core-site.xml"); + + String configHash = sha256( + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec, llap)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + + StatefulSet statefulSet = new StatefulSetBuilder() + .withNewMetadata() + .withName(ssName) + .withNamespace(ns) + .withLabels(allLabels) + .withOwnerReferences(ownerRef(hiveCluster)) + .endMetadata() + .withNewSpec() + .withReplicas(replicas) + .withPodManagementPolicy("Parallel") + .withServiceName(ssName) + .withNewSelector() + .withMatchLabels(selectorLabels) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(allLabels) + .addToAnnotations("kubectl.kubernetes.io/default-container", + ConfigUtils.COMPONENT_TEZAM) + .addToAnnotations("hive.apache.org/config-hash", configHash) + .endMetadata() + .withNewSpec() + .withInitContainers(initContainers) + .addNewContainer() + .withName(ConfigUtils.COMPONENT_TEZAM) + .withImage(spec.image()) + .withImagePullPolicy(spec.imagePullPolicy()) + 
.withEnv(envVars) + .withPorts(ports) + .withResources(buildResources(spec.tezAm().resources())) + .withVolumeMounts(volumeMounts) + .endContainer() + .withVolumes(volumes) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + applySpreadAffinityIfAbsent( + statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); + + appendUserVolumes(statefulSet.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + spec.tezAm().extraVolumes(), spec.tezAm().extraVolumeMounts()); + + return statefulSet; + } + + private StatefulSet doBuildStatefulSet(HiveCluster hiveCluster, LlapSpec llap, Integer replicas) { + HiveClusterSpec spec = hiveCluster.getSpec(); + String ns = hiveCluster.getMetadata().getNamespace(); + String ssName = resourceName(hiveCluster, llap); + Map allLabels = Labels.forLlapCluster(hiveCluster, llap.name()); + Map selectorLabels = Labels.selectorForLlapCluster(hiveCluster, llap.name()); + + List envVars = new ArrayList<>(); + envVars.add(new EnvVar("SERVICE_NAME", "llap", null)); + envVars.add(new EnvVar("IS_RESUME", "true", null)); + envVars.add(new EnvVar("LLAP_MEMORY_MB", + String.valueOf(llap.memoryMb()), null)); + envVars.add(new EnvVar("LLAP_EXECUTORS", + String.valueOf(llap.executors()), null)); + envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", + spec.zookeeper().quorum(), null)); + envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", + llap.serviceHosts(), null)); + envVars.add(new EnvVar("LLAP_LOG4J2_PROPERTIES_FILE_NAME", + "llap-daemon-log4j2.properties", null)); + + if (spec.envVars() != null) { + envVars.addAll(spec.envVars()); + } + + int managementPort = ConfigUtils.getInt(llap.configOverrides(), + ConfigUtils.HIVE_LLAP_MANAGEMENT_RPC_PORT_KEY, null, + ConfigUtils.HIVE_LLAP_MANAGEMENT_RPC_PORT_DEFAULT); + int shufflePort = ConfigUtils.getInt(llap.configOverrides(), + ConfigUtils.HIVE_LLAP_DAEMON_SHUFFLE_PORT_KEY, null, + ConfigUtils.HIVE_LLAP_DAEMON_SHUFFLE_PORT_DEFAULT); + int webPort = 
ConfigUtils.getInt(llap.configOverrides(), + ConfigUtils.HIVE_LLAP_DAEMON_WEB_PORT_KEY, null, + ConfigUtils.HIVE_LLAP_DAEMON_WEB_PORT_DEFAULT); + int outputPort = ConfigUtils.getInt(llap.configOverrides(), + ConfigUtils.HIVE_LLAP_DAEMON_OUTPUT_SERVICE_PORT_KEY, null, + ConfigUtils.HIVE_LLAP_DAEMON_OUTPUT_SERVICE_PORT_DEFAULT); + + List ports = new ArrayList<>(); + ports.add(new ContainerPortBuilder() + .withName("management").withContainerPort(managementPort) + .withProtocol("TCP").build()); + ports.add(new ContainerPortBuilder() + .withName("shuffle").withContainerPort(shufflePort) + .withProtocol("TCP").build()); + ports.add(new ContainerPortBuilder() + .withName("web").withContainerPort(webPort) + .withProtocol("TCP").build()); + ports.add(new ContainerPortBuilder() + .withName("output").withContainerPort(outputPort) + .withProtocol("TCP").build()); + + Probe readinessProbe = buildTcpProbe(managementPort, llap.readinessProbe(), 15, 10, 3); + + List volumeMounts = new ArrayList<>(); + volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() + .withName(LLAP_CONFIG_VOLUME) + .withMountPath(CONF_MOUNT_PATH).build()); + + List volumes = new ArrayList<>(); + String cmName = configMapName(hiveCluster, llap); + String hadoopCmName = HiveConfigMapDependent.Hadoop.resourceName(hiveCluster); + volumes.add(buildProjectedConfigVolume(LLAP_CONFIG_VOLUME, cmName, hadoopCmName)); + + List initContainers = new ArrayList<>(); + addExternalJars(spec.image(), spec.externalJars(), + initContainers, volumeMounts, volumes, envVars); + replaceConfMountWithSubPaths(volumeMounts, LLAP_CONFIG_VOLUME, + "llap-daemon-site.xml", "core-site.xml"); + + AutoscalingSpec autoscaling = llap.autoscaling(); + if (autoscaling.isEnabled()) { + addJmxExporter(spec.image(), ConfigUtils.COMPONENT_LLAP, autoscaling.metricsPort(), + initContainers, volumeMounts, volumes, envVars, ports); + } + + String configHash = sha256( + 
HadoopXmlBuilder.buildXml(HiveConfigBuilder.getLlapDaemonSite(spec, llap)), + HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + + StatefulSet statefulSet = new StatefulSetBuilder() + .withNewMetadata() + .withName(ssName) + .withNamespace(ns) + .withLabels(allLabels) + .withOwnerReferences(ownerRef(hiveCluster)) + .endMetadata() + .withNewSpec() + .withReplicas(replicas) + .withPodManagementPolicy("Parallel") + .withServiceName(ssName) + .withNewSelector() + .withMatchLabels(selectorLabels) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(allLabels) + .addToAnnotations("kubectl.kubernetes.io/default-container", + ConfigUtils.COMPONENT_LLAP) + .addToAnnotations("hive.apache.org/config-hash", configHash) + .endMetadata() + .withNewSpec() + .withInitContainers(initContainers) + .addNewContainer() + .withName(ConfigUtils.COMPONENT_LLAP) + .withImage(spec.image()) + .withImagePullPolicy(spec.imagePullPolicy()) + .withEnv(envVars) + .withPorts(ports) + .withReadinessProbe(readinessProbe) + .withResources(buildResources(llap.resources())) + .withVolumeMounts(volumeMounts) + .endContainer() + .withVolumes(volumes) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + applySpreadAffinityIfAbsent( + statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); + + if (autoscaling.isEnabled()) { + String preStopScript = buildDualMetricDrainScript( + "Waiting for LLAP executors to become idle", + "hadoop_llapdaemon_executornumexecutorsavailable{", "AVAILABLE", + "hadoop_llapdaemon_executornumexecutors{", "TOTAL", + "LLAP executor metrics not found. JMX Exporter may not be configured.", + "All executors idle. 
Shutting down.", + "Executors available=$AVAILABLE / total=$TOTAL \u2014 waiting...", + 10, 6, autoscaling.metricsPort()); + applyAutoscalingLifecycle( + statefulSet.getSpec().getTemplate().getSpec(), + statefulSet.getSpec().getTemplate().getMetadata(), + preStopScript, autoscaling.gracePeriodSeconds(), + autoscaling.metricsScrapeIntervalSeconds()); + } + + appendUserVolumes(statefulSet.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + llap.extraVolumes(), llap.extraVolumeMounts()); + + return statefulSet; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java deleted file mode 100644 index 108f29347a97..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapServiceDependent.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages the headless Kubernetes Service for LLAP daemons. - * Required by the StatefulSet for stable DNS entries and ZooKeeper registration. - */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class LlapServiceDependent - extends HiveDependentResource { - - public LlapServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-llap") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - LlapStatefulSetDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withClusterIP("None") - .withSelector(Labels.selectorForComponent(hiveCluster, - LlapStatefulSetDependent.COMPONENT)) - .addNewPort() - .withName("management") - .withPort(15004) - .withTargetPort(new IntOrString(15004)) - .endPort() - .addNewPort() - .withName("shuffle") - .withPort(15551) - .withTargetPort(new IntOrString(15551)) - .endPort() - .addNewPort() - .withName("web") - .withPort(15002) - .withTargetPort(new IntOrString(15002)) - .endPort() - .endSpec() - .build(); - } -} diff --git 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java deleted file mode 100644 index c8c044d22ce9..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/LlapStatefulSetDependent.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import io.fabric8.kubernetes.api.model.Container; -import io.fabric8.kubernetes.api.model.ContainerPort; -import io.fabric8.kubernetes.api.model.ContainerPortBuilder; -import io.fabric8.kubernetes.api.model.EnvVar; -import io.fabric8.kubernetes.api.model.Probe; -import io.fabric8.kubernetes.api.model.apps.StatefulSet; -import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; -import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages the Kubernetes StatefulSet for LLAP daemons. - * Uses StatefulSet for stable pod identities required by ZooKeeper registration. 
- */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=llap," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class LlapStatefulSetDependent - extends HiveDependentResource { - - public static final String COMPONENT = "llap"; - - public LlapStatefulSetDependent() { - super(StatefulSet.class); - } - - @Override - protected StatefulSet desired(HiveCluster hiveCluster, - Context context) { - HiveClusterSpec spec = hiveCluster.getSpec(); - LlapSpec llap = spec.llap(); - Map selectorLabels = - Labels.selectorForComponent(hiveCluster, COMPONENT); - - List envVars = new ArrayList<>(); - envVars.add(new EnvVar("SERVICE_NAME", "llap", null)); - envVars.add(new EnvVar("IS_RESUME", "true", null)); - envVars.add(new EnvVar("LLAP_MEMORY_MB", - String.valueOf(llap.memoryMb()), null)); - envVars.add(new EnvVar("LLAP_EXECUTORS", - String.valueOf(llap.executors()), null)); - envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", - spec.zookeeper().quorum(), null)); - envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", - llap.serviceHosts(), null)); - - // User-provided env vars (storage credentials, etc.) 
- if (spec.envVars() != null) { - envVars.addAll(spec.envVars()); - } - - List ports = List.of( - new ContainerPortBuilder() - .withName("management").withContainerPort(15004).build(), - new ContainerPortBuilder() - .withName("shuffle").withContainerPort(15551).build(), - new ContainerPortBuilder() - .withName("web").withContainerPort(15002).build(), - new ContainerPortBuilder() - .withName("output").withContainerPort(15003).build() - ); - - Probe readinessProbe = buildTcpProbe(15004, llap.readinessProbe(), 15, 10, 3); - - String headlessServiceName = - hiveCluster.getMetadata().getName() + "-llap"; - - List volumeMounts = - new ArrayList<>(); - volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() - .withName("llap-config") - .withMountPath(CONF_MOUNT_PATH).build()); - - List volumes = - new ArrayList<>(); - volumes.add(buildProjectedConfigVolume("llap-config", - LlapConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); - - List initContainers = new ArrayList<>(); - addExternalJars(spec.image(), spec.externalJars(), - initContainers, volumeMounts, volumes, envVars); - replaceConfMountWithSubPaths(volumeMounts, "llap-config", - "llap-daemon-site.xml", "core-site.xml"); - - // Pre-compute config hash for the pod template annotation. 
- String configHash = sha256( - HadoopXmlBuilder.buildXml(HiveConfigBuilder.getLlapDaemonSite(spec)), - HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); - - StatefulSet statefulSet = new StatefulSetBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .withNewSpec() - .withReplicas(llap.replicas()) - .withServiceName(headlessServiceName) - .withNewSelector() - .withMatchLabels(selectorLabels) - .endSelector() - .withNewTemplate() - .withNewMetadata() - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .addToAnnotations("kubectl.kubernetes.io/default-container", "llap") - .addToAnnotations("hive.apache.org/config-hash", configHash) - .endMetadata() - .withNewSpec() - .withInitContainers(initContainers) - .addNewContainer() - .withName("llap") - .withImage(spec.image()) - .withImagePullPolicy(spec.imagePullPolicy()) - .withEnv(envVars) - .withPorts(ports) - .withReadinessProbe(readinessProbe) - .withResources(buildResources(llap.resources())) - .withVolumeMounts(volumeMounts) - .endContainer() - .withVolumes(volumes) - .endSpec() - .endTemplate() - .endSpec() - .build(); - - applySpreadAffinityIfAbsent( - statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); - - if (spec.volumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (llap.extraVolumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(llap.extraVolumes()); - } - if (llap.extraVolumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(llap.extraVolumeMounts()); - } - return statefulSet; - } - - /** Returns 
the StatefulSet resource name for this HiveCluster. */ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-llap"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java deleted file mode 100644 index b429335f76e0..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreConfigMapDependent.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.Map; - -import io.fabric8.kubernetes.api.model.ConfigMap; -import io.fabric8.kubernetes.api.model.ConfigMapBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the metastore-site.xml ConfigMap for the Hive Metastore. */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class MetastoreConfigMapDependent - extends HiveDependentResource { - - public static final String COMPONENT = "metastore"; - - public MetastoreConfigMapDependent() { - super(ConfigMap.class); - } - - @Override - protected ConfigMap desired(HiveCluster hiveCluster, - Context context) { - Map props = - HiveConfigBuilder.getMetastoreSite(hiveCluster.getSpec()); - - return new ConfigMapBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .addToData("metastore-site.xml", HadoopXmlBuilder.buildXml(props)) - .build(); - } - - /** Returns the ConfigMap resource name for this HiveCluster. 
*/ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-metastore-config"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java index 46a95426c969..ff19afd5c023 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreDeploymentDependent.java @@ -36,6 +36,7 @@ import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; +import org.apache.hive.kubernetes.operator.model.spec.AutoscalingSpec; import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; @@ -50,12 +51,23 @@ public class MetastoreDeploymentDependent extends HiveDependentResource { - public static final String COMPONENT = "metastore"; + public static final String COMPONENT = ConfigUtils.COMPONENT_METASTORE; public MetastoreDeploymentDependent() { super(Deployment.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + + @Override + protected String getComponentName() { + return COMPONENT; + } + @Override protected Deployment desired(HiveCluster hiveCluster, Context context) { @@ -65,7 +77,7 @@ protected Deployment desired(HiveCluster hiveCluster, Labels.selectorForComponent(hiveCluster, COMPONENT); List envVars = new ArrayList<>(); - envVars.add(new EnvVar("SERVICE_NAME", "metastore", null)); + envVars.add(new 
EnvVar("SERVICE_NAME", COMPONENT, null)); envVars.add(new EnvVar("IS_RESUME", "true", null)); envVars.addAll(buildDbEnvVars(db)); if (spec.envVars() != null) { @@ -77,12 +89,15 @@ protected Deployment desired(HiveCluster hiveCluster, ConfigUtils.METASTORE_THRIFT_PORT_KEY, ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); - List ports = List.of( - new ContainerPortBuilder() - .withName("thrift").withContainerPort(thriftPort).build(), - new ContainerPortBuilder() - .withName("rest").withContainerPort(9001).build() - ); + int restPort = ConfigUtils.getInt( + spec.metastore().configOverrides(), + ConfigUtils.METASTORE_REST_HTTP_PORT_KEY, + null, ConfigUtils.METASTORE_REST_HTTP_PORT_DEFAULT); + List ports = new ArrayList<>(); + ports.add(new ContainerPortBuilder() + .withName("thrift").withContainerPort(thriftPort).withProtocol("TCP").build()); + ports.add(new ContainerPortBuilder() + .withName("rest").withContainerPort(restPort).withProtocol("TCP").build()); Probe readinessProbe = buildTcpProbe(thriftPort, spec.metastore().readinessProbe(), 15, 10, 3); Probe livenessProbe = buildTcpProbe(thriftPort, spec.metastore().livenessProbe(), 60, 30, 5); @@ -107,6 +122,13 @@ protected Deployment desired(HiveCluster hiveCluster, replaceConfMountWithSubPaths(volumeMounts, "hive-config", "metastore-site.xml", "core-site.xml"); + // Add Prometheus JMX Exporter when autoscaling is enabled + AutoscalingSpec autoscaling = spec.metastore().autoscaling(); + if (autoscaling.isEnabled()) { + addJmxExporter(spec.image(), COMPONENT, autoscaling.metricsPort(), + initContainers, volumeMounts, volumes, envVars, ports); + } + // Pre-compute config hash for the pod template annotation. 
// This ensures the Deployment is created with the correct hash // from the start (single ReplicaSet) and triggers rolling @@ -115,6 +137,12 @@ protected Deployment desired(HiveCluster hiveCluster, HadoopXmlBuilder.buildXml(HiveConfigBuilder.getMetastoreSite(spec)), HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); + AutoscalingSpec msAutoscaling = spec.metastore().autoscaling(); + int initialReplicas = msAutoscaling != null && msAutoscaling.isEnabled() + ? Math.max(1, msAutoscaling.minReplicas()) : spec.metastore().replicas(); + Integer replicas = resolveReplicaCount( + hiveCluster, context, msAutoscaling, spec.metastore().replicas(), initialReplicas); + Deployment deployment = new DeploymentBuilder() .withNewMetadata() .withName(resourceName(hiveCluster)) @@ -122,20 +150,20 @@ protected Deployment desired(HiveCluster hiveCluster, .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) .endMetadata() .withNewSpec() - .withReplicas(spec.metastore().replicas()) + .withReplicas(replicas) .withNewSelector() .withMatchLabels(selectorLabels) .endSelector() .withNewTemplate() .withNewMetadata() .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .addToAnnotations("kubectl.kubernetes.io/default-container", "metastore") + .addToAnnotations("kubectl.kubernetes.io/default-container", COMPONENT) .addToAnnotations("hive.apache.org/config-hash", configHash) .endMetadata() .withNewSpec() .withInitContainers(initContainers) .addNewContainer() - .withName("metastore") + .withName(COMPONENT) .withImage(spec.image()) .withImagePullPolicy(spec.imagePullPolicy()) .withEnv(envVars) @@ -155,20 +183,26 @@ protected Deployment desired(HiveCluster hiveCluster, applySpreadAffinityIfAbsent( deployment.getSpec().getTemplate().getSpec(), selectorLabels); - if (spec.volumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - 
deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (spec.metastore().extraVolumes() != null) { - deployment.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.metastore().extraVolumes()); - } - if (spec.metastore().extraVolumeMounts() != null) { - deployment.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.metastore().extraVolumeMounts()); + // HMS uses HTTP transport mode — connections are stateless, so no session + // drain is needed. The preStop hook simply sends SIGTERM directly to the + // JVM (the shell entrypoint doesn't forward signals from K8s). + if (autoscaling.isEnabled()) { + String preStopScript = String.join("\n", + "#!/bin/bash", + "echo '[preStop] Sending SIGTERM to Metastore Java process...'", + "pkill -f 'java.*org.apache' || true", + "exit 0"); + applyAutoscalingLifecycle( + deployment.getSpec().getTemplate().getSpec(), + deployment.getSpec().getTemplate().getMetadata(), + preStopScript, autoscaling.gracePeriodSeconds(), + autoscaling.metricsScrapeIntervalSeconds()); } + + appendUserVolumes(deployment.getSpec().getTemplate().getSpec(), + spec.volumes(), spec.volumeMounts(), + spec.metastore().extraVolumes(), spec.metastore().extraVolumeMounts()); + return deployment; } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java deleted file mode 100644 index 2620a24e01d7..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/MetastoreServiceDependent.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.IntOrString; -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.ConfigUtils; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** Manages the Kubernetes Service for the Hive Metastore (Thrift + REST ports). 
*/ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=metastore," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class MetastoreServiceDependent - extends HiveDependentResource { - - public MetastoreServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - int thriftPort = ConfigUtils.getInt( - hiveCluster.getSpec().metastore().configOverrides(), - ConfigUtils.METASTORE_THRIFT_PORT_KEY, - ConfigUtils.METASTORE_THRIFT_PORT_HIVE_KEY, - ConfigUtils.METASTORE_THRIFT_PORT_DEFAULT); - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-metastore") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - MetastoreDeploymentDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withType("ClusterIP") - .withSelector(Labels.selectorForComponent(hiveCluster, - MetastoreDeploymentDependent.COMPONENT)) - .addNewPort() - .withName("thrift") - .withPort(thriftPort) - .withTargetPort(new IntOrString(thriftPort)) - .endPort() - .addNewPort() - .withName("rest") - .withPort(9001) - .withTargetPort(new IntOrString(9001)) - .endPort() - .endSpec() - .build(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java index a23c0c477436..fb4b588401c9 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/SchemaInitJobDependent.java @@ -34,6 +34,7 @@ import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; import org.apache.hive.kubernetes.operator.model.spec.DatabaseConfig; import 
org.apache.hive.kubernetes.operator.model.spec.SecretKeyRef; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; import org.apache.hive.kubernetes.operator.util.Labels; /** @@ -53,6 +54,12 @@ public SchemaInitJobDependent() { super(Job.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + @Override protected Job desired(HiveCluster hiveCluster, Context context) { @@ -60,7 +67,7 @@ protected Job desired(HiveCluster hiveCluster, DatabaseConfig db = spec.metastore().database(); List envVars = new ArrayList<>(); - envVars.add(new EnvVar("SERVICE_NAME", "metastore", null)); + envVars.add(new EnvVar("SERVICE_NAME", ConfigUtils.COMPONENT_METASTORE, null)); envVars.add(new EnvVar("IS_RESUME", "false", null)); envVars.add(new EnvVar("HIVE_CUSTOM_CONF_DIR", CONF_MOUNT_PATH, null)); diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java index 6a645f043574..230ba47edd13 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/ScratchPvcDependent.java @@ -55,6 +55,12 @@ public ScratchPvcDependent() { super(PersistentVolumeClaim.class); } + @Override + protected String getSecondaryResourceName(HiveCluster primary, + Context context) { + return resourceName(primary); + } + @Override protected PersistentVolumeClaim desired(HiveCluster hiveCluster, Context context) { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java deleted file mode 100644 index 781685286038..000000000000 --- 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmServiceDependent.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import io.fabric8.kubernetes.api.model.Service; -import io.fabric8.kubernetes.api.model.ServiceBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages the headless Kubernetes Service for Tez Application Master. - * Required by the StatefulSet for stable DNS entries so that - * HiveServer2 can resolve TezAM pod hostnames for RPC communication. 
- */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class TezAmServiceDependent - extends HiveDependentResource { - - public TezAmServiceDependent() { - super(Service.class); - } - - @Override - protected Service desired(HiveCluster hiveCluster, - Context context) { - return new ServiceBuilder() - .withNewMetadata() - .withName(hiveCluster.getMetadata().getName() + "-tezam") - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, - TezAmStatefulSetDependent.COMPONENT)) - .endMetadata() - .withNewSpec() - .withClusterIP("None") - .withSelector(Labels.selectorForComponent(hiveCluster, - TezAmStatefulSetDependent.COMPONENT)) - .endSpec() - .build(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java deleted file mode 100644 index 5cc7a3f800f3..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/TezAmStatefulSetDependent.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import io.fabric8.kubernetes.api.model.Container; -import io.fabric8.kubernetes.api.model.EnvVar; -import io.fabric8.kubernetes.api.model.apps.StatefulSet; -import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.config.informer.Informer; -import io.javaoperatorsdk.operator.processing.dependent.kubernetes.KubernetesDependent; -import org.apache.hive.kubernetes.operator.model.HiveCluster; -import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; -import org.apache.hive.kubernetes.operator.model.spec.TezAmSpec; -import org.apache.hive.kubernetes.operator.util.HadoopXmlBuilder; -import org.apache.hive.kubernetes.operator.util.HiveConfigBuilder; -import org.apache.hive.kubernetes.operator.util.Labels; - -/** - * Manages the Kubernetes StatefulSet for the Tez Application Master. - * Uses StatefulSet (with a headless Service) so that each TezAM pod - * gets a stable, DNS-resolvable hostname. HiveServer2 discovers - * TezAM pods via ZooKeeper and connects over RPC using the hostname, - * so the hostname must be resolvable within the cluster. 
- */ -@KubernetesDependent( - informer = @Informer(labelSelector = "app.kubernetes.io/component=tezam," - + "app.kubernetes.io/managed-by=hive-kubernetes-operator") -) -public class TezAmStatefulSetDependent - extends HiveDependentResource { - - public static final String COMPONENT = "tezam"; - private static final String SCRATCH_MOUNT_PATH = "/opt/hive/scratch"; - - public TezAmStatefulSetDependent() { - super(StatefulSet.class); - } - - @Override - protected StatefulSet desired(HiveCluster hiveCluster, - Context context) { - HiveClusterSpec spec = hiveCluster.getSpec(); - TezAmSpec tezAm = spec.tezAm(); - Map selectorLabels = - Labels.selectorForComponent(hiveCluster, COMPONENT); - - List envVars = new ArrayList<>(); - envVars.add(new EnvVar("SERVICE_NAME", "tezam", null)); - envVars.add(new EnvVar("IS_RESUME", "true", null)); - envVars.add(new EnvVar("HIVE_ZOOKEEPER_QUORUM", - spec.zookeeper().quorum(), null)); - envVars.add(new EnvVar("TEZ_FRAMEWORK_MODE", - "STANDALONE_ZOOKEEPER", null)); - - if (spec.llap().isEnabled()) { - envVars.add(new EnvVar("HIVE_LLAP_DAEMON_SERVICE_HOSTS", - spec.llap().serviceHosts(), null)); - } - - // User-provided env vars (storage credentials, etc.) 
- if (spec.envVars() != null) { - envVars.addAll(spec.envVars()); - } - - String headlessServiceName = - hiveCluster.getMetadata().getName() + "-tezam"; - - List volumeMounts = - new ArrayList<>(); - volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() - .withName("hive-config") - .withMountPath(CONF_MOUNT_PATH).build()); - volumeMounts.add(new io.fabric8.kubernetes.api.model.VolumeMountBuilder() - .withName("scratch") - .withMountPath(SCRATCH_MOUNT_PATH).build()); - - List volumes = - new ArrayList<>(); - volumes.add(buildProjectedConfigVolume("hive-config", - HiveServer2ConfigMapDependent.resourceName(hiveCluster), - HadoopConfigMapDependent.resourceName(hiveCluster))); - volumes.add(new io.fabric8.kubernetes.api.model.VolumeBuilder() - .withName("scratch") - .withNewPersistentVolumeClaim() - .withClaimName(ScratchPvcDependent.resourceName(hiveCluster)) - .endPersistentVolumeClaim() - .build()); - - List initContainers = new ArrayList<>(); - addExternalJars(spec.image(), spec.externalJars(), - initContainers, volumeMounts, volumes, envVars); - replaceConfMountWithSubPaths(volumeMounts, "hive-config", - "hive-site.xml", "tez-site.xml", "core-site.xml"); - - // Pre-compute config hash for the pod template annotation. - // TezAM uses the same ConfigMaps as HS2 (hive-site.xml + tez-site.xml + core-site.xml). 
- String configHash = sha256( - HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHiveServer2HiveSite(hiveCluster, spec)), - HadoopXmlBuilder.buildXml(HiveConfigBuilder.getTezSite(spec)), - HadoopXmlBuilder.buildXml(HiveConfigBuilder.getHadoopCoreSite(spec))); - - StatefulSet statefulSet = new StatefulSetBuilder() - .withNewMetadata() - .withName(resourceName(hiveCluster)) - .withNamespace(hiveCluster.getMetadata().getNamespace()) - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .endMetadata() - .withNewSpec() - .withReplicas(tezAm.replicas()) - .withServiceName(headlessServiceName) - .withNewSelector() - .withMatchLabels(selectorLabels) - .endSelector() - .withNewTemplate() - .withNewMetadata() - .withLabels(Labels.forComponent(hiveCluster, COMPONENT)) - .addToAnnotations("kubectl.kubernetes.io/default-container", "tezam") - .addToAnnotations("hive.apache.org/config-hash", configHash) - .endMetadata() - .withNewSpec() - .withInitContainers(initContainers) - .addNewContainer() - .withName("tezam") - .withImage(spec.image()) - .withImagePullPolicy(spec.imagePullPolicy()) - .withEnv(envVars) - .withResources(buildResources(tezAm.resources())) - .withVolumeMounts(volumeMounts) - .endContainer() - .withVolumes(volumes) - .endSpec() - .endTemplate() - .endSpec() - .build(); - - applySpreadAffinityIfAbsent( - statefulSet.getSpec().getTemplate().getSpec(), selectorLabels); - - if (spec.volumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(spec.volumes()); - } - if (spec.volumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(spec.volumeMounts()); - } - if (tezAm.extraVolumes() != null) { - statefulSet.getSpec().getTemplate().getSpec().getVolumes().addAll(tezAm.extraVolumes()); - } - if (tezAm.extraVolumeMounts() != null) { - statefulSet.getSpec().getTemplate().getSpec().getContainers().get(0).getVolumeMounts() - .addAll(tezAm.extraVolumeMounts()); - } - return 
statefulSet; - } - - /** Returns the StatefulSet resource name for this HiveCluster. */ - public static String resourceName(HiveCluster hiveCluster) { - return hiveCluster.getMetadata().getName() + "-tezam"; - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java deleted file mode 100644 index a36002dbf886..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/HiveServer2Precondition.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.apps.Deployment; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Precondition for HiveServer2 Deployment. 
- * If Metastore is external, proceed immediately. - * If managed, wait for Metastore pods to be ready. - */ -public class HiveServer2Precondition implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - - if (!primary.getSpec().metastore().isEnabled()) { - return true; - } - - int desiredReplicas = primary.getSpec().metastore().replicas(); - return context.getSecondaryResources(Deployment.class).stream() - .filter(d -> d.getMetadata().getName().equals(primary.getMetadata().getName() + "-metastore")) - .findFirst() - .map(deployment -> deployment.getStatus() != null - && deployment.getStatus().getReadyReplicas() != null - && deployment.getStatus().getReadyReplicas() >= desiredReplicas) - .orElse(false); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java deleted file mode 100644 index b1cb4139ac96..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreEnabledCondition.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for Metastore dependent resources. - * Returns true only when spec.metastore.enabled is true. - */ -public class MetastoreEnabledCondition implements Condition { - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().metastore().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java deleted file mode 100644 index 7b3169f32043..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/MetastoreReadyCondition.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.apps.Deployment; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Ready condition that checks whether the Metastore Deployment has the - * desired number of ready replicas. Used to gate HiveServer2 Deployment. - */ -public class MetastoreReadyCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - if (!primary.getSpec().metastore().isEnabled()) { - return true; - } - int desiredReplicas = primary.getSpec().metastore().replicas(); - return dependentResource.getSecondaryResource(primary, context) - .map(deployment -> deployment.getStatus() != null - && deployment.getStatus().getReadyReplicas() != null - && deployment.getStatus().getReadyReplicas() >= desiredReplicas) - .orElse(false); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java deleted file mode 100644 index 1b0b44318596..000000000000 --- 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/SchemaJobCompletedCondition.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.batch.v1.Job; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Ready condition that checks whether the schema initialization Job - * has completed successfully. Used to gate Metastore Deployment creation. 
- */ -public class SchemaJobCompletedCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - if (!primary.getSpec().metastore().isEnabled()) { - return true; - } - return dependentResource.getSecondaryResource(primary, context) - .map(job -> job.getStatus() != null - && job.getStatus().getSucceeded() != null - && job.getStatus().getSucceeded() >= 1) - .orElse(false); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java deleted file mode 100644 index 85ae7e45dbdb..000000000000 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/TezAmEnabledCondition.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.kubernetes.operator.dependent.condition; - -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; - -/** - * Activation condition for Tez AM dependent resources. - * Returns true only when spec.tezAm.enabled is true. - */ -public class TezAmEnabledCondition - implements Condition { - - @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().tezAm().isEnabled(); - } -} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java index 40dd8a771203..b9c0faf42c55 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterSpec.java @@ -28,6 +28,7 @@ import io.fabric8.kubernetes.api.model.EnvVar; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; +import org.apache.hive.kubernetes.operator.model.spec.AutoSuspendSpec; import org.apache.hive.kubernetes.operator.model.spec.HadoopSpec; import org.apache.hive.kubernetes.operator.model.spec.HiveServer2Spec; import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; @@ -45,8 +46,15 @@ public record HiveClusterSpec( MetastoreSpec metastore, @JsonPropertyDescription("HiveServer2 component configuration") HiveServer2Spec hiveServer2, - @JsonPropertyDescription("LLAP daemon configuration. Enabled by default.") - LlapSpec llap, + @JsonPropertyDescription("LLAP compute clusters. 
Each entry is an independent LLAP cluster " + + "with its own StatefulSet, autoscaling, and ZooKeeper registration. " + + "Users select a cluster via hive.llap.daemon.service.hosts=@{name} in their session.") + List llapClusters, + @JsonPropertyDescription("Server-side LLAP cluster routing rules. Maps users/groups to LLAP " + + "cluster names so clients don't need to specify namespace configs. " + + "Format: user:=,group:=,default=. " + + "Example: \"user:alice=llap1,group:eng=llap0,default=llap0\"") + String llapClusterRouting, @JsonPropertyDescription("Tez Application Master configuration. Enabled by default.") TezAmSpec tezAm, @Required @@ -73,14 +81,30 @@ public record HiveClusterSpec( "Volume mounts added to all component containers " + "(e.g., mounting a GCS key file at /etc/gcs/key.json)") @SchemaFrom(type = Object[].class) @PreserveUnknownFields - List volumeMounts) { + List volumeMounts, + @JsonPropertyDescription("Auto-suspend configuration. When enabled and all components " + + "are idle for the configured timeout, the cluster scales to 0 replicas.") + AutoSuspendSpec autoSuspend, + @JsonPropertyDescription("When true, the cluster is immediately suspended (all components " + + "scaled to 0). Set to false to wake a suspended cluster.") + Boolean suspend) { public HiveClusterSpec { Objects.requireNonNull(zookeeper, "zookeeper must be provided in the HiveCluster spec"); + metastore = metastore != null ? metastore : new MetastoreSpec( + 1, null, null, null, null, null, null, true, null, null, null, null); + hiveServer2 = hiveServer2 != null ? hiveServer2 : new HiveServer2Spec( + 1, null, null, null, null, null, null, null, null, null); + llapClusters = llapClusters != null ? llapClusters : List.of(); + tezAm = tezAm != null ? tezAm : new TezAmSpec( + 1, null, null, null, null, true, null, null, null); envVars = envVars != null ? envVars : List.of(); externalJars = externalJars != null ? externalJars : List.of(); volumes = volumes != null ? 
volumes : List.of(); volumeMounts = volumeMounts != null ? volumeMounts : List.of(); + autoSuspend = autoSuspend != null ? autoSuspend : new AutoSuspendSpec(false, 15, true); + suspend = suspend != null && suspend; } + } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java index d2432dda2246..f95fcd434be4 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/HiveClusterStatus.java @@ -19,8 +19,11 @@ package org.apache.hive.kubernetes.operator.model; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import io.fabric8.crd.generator.annotation.PrinterColumn; import io.fabric8.kubernetes.api.model.Condition; import org.apache.hive.kubernetes.operator.model.status.ComponentStatus; @@ -30,9 +33,16 @@ public class HiveClusterStatus { private List conditions = new ArrayList<>(); private ComponentStatus metastore; private ComponentStatus hiveServer2; - private ComponentStatus llap; - private ComponentStatus tezAm; + private Map llapClusters = new LinkedHashMap<>(); + private Map tezAmClusters = new LinkedHashMap<>(); private Long observedGeneration; + @PrinterColumn(name = "Phase") + private String clusterPhase; + private String idleSince; + @PrinterColumn(name = "Idle (min)") + private Integer idleForMinutes; + @PrinterColumn(name = "Suspended Since", priority = 1) + private String suspendedSince; public List getConditions() { return conditions; @@ -58,20 +68,20 @@ public void setHiveServer2(ComponentStatus hiveServer2) { this.hiveServer2 = hiveServer2; } - public ComponentStatus getLlap() { - return llap; + public Map getLlapClusters() { + return llapClusters; } - public void setLlap(ComponentStatus llap) { - this.llap = 
llap; + public void setLlapClusters(Map llapClusters) { + this.llapClusters = llapClusters; } - public ComponentStatus getTezAm() { - return tezAm; + public Map getTezAmClusters() { + return tezAmClusters; } - public void setTezAm(ComponentStatus tezAm) { - this.tezAm = tezAm; + public void setTezAmClusters(Map tezAmClusters) { + this.tezAmClusters = tezAmClusters; } public Long getObservedGeneration() { @@ -82,6 +92,38 @@ public void setObservedGeneration(Long observedGeneration) { this.observedGeneration = observedGeneration; } + public String getClusterPhase() { + return clusterPhase; + } + + public void setClusterPhase(String clusterPhase) { + this.clusterPhase = clusterPhase; + } + + public String getIdleSince() { + return idleSince; + } + + public void setIdleSince(String idleSince) { + this.idleSince = idleSince; + } + + public Integer getIdleForMinutes() { + return idleForMinutes; + } + + public void setIdleForMinutes(Integer idleForMinutes) { + this.idleForMinutes = idleForMinutes; + } + + public String getSuspendedSince() { + return suspendedSince; + } + + public void setSuspendedSince(String suspendedSince) { + this.suspendedSince = suspendedSince; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -95,12 +137,17 @@ public boolean equals(Object o) { java.util.Objects.equals(conditions, that.conditions) && java.util.Objects.equals(metastore, that.metastore) && java.util.Objects.equals(hiveServer2, that.hiveServer2) && - java.util.Objects.equals(llap, that.llap) && - java.util.Objects.equals(tezAm, that.tezAm); + java.util.Objects.equals(llapClusters, that.llapClusters) && + java.util.Objects.equals(tezAmClusters, that.tezAmClusters) && + java.util.Objects.equals(clusterPhase, that.clusterPhase) && + java.util.Objects.equals(idleSince, that.idleSince) && + java.util.Objects.equals(idleForMinutes, that.idleForMinutes) && + java.util.Objects.equals(suspendedSince, that.suspendedSince); } @Override public int hashCode() { - return 
java.util.Objects.hash(conditions, metastore, hiveServer2, llap, tezAm, observedGeneration); + return java.util.Objects.hash(conditions, metastore, hiveServer2, llapClusters, tezAmClusters, + observedGeneration, clusterPhase, idleSince, idleForMinutes, suspendedSince); } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoSuspendSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoSuspendSpec.java new file mode 100644 index 000000000000..a08f9c88231a --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoSuspendSpec.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.generator.annotation.Default; + +/** + * Auto-suspend configuration. When enabled and all components are idle for the + * configured timeout, the operator scales the entire cluster to 0 replicas. + * Requires autoscaling to be enabled on all active components. 
+ */ +public record AutoSuspendSpec( + @JsonPropertyDescription("Whether auto-suspend is enabled. Requires autoscaling " + + "to be enabled on all active components (HS2, LLAP if enabled, TezAM if enabled, " + + "and HMS if includeMetastore is true).") + @Default("false") + Boolean enabled, + @JsonPropertyDescription("Minutes of idle time (HS2=0 sessions, LLAP/TezAM at minReplicas) " + + "before the cluster auto-suspends.") + @Default("15") + Integer idleTimeoutMinutes, + @JsonPropertyDescription("Whether Metastore participates in auto-suspend. " + + "When false, HMS stays at minReplicas during suspend and HMS autoscaling " + + "is not required for auto-suspend to activate.") + @Default("true") + Boolean includeMetastore) { + + public AutoSuspendSpec { + enabled = enabled != null && enabled; + idleTimeoutMinutes = idleTimeoutMinutes != null ? idleTimeoutMinutes : 15; + includeMetastore = includeMetastore == null || includeMetastore; + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java new file mode 100644 index 000000000000..fac4b016766a --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/AutoscalingSpec.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.model.spec; + +import com.fasterxml.jackson.annotation.JsonPropertyDescription; +import io.fabric8.generator.annotation.Default; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; + +/** Autoscaling configuration for a Hive component. The operator scrapes JMX metrics directly from pods. */ +public record AutoscalingSpec( + @JsonPropertyDescription("Whether autoscaling is enabled for this component") + @Default("false") + Boolean enabled, + @JsonPropertyDescription("Minimum number of replicas (floor for scale-down). " + + "Set to 0 for scale-to-zero (LLAP, TezAM only; HS2 minimum is 1)") + @Default("0") + Integer minReplicas, + @JsonPropertyDescription("Threshold that triggers scale-up (component-specific: " + + "sessions per pod for HS2, request rate for HMS, busy slots per daemon for LLAP). " + + "Not used by TezAM (demand-based: 1 TezAM per session).") + @Default("80") + Integer scaleUpThreshold, + @JsonPropertyDescription("Stabilization window in seconds for scale-up decisions. " + + "Picks the highest recommendation within this window to prevent flapping.") + @Default("60") + Integer scaleUpStabilizationSeconds, + @JsonPropertyDescription("Stabilization window in seconds for scale-down decisions. " + + "How long metrics must consistently indicate fewer replicas before " + + "scale-down occurs. 
Also acts as the cooldown between consecutive scale-downs.") + @Default("600") + Integer scaleDownStabilizationSeconds, + @JsonPropertyDescription("Maximum time in seconds to wait for graceful drain " + + "during scale-down before the pod is forcibly terminated. " + + "The pod terminates immediately once sessions/connections drain to 0; " + + "this value is only the upper safety cap.") + @Default("3600") + Integer gracePeriodSeconds, + @JsonPropertyDescription("How often (seconds) the operator scrapes JMX metrics from pods. " + + "Lower values make autoscaling react faster.") + @Default("10") + Integer metricsScrapeIntervalSeconds, + @JsonPropertyDescription("CPU percentage (0-100) that triggers scale-up. " + + "Only applies to HS2 and HMS. Set to 0 to disable CPU-based scaling.") + @Default("90") + Integer cpuScaleUpThreshold, + @JsonPropertyDescription("CPU percentage (0-100) below which scale-down is considered. " + + "Only applies to HS2 and HMS.") + @Default("30") + Integer cpuScaleDownThreshold, + @JsonPropertyDescription("Port on which the Prometheus JMX Exporter serves metrics. " + + "The operator scrapes this port on each pod for autoscaling decisions.") + @Default("9404") + Integer metricsPort) { + + public AutoscalingSpec { + enabled = enabled != null ? enabled : false; + minReplicas = minReplicas != null ? minReplicas : 0; + scaleUpThreshold = scaleUpThreshold != null ? scaleUpThreshold : 80; + scaleUpStabilizationSeconds = scaleUpStabilizationSeconds != null ? scaleUpStabilizationSeconds : 60; + scaleDownStabilizationSeconds = scaleDownStabilizationSeconds != null ? scaleDownStabilizationSeconds : 600; + gracePeriodSeconds = gracePeriodSeconds != null ? gracePeriodSeconds : 3600; + metricsScrapeIntervalSeconds = metricsScrapeIntervalSeconds != null ? metricsScrapeIntervalSeconds : 10; + cpuScaleUpThreshold = cpuScaleUpThreshold != null ? cpuScaleUpThreshold : 90; + cpuScaleDownThreshold = cpuScaleDownThreshold != null ? 
cpuScaleDownThreshold : 30; + metricsPort = metricsPort != null ? metricsPort : ConfigUtils.PROMETHEUS_JMX_EXPORTER_PORT; + } + + public boolean isEnabled() { + return enabled; + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java index 78164fb32de6..5f00a5c1dde5 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/HiveServer2Spec.java @@ -51,7 +51,9 @@ public record HiveServer2Spec( @JsonPropertyDescription("Readiness probe configuration") ProbeSpec readinessProbe, @JsonPropertyDescription("Liveness probe configuration") - ProbeSpec livenessProbe) { + ProbeSpec livenessProbe, + @JsonPropertyDescription("Autoscaling configuration (operator-driven, no external dependencies)") + AutoscalingSpec autoscaling) { public HiveServer2Spec { replicas = replicas != null ? replicas : 1; @@ -59,5 +61,7 @@ public record HiveServer2Spec( extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); externalJars = externalJars != null ? externalJars : List.of(); + autoscaling = autoscaling != null ? 
autoscaling : new AutoscalingSpec( + false, 1, 80, 60, 600, 300, 10, 90, 30, null); } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java index 17ff5967ff9a..9daa5e68c5fd 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/LlapSpec.java @@ -20,16 +20,22 @@ import java.util.List; import java.util.Map; +import java.util.Objects; import com.fasterxml.jackson.annotation.JsonPropertyDescription; import io.fabric8.crd.generator.annotation.PreserveUnknownFields; import io.fabric8.crd.generator.annotation.SchemaFrom; import io.fabric8.generator.annotation.Default; +import io.fabric8.generator.annotation.Required; import io.fabric8.kubernetes.api.model.Volume; import io.fabric8.kubernetes.api.model.VolumeMount; /** Configuration for LLAP (Live Long and Process) daemons. */ public record LlapSpec( + @Required + @JsonPropertyDescription("Unique name for this LLAP cluster (e.g. llap0, llap1). " + + "Used as the ZooKeeper registration namespace and Kubernetes resource suffix.") + String name, @JsonPropertyDescription("Number of replicas") @Default("1") Integer replicas, @@ -52,19 +58,51 @@ public record LlapSpec( @JsonPropertyDescription("Memory in MB per LLAP daemon instance") @Default("1024") Integer memoryMb, - @JsonPropertyDescription("LLAP service hosts identifier for ZooKeeper registration") + @JsonPropertyDescription("LLAP service hosts identifier for ZooKeeper registration. " + + "Defaults to @{name} (e.g. 
@llap0).") String serviceHosts, @JsonPropertyDescription("Readiness probe configuration") - ProbeSpec readinessProbe) { + ProbeSpec readinessProbe, + @JsonPropertyDescription("Autoscaling configuration (operator-driven, no external dependencies)") + AutoscalingSpec autoscaling, + @JsonPropertyDescription("Per-LLAP TezAM configuration. Each LLAP cluster gets its own TezAM " + + "with independent replica count and autoscaling.") + LlapTezAmSpec tezAm) { + + /** Per-LLAP-cluster TezAM replica and autoscaling overrides. */ + public record LlapTezAmSpec( + @JsonPropertyDescription("Max number of TezAM replicas for this LLAP cluster") + @Default("1") + Integer replicas, + @JsonPropertyDescription("Autoscaling configuration for this LLAP cluster's TezAM") + AutoscalingSpec autoscaling) { + + public LlapTezAmSpec { + replicas = replicas != null ? replicas : 1; + autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( + false, 0, 0, 60, 600, 120, 10, 0, 0, null); + } + } + + private static final java.util.regex.Pattern VALID_NAME = + java.util.regex.Pattern.compile("[a-z0-9]+"); public LlapSpec { + Objects.requireNonNull(name, "llapClusters[].name is required"); + if (!VALID_NAME.matcher(name).matches()) { + throw new IllegalArgumentException( + "llapClusters[].name must be lowercase alphanumeric (no dashes, dots, or underscores): " + name); + } replicas = replicas != null ? replicas : 1; enabled = enabled != null ? enabled : true; executors = executors != null ? executors : 1; memoryMb = memoryMb != null ? memoryMb : 1024; - serviceHosts = serviceHosts != null ? serviceHosts : "@llap0"; + serviceHosts = serviceHosts != null ? serviceHosts : "@" + name; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( + false, 0, 1, 60, 900, 600, 10, 0, 0, null); + tezAm = tezAm != null ? 
tezAm : new LlapTezAmSpec(null, null); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java index 307c17221ee7..e1a0ac1452ae 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/MetastoreSpec.java @@ -56,7 +56,9 @@ public record MetastoreSpec( @JsonPropertyDescription("Readiness probe configuration") ProbeSpec readinessProbe, @JsonPropertyDescription("Liveness probe configuration") - ProbeSpec livenessProbe) { + ProbeSpec livenessProbe, + @JsonPropertyDescription("Autoscaling configuration (operator-driven, no external dependencies)") + AutoscalingSpec autoscaling) { public MetastoreSpec { replicas = replicas != null ? replicas : 1; @@ -66,6 +68,8 @@ public record MetastoreSpec( enabled = enabled != null ? enabled : true; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( + false, 1, 75, 60, 300, 60, 10, 90, 30, null); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java index a0494c2c5e73..606b0de14053 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/spec/TezAmSpec.java @@ -52,7 +52,9 @@ public record TezAmSpec( String scratchStorageSize, @JsonPropertyDescription("StorageClass for the shared scratch PVC. " + "Must support ReadWriteMany access. 
If null, uses cluster default.") - String scratchStorageClassName) { + String scratchStorageClassName, + @JsonPropertyDescription("Autoscaling configuration (operator-driven, no external dependencies)") + AutoscalingSpec autoscaling) { public TezAmSpec { replicas = replicas != null ? replicas : 1; @@ -60,6 +62,8 @@ public record TezAmSpec( scratchStorageSize = scratchStorageSize != null ? scratchStorageSize : "1Gi"; extraVolumes = extraVolumes != null ? extraVolumes : List.of(); extraVolumeMounts = extraVolumeMounts != null ? extraVolumeMounts : List.of(); + autoscaling = autoscaling != null ? autoscaling : new AutoscalingSpec( + false, 0, 0, 60, 600, 120, 10, 0, 0, null); } public boolean isEnabled() { diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/AutoscalingStatus.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/AutoscalingStatus.java new file mode 100644 index 000000000000..5a074b7b222e --- /dev/null +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/AutoscalingStatus.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.kubernetes.operator.model.status; + +import java.util.Objects; + +import com.fasterxml.jackson.annotation.JsonInclude; + +/** + * Autoscaling status for a component, surfacing the operator's scaling decisions + * in the HiveCluster status subresource (replaces kubectl get hpa). + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class AutoscalingStatus { + + private int currentMetricValue; + private Integer scaleUpThreshold; + private Double currentCpuPercent; + private Integer cpuScaleUpThreshold; + private Integer cpuProposedReplicas; + private int proposedReplicas; + private String lastScaleTime; + + public int getCurrentMetricValue() { + return currentMetricValue; + } + + public void setCurrentMetricValue(int currentMetricValue) { + this.currentMetricValue = currentMetricValue; + } + + public Integer getScaleUpThreshold() { + return scaleUpThreshold; + } + + public void setScaleUpThreshold(Integer scaleUpThreshold) { + this.scaleUpThreshold = scaleUpThreshold; + } + + public Double getCurrentCpuPercent() { + return currentCpuPercent; + } + + public void setCurrentCpuPercent(Double currentCpuPercent) { + this.currentCpuPercent = currentCpuPercent; + } + + public Integer getCpuScaleUpThreshold() { + return cpuScaleUpThreshold; + } + + public void setCpuScaleUpThreshold(Integer cpuScaleUpThreshold) { + this.cpuScaleUpThreshold = cpuScaleUpThreshold; + } + + public Integer getCpuProposedReplicas() { + return cpuProposedReplicas; + } + + public void setCpuProposedReplicas(Integer cpuProposedReplicas) { + this.cpuProposedReplicas = cpuProposedReplicas; + } + + public int getProposedReplicas() { + return proposedReplicas; + } + + public void setProposedReplicas(int proposedReplicas) { + this.proposedReplicas = proposedReplicas; + } + + public String getLastScaleTime() { + return lastScaleTime; + } + + public void setLastScaleTime(String lastScaleTime) { + this.lastScaleTime = lastScaleTime; + } + + @Override + public boolean 
equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + AutoscalingStatus that = (AutoscalingStatus) o; + return currentMetricValue == that.currentMetricValue + && Objects.equals(scaleUpThreshold, that.scaleUpThreshold) + && Objects.equals(currentCpuPercent, that.currentCpuPercent) + && Objects.equals(cpuScaleUpThreshold, that.cpuScaleUpThreshold) + && Objects.equals(cpuProposedReplicas, that.cpuProposedReplicas) + && proposedReplicas == that.proposedReplicas + && Objects.equals(lastScaleTime, that.lastScaleTime); + } + + @Override + public int hashCode() { + return Objects.hash(currentMetricValue, scaleUpThreshold, + currentCpuPercent, cpuScaleUpThreshold, cpuProposedReplicas, + proposedReplicas, lastScaleTime); + } +} diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java index 155c46f3a714..2b4f87b37e04 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/model/status/ComponentStatus.java @@ -23,24 +23,43 @@ */ public class ComponentStatus { + private int maxReplicas; + private int minReplicas; + private int currentReplicas; private int readyReplicas; - private int desiredReplicas; private String phase; + private AutoscalingStatus autoscaling; - public int getReadyReplicas() { - return readyReplicas; + public int getMaxReplicas() { + return maxReplicas; } - public void setReadyReplicas(int readyReplicas) { - this.readyReplicas = readyReplicas; + public void setMaxReplicas(int maxReplicas) { + this.maxReplicas = maxReplicas; + } + + public int getMinReplicas() { + return minReplicas; + } + + public void setMinReplicas(int minReplicas) { + this.minReplicas = minReplicas; + } + + public int 
getCurrentReplicas() { + return currentReplicas; } - public int getDesiredReplicas() { - return desiredReplicas; + public void setCurrentReplicas(int currentReplicas) { + this.currentReplicas = currentReplicas; } - public void setDesiredReplicas(int desiredReplicas) { - this.desiredReplicas = desiredReplicas; + public int getReadyReplicas() { + return readyReplicas; + } + + public void setReadyReplicas(int readyReplicas) { + this.readyReplicas = readyReplicas; } public String getPhase() { @@ -51,6 +70,14 @@ public void setPhase(String phase) { this.phase = phase; } + public AutoscalingStatus getAutoscaling() { + return autoscaling; + } + + public void setAutoscaling(AutoscalingStatus autoscaling) { + this.autoscaling = autoscaling; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -60,12 +87,15 @@ public boolean equals(Object o) { return false; } ComponentStatus that = (ComponentStatus) o; - return readyReplicas == that.readyReplicas && desiredReplicas == that.desiredReplicas && java.util.Objects.equals( - phase, that.phase); + return maxReplicas == that.maxReplicas && minReplicas == that.minReplicas + && currentReplicas == that.currentReplicas && readyReplicas == that.readyReplicas + && java.util.Objects.equals(phase, that.phase) + && java.util.Objects.equals(autoscaling, that.autoscaling); } @Override public int hashCode() { - return java.util.Objects.hash(readyReplicas, desiredReplicas, phase); + return java.util.Objects.hash(maxReplicas, minReplicas, currentReplicas, + readyReplicas, phase, autoscaling); } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java index 20332cb4127c..1bdc6a4ebe27 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java +++ 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveClusterReconciler.java @@ -18,48 +18,44 @@ package org.apache.hive.kubernetes.operator.reconciler; +import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Objects; -import java.util.function.Function; +import java.util.Set; import io.fabric8.kubernetes.api.model.Condition; import io.fabric8.kubernetes.api.model.HasMetadata; import io.fabric8.kubernetes.api.model.apps.Deployment; import io.fabric8.kubernetes.api.model.apps.StatefulSet; import io.fabric8.kubernetes.api.model.batch.v1.Job; +import io.fabric8.kubernetes.client.KubernetesClient; import io.javaoperatorsdk.operator.api.reconciler.Context; +import io.javaoperatorsdk.operator.api.reconciler.Cleaner; import io.javaoperatorsdk.operator.api.reconciler.ControllerConfiguration; +import io.javaoperatorsdk.operator.api.reconciler.DeleteControl; import io.javaoperatorsdk.operator.api.reconciler.ErrorStatusUpdateControl; import io.javaoperatorsdk.operator.api.reconciler.Reconciler; import io.javaoperatorsdk.operator.api.reconciler.UpdateControl; -import io.javaoperatorsdk.operator.api.reconciler.Workflow; -import io.javaoperatorsdk.operator.api.reconciler.dependent.Dependent; -import org.apache.hive.kubernetes.operator.dependent.HadoopConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2ConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2DeploymentDependent; -import org.apache.hive.kubernetes.operator.dependent.HiveServer2ServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.LlapStatefulSetDependent; -import 
org.apache.hive.kubernetes.operator.dependent.MetastoreConfigMapDependent; -import org.apache.hive.kubernetes.operator.dependent.MetastoreDeploymentDependent; -import org.apache.hive.kubernetes.operator.dependent.MetastoreServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.SchemaInitJobDependent; -import org.apache.hive.kubernetes.operator.dependent.ScratchPvcDependent; -import org.apache.hive.kubernetes.operator.dependent.TezAmServiceDependent; -import org.apache.hive.kubernetes.operator.dependent.TezAmStatefulSetDependent; -import org.apache.hive.kubernetes.operator.dependent.condition.HiveServer2Precondition; -import org.apache.hive.kubernetes.operator.dependent.condition.LlapEnabledCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreEnabledCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.MetastoreReadyCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.SchemaJobCompletedCondition; -import org.apache.hive.kubernetes.operator.dependent.condition.TezAmEnabledCondition; +import org.apache.hive.kubernetes.operator.autoscaling.BackgroundMetricsScraper; +import org.apache.hive.kubernetes.operator.autoscaling.HiveClusterAutoscaler; +import org.apache.hive.kubernetes.operator.autoscaling.MetricsCache; +import org.apache.hive.kubernetes.operator.autoscaling.MetricsScraper; +import org.apache.hive.kubernetes.operator.autoscaling.PodMetrics; +import org.apache.hive.kubernetes.operator.dependent.LlapResourceBuilder; import org.apache.hive.kubernetes.operator.model.HiveCluster; +import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; import org.apache.hive.kubernetes.operator.model.HiveClusterStatus; +import org.apache.hive.kubernetes.operator.model.spec.AutoSuspendSpec; +import org.apache.hive.kubernetes.operator.model.spec.LlapSpec; +import org.apache.hive.kubernetes.operator.model.status.AutoscalingStatus; import 
org.apache.hive.kubernetes.operator.model.status.ComponentStatus; +import org.apache.hive.kubernetes.operator.util.ConfigUtils; +import org.apache.hive.kubernetes.operator.util.Labels; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -68,66 +64,148 @@ * Orchestrates all dependent resources with proper dependency ordering. */ @ControllerConfiguration -@Workflow(dependents = { - // --- ConfigMap dependents --- - @Dependent(name = "hadoop-configmap", type = HadoopConfigMapDependent.class), - @Dependent(name = "metastore-configmap", type = MetastoreConfigMapDependent.class, - activationCondition = MetastoreEnabledCondition.class), - @Dependent(name = "hiveserver2-configmap", type = HiveServer2ConfigMapDependent.class), - // --- Job dependents --- - @Dependent(name = "schema-init-job", type = SchemaInitJobDependent.class, dependsOn = {"metastore-configmap", - "hadoop-configmap"}, readyPostcondition = SchemaJobCompletedCondition.class, - activationCondition = MetastoreEnabledCondition.class), - // --- Deployment dependents --- - @Dependent(name = "metastore-deployment", type = MetastoreDeploymentDependent.class, dependsOn = { - "schema-init-job"}, readyPostcondition = MetastoreReadyCondition.class, - activationCondition = MetastoreEnabledCondition.class), - // --- Service dependents --- - @Dependent(name = "metastore-service", type = MetastoreServiceDependent.class, dependsOn = { - "metastore-configmap"}, activationCondition = MetastoreEnabledCondition.class), - @Dependent(name = "hiveserver2-deployment", type = HiveServer2DeploymentDependent.class, dependsOn = { - "hiveserver2-configmap", "hadoop-configmap"}, reconcilePrecondition = HiveServer2Precondition.class), - @Dependent(name = "hiveserver2-service", type = HiveServer2ServiceDependent.class, dependsOn = { - "hiveserver2-configmap"}), - // --- LLAP (conditional) --- - @Dependent(name = "llap-configmap", type = LlapConfigMapDependent.class, - activationCondition = LlapEnabledCondition.class), - 
@Dependent(name = "llap-statefulset", type = LlapStatefulSetDependent.class, dependsOn = {"llap-configmap", - "hadoop-configmap"}, activationCondition = LlapEnabledCondition.class), - @Dependent(name = "llap-service", type = LlapServiceDependent.class, - activationCondition = LlapEnabledCondition.class), - // --- TezAM (conditional) --- - @Dependent(name = "scratch-pvc", type = ScratchPvcDependent.class, - activationCondition = TezAmEnabledCondition.class), - @Dependent(name = "tezam-service", type = TezAmServiceDependent.class, - activationCondition = TezAmEnabledCondition.class), - @Dependent(name = "tezam-statefulset", type = TezAmStatefulSetDependent.class, dependsOn = {"hiveserver2-configmap", - "hadoop-configmap", "tezam-service", "scratch-pvc"}, activationCondition = TezAmEnabledCondition.class)}) -public class HiveClusterReconciler implements Reconciler { +public class HiveClusterReconciler + implements Reconciler, Cleaner { private static final Logger LOG = LoggerFactory.getLogger(HiveClusterReconciler.class); + private volatile HiveClusterAutoscaler autoscaler; + private volatile BackgroundMetricsScraper bgScraper; + @Override public UpdateControl reconcile(HiveCluster resource, Context context) { - LOG.debug("Reconciling HiveCluster: {}/{}", resource.getMetadata().getNamespace(), - resource.getMetadata().getName()); + LOG.debug("Reconciling HiveCluster: {}/{} generation={}", + resource.getMetadata().getNamespace(), + resource.getMetadata().getName(), + resource.getMetadata().getGeneration()); HiveClusterStatus existingStatus = resource.getStatus(); HiveClusterStatus newStatus = buildStatus(resource, context, existingStatus); - if (Objects.equals(existingStatus, newStatus)) { - return UpdateControl.noUpdate(); + // --- Suspend / Wake evaluation (works regardless of autoscaling) --- + KubernetesClient client = context.getClient(); + SuspendAction action = evaluateSuspendState(resource, existingStatus, client); + int rescheduleSeconds = 0; + + switch 
(action) { + case SUSPEND_NOW: + suspendCluster(resource); + boolean manual = resource.getSpec().suspend(); + // Auto-suspend: set spec.suspend=true so the cluster stays suspended + // until the user explicitly sets it to false. + // The spec patch triggers a watch event → immediate re-reconcile where + // STAY_SUSPENDED sets the status cleanly. + if (!manual) { + patchSuspendSpec(client, resource, true); + return UpdateControl.noUpdate(); + } + String reason = "ManualSuspend"; + newStatus.setClusterPhase("Suspended"); + newStatus.setSuspendedSince(Instant.now().toString()); + newStatus.setIdleSince(null); + newStatus.getConditions().add(buildCondition("Suspended", "True", reason, + "Cluster suspended via spec.suspend", + existingStatus != null ? existingStatus.getConditions() : Collections.emptyList())); + rescheduleSeconds = 30; + break; + + case STAY_SUSPENDED: + newStatus.setClusterPhase("Suspended"); + newStatus.setSuspendedSince(existingStatus != null ? existingStatus.getSuspendedSince() : null); + newStatus.setIdleSince(null); + newStatus.getConditions().add(buildCondition("Suspended", "True", "Suspended", + "Cluster is suspended", + existingStatus != null ? existingStatus.getConditions() : Collections.emptyList())); + rescheduleSeconds = 30; + break; + + case WAKE: + wakeCluster(resource); + newStatus.setClusterPhase("Running"); + newStatus.setSuspendedSince(null); + newStatus.setIdleSince(null); + newStatus.getConditions().add(buildCondition("Suspended", "False", "Woken", + "Cluster woken up", + existingStatus != null ? existingStatus.getConditions() : Collections.emptyList())); + rescheduleSeconds = anyAutoscalingEnabled(resource.getSpec()) + ? getMinScrapeInterval(resource.getSpec()) : 30; + break; + + case IDLE_START: + newStatus.setClusterPhase("Idle"); + newStatus.setIdleSince(Instant.now().toString()); + newStatus.setIdleForMinutes(0); + newStatus.setSuspendedSince(null); + break; + + case IDLE_WAITING: + String idleSince = existingStatus != null ? 
existingStatus.getIdleSince() : null; + newStatus.setClusterPhase("Idle"); + newStatus.setIdleSince(idleSince); + newStatus.setIdleForMinutes(idleSince != null + ? (int) Duration.between(Instant.parse(idleSince), Instant.now()).toMinutes() : 0); + newStatus.setSuspendedSince(null); + break; + + case RUNNING: + default: + newStatus.setClusterPhase("Running"); + newStatus.setIdleSince(null); + newStatus.setIdleForMinutes(null); + newStatus.setSuspendedSince(null); + break; } + // --- Imperative LLAP cluster management --- + if (action != SuspendAction.STAY_SUSPENDED && action != SuspendAction.SUSPEND_NOW) { + reconcileLlapClusters(resource, client); + } + + // --- Autoscaling evaluation (only when enabled and not suspended) --- + if (rescheduleSeconds == 0 && anyAutoscalingEnabled(resource.getSpec())) { + HiveClusterAutoscaler scaler = getOrCreateAutoscaler(client); + HiveClusterAutoscaler.AutoscalingEvaluation eval = scaler.evaluate(resource, client); + for (Map.Entry entry : eval.patches().entrySet()) { + patchReplicas(client, resource, entry.getKey(), entry.getValue()); + } + applyAutoscalingStatuses(newStatus, eval.statuses()); + // Reschedule sooner if a two-phase scale-down is pending annotation propagation + rescheduleSeconds = scaler.hasPendingScaleDowns() + ? 
2 : getMinScrapeInterval(resource.getSpec()); + } + + // --- Single exit point for status update --- + boolean statusNowChanged = !statusEqualsIgnoringTimestamps(existingStatus, newStatus); + if (!statusNowChanged && rescheduleSeconds == 0) { + return UpdateControl.noUpdate(); + } resource.setStatus(newStatus); + if (rescheduleSeconds > 0) { + return UpdateControl.patchStatus(resource) + .rescheduleAfter(Duration.ofSeconds(rescheduleSeconds)); + } return UpdateControl.patchStatus(resource); } + @Override + public DeleteControl cleanup(HiveCluster resource, Context context) { + String ns = resource.getMetadata().getNamespace(); + String name = resource.getMetadata().getName(); + if (autoscaler != null) { + autoscaler.cleanupCluster(ns, name); + } + if (bgScraper != null) { + bgScraper.unregisterCluster(ns, name); + } + LOG.info("Cleaned up autoscaler state for deleted cluster {}/{}", ns, name); + return DeleteControl.defaultDelete(); + } + @Override public ErrorStatusUpdateControl updateErrorStatus(HiveCluster resource, Context context, Exception e) { - LOG.error("Error reconciling HiveCluster: {}/{}", resource.getMetadata().getNamespace(), - resource.getMetadata().getName(), e); + LOG.error("Error reconciling HiveCluster: {}/{} - {}", resource.getMetadata().getNamespace(), + resource.getMetadata().getName(), e.getMessage(), e); HiveClusterStatus status = resource.getStatus() != null ? resource.getStatus() : new HiveClusterStatus(); @@ -172,16 +250,15 @@ private HiveClusterStatus buildStatus(HiveCluster resource, // Metastore status boolean metastoreReady; if (resource.getSpec().metastore().isEnabled()) { + int msMin = resource.getSpec().metastore().autoscaling().isEnabled() + ? 
Math.max(1, resource.getSpec().metastore().autoscaling().minReplicas()) + : resource.getSpec().metastore().replicas(); ComponentStatus metastoreStatus = buildComponentStatus(context, Deployment.class, resource.getMetadata().getName() + "-metastore", - resource.getSpec().metastore().replicas(), - d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null ? - d.getStatus().getReadyReplicas() : - 0); + resource.getSpec().metastore().replicas(), msMin); status.setMetastore(metastoreStatus); - metastoreReady = metastoreStatus.getReadyReplicas() >= metastoreStatus.getDesiredReplicas() - && metastoreStatus.getDesiredReplicas() > 0; + metastoreReady = metastoreStatus.getReadyReplicas() >= msMin && msMin > 0; conditions.add(buildCondition("MetastoreReady", metastoreReady ? "True" : "False", metastoreReady ? "DeploymentReady" : "DeploymentNotReady", @@ -193,34 +270,52 @@ private HiveClusterStatus buildStatus(HiveCluster resource, } // HiveServer2 status + int hs2Min = resource.getSpec().hiveServer2().autoscaling().isEnabled() + ? Math.max(1, resource.getSpec().hiveServer2().autoscaling().minReplicas()) + : resource.getSpec().hiveServer2().replicas(); ComponentStatus hs2Status = buildComponentStatus(context, Deployment.class, resource.getMetadata().getName() + "-hiveserver2", - resource.getSpec().hiveServer2().replicas(), - d -> d.getStatus() != null && d.getStatus().getReadyReplicas() != null ? d.getStatus().getReadyReplicas() : 0); + resource.getSpec().hiveServer2().replicas(), hs2Min); status.setHiveServer2(hs2Status); - boolean hs2Ready = - hs2Status.getReadyReplicas() >= hs2Status.getDesiredReplicas() && hs2Status.getDesiredReplicas() > 0; + boolean hs2Ready = hs2Status.getReadyReplicas() >= hs2Min; conditions.add(buildCondition("HiveServer2Ready", hs2Ready ? "True" : "False", hs2Ready ? "DeploymentReady" : "DeploymentNotReady", hs2Ready ? 
"HiveServer2 is ready" : "HiveServer2 not yet ready", existingConditions)); - // LLAP status (optional) - if (resource.getSpec().llap().isEnabled()) { - status.setLlap(buildComponentStatus(context, StatefulSet.class, - resource.getMetadata().getName() + "-llap", - resource.getSpec().llap().replicas(), - s -> s.getStatus() != null && s.getStatus().getReadyReplicas() != null ? - s.getStatus().getReadyReplicas() : 0)); + // LLAP clusters status + Map llapStatuses = new java.util.LinkedHashMap<>(); + for (var llapSpec : resource.getSpec().llapClusters()) { + if (!llapSpec.isEnabled()) { + continue; + } + String ssName = resource.getMetadata().getName() + "-" + llapSpec.name(); + int llapMin = llapSpec.autoscaling().isEnabled() + ? llapSpec.autoscaling().minReplicas() + : llapSpec.replicas(); + llapStatuses.put(llapSpec.name(), + buildComponentStatus(context, StatefulSet.class, ssName, llapSpec.replicas(), llapMin)); } + status.setLlapClusters(llapStatuses); - // TezAM status (optional) + // Per-LLAP TezAM status (one TezAM per LLAP cluster) if (resource.getSpec().tezAm().isEnabled()) { - status.setTezAm(buildComponentStatus(context, StatefulSet.class, resource.getMetadata().getName() + "-tezam", - resource.getSpec().tezAm().replicas(), - s -> s.getStatus() != null && - s.getStatus().getReadyReplicas() != null ? s.getStatus().getReadyReplicas() : 0)); + Map tezAmStatuses = new java.util.LinkedHashMap<>(); + for (var llapSpec : resource.getSpec().llapClusters()) { + if (!llapSpec.isEnabled()) { + continue; + } + LlapSpec.LlapTezAmSpec perLlapTezAm = llapSpec.tezAm(); + String tezAmSsName = LlapResourceBuilder.tezAmResourceName(resource, llapSpec); + int tezAmMin = perLlapTezAm.autoscaling().isEnabled() + ? 
perLlapTezAm.autoscaling().minReplicas() + : perLlapTezAm.replicas(); + tezAmStatuses.put(llapSpec.name(), + buildComponentStatus(context, StatefulSet.class, tezAmSsName, + perLlapTezAm.replicas(), tezAmMin)); + } + status.setTezAmClusters(tezAmStatuses); } // Overall Ready condition @@ -240,19 +335,51 @@ private HiveClusterStatus buildStatus(HiveCluster resource, */ private ComponentStatus buildComponentStatus( Context context, Class resourceClass, String expectedResourceName, - int desiredReplicas, Function readyExtractor) { + int maxReplicas, int minReplicas) { ComponentStatus cs = new ComponentStatus(); - cs.setDesiredReplicas(desiredReplicas); + cs.setMaxReplicas(maxReplicas); + cs.setMinReplicas(minReplicas); - int ready = context.getSecondaryResources(resourceClass).stream() + // Read actual spec.replicas and readyReplicas from the live workload + var workload = context.getSecondaryResources(resourceClass).stream() .filter(r -> r.getMetadata().getName().equals(expectedResourceName)) - .findFirst() - .map(readyExtractor) - .orElse(0); - + .findFirst(); + + int currentReplicas = workload.map(r -> { + if (r instanceof Deployment d) { + return d.getSpec() != null && d.getSpec().getReplicas() != null + ? d.getSpec().getReplicas() : 0; + } else if (r instanceof StatefulSet s) { + return s.getSpec() != null && s.getSpec().getReplicas() != null + ? s.getSpec().getReplicas() : 0; + } + return 0; + }).orElse(0); + + int ready = workload.map(r -> { + if (r instanceof Deployment d) { + return d.getStatus() != null && d.getStatus().getReadyReplicas() != null + ? d.getStatus().getReadyReplicas() : 0; + } else if (r instanceof StatefulSet s) { + return s.getStatus() != null && s.getStatus().getReadyReplicas() != null + ? s.getStatus().getReadyReplicas() : 0; + } + return 0; + }).orElse(0); + + cs.setCurrentReplicas(currentReplicas); cs.setReadyReplicas(ready); - cs.setPhase(ready >= desiredReplicas && desiredReplicas > 0 ? 
"Running" : "Pending"); + + if (currentReplicas == 0 && ready == 0) { + cs.setPhase("Idle"); + } else if (ready >= currentReplicas && currentReplicas > 0) { + cs.setPhase("Running"); + } else if (currentReplicas == 0 && ready > 0) { + cs.setPhase("ScalingDown"); + } else { + cs.setPhase("Pending"); + } return cs; } @@ -265,14 +392,590 @@ private Condition buildCondition(String type, String conditionStatus, condition.setReason(reason); condition.setMessage(message); - // Preserve lastTransitionTime when the condition status has not changed + // Preserve lastTransitionTime from ANY existing condition of this type + // (regardless of status) to avoid generating new timestamps on every + // reconcile which would cause an infinite status-patch loop. String preservedTime = existingConditions.stream() - .filter(c -> type.equals(c.getType()) && conditionStatus.equals(c.getStatus())) + .filter(c -> type.equals(c.getType())) .map(Condition::getLastTransitionTime) .findFirst() .orElse(null); - condition.setLastTransitionTime(preservedTime != null ? preservedTime : Instant.now().toString()); + if (preservedTime != null) { + // Only update the timestamp if the status actually changed + String oldStatus = existingConditions.stream() + .filter(c -> type.equals(c.getType())) + .map(Condition::getStatus) + .findFirst() + .orElse(null); + if (conditionStatus.equals(oldStatus)) { + condition.setLastTransitionTime(preservedTime); + } else { + condition.setLastTransitionTime(Instant.now().toString()); + } + } else { + condition.setLastTransitionTime(Instant.now().toString()); + } return condition; } + + /** + * Compares two HiveClusterStatus objects ignoring condition timestamps. + * This prevents infinite reconciliation loops caused by informer cache lag: + * after a status patch, the informer may still have the old status, causing + * the next reconcile to see a "different" status (new timestamp vs old) and + * patch again, perpetuating the loop. 
+ */ + private boolean statusEqualsIgnoringTimestamps(HiveClusterStatus a, HiveClusterStatus b) { + if (a == b) { + return true; + } + if (a == null || b == null) { + return false; + } + if (!Objects.equals(a.getObservedGeneration(), b.getObservedGeneration())) { + return false; + } + if (!Objects.equals(a.getMetastore(), b.getMetastore())) { + return false; + } + if (!Objects.equals(a.getHiveServer2(), b.getHiveServer2())) { + return false; + } + if (!Objects.equals(a.getLlapClusters(), b.getLlapClusters())) { + return false; + } + if (!Objects.equals(a.getTezAmClusters(), b.getTezAmClusters())) { + return false; + } + // Compare conditions by type+status+reason+message, ignoring lastTransitionTime + return conditionsEqualIgnoringTime(a.getConditions(), b.getConditions()); + } + + private boolean conditionsEqualIgnoringTime(List a, List b) { + if (a == b) { + return true; + } + if (a == null || b == null) { + return a == null && b == null; + } + if (a.size() != b.size()) { + return false; + } + for (int i = 0; i < a.size(); i++) { + Condition ca = a.get(i); + Condition cb = b.get(i); + if (!Objects.equals(ca.getType(), cb.getType()) + || !Objects.equals(ca.getStatus(), cb.getStatus()) + || !Objects.equals(ca.getReason(), cb.getReason()) + || !Objects.equals(ca.getMessage(), cb.getMessage())) { + return false; + } + } + return true; + } + + private void applyAutoscalingStatuses(HiveClusterStatus status, + Map statuses) { + if (statuses.containsKey(ConfigUtils.COMPONENT_HIVESERVER2) && status.getHiveServer2() != null) { + status.getHiveServer2().setAutoscaling(statuses.get(ConfigUtils.COMPONENT_HIVESERVER2)); + } + if (statuses.containsKey(ConfigUtils.COMPONENT_METASTORE) && status.getMetastore() != null) { + status.getMetastore().setAutoscaling(statuses.get(ConfigUtils.COMPONENT_METASTORE)); + } + for (Map.Entry entry : status.getLlapClusters().entrySet()) { + String llapKey = ConfigUtils.llapComponentKey(entry.getKey()); + if (statuses.containsKey(llapKey)) { + 
entry.getValue().setAutoscaling(statuses.get(llapKey)); + } + } + for (Map.Entry entry : status.getTezAmClusters().entrySet()) { + String tezAmKey = ConfigUtils.tezAmComponentKey(entry.getKey()); + if (statuses.containsKey(tezAmKey)) { + entry.getValue().setAutoscaling(statuses.get(tezAmKey)); + } + } + } + + // --- Autoscaling helpers --- + + private HiveClusterAutoscaler getOrCreateAutoscaler(KubernetesClient client) { + if (autoscaler == null) { + MetricsScraper scraper = new MetricsScraper(client); + MetricsCache metricsCache = new MetricsCache(); + bgScraper = new BackgroundMetricsScraper(scraper, metricsCache); + autoscaler = new HiveClusterAutoscaler(scraper, bgScraper, metricsCache); + } + return autoscaler; + } + + private static boolean anyAutoscalingEnabled(HiveClusterSpec spec) { + if (spec.hiveServer2().autoscaling().isEnabled()) { + return true; + } + if (spec.metastore().isEnabled() && spec.metastore().autoscaling().isEnabled()) { + return true; + } + for (var llap : spec.llapClusters()) { + if (llap.isEnabled() && llap.autoscaling().isEnabled()) { + return true; + } + if (llap.isEnabled() && spec.tezAm().isEnabled() && llap.tezAm().autoscaling().isEnabled()) { + return true; + } + } + return false; + } + + private static int getMinScrapeInterval(HiveClusterSpec spec) { + int min = Integer.MAX_VALUE; + if (spec.hiveServer2().autoscaling().isEnabled()) { + min = Math.min(min, spec.hiveServer2().autoscaling().metricsScrapeIntervalSeconds()); + } + if (spec.metastore().isEnabled() && spec.metastore().autoscaling().isEnabled()) { + min = Math.min(min, spec.metastore().autoscaling().metricsScrapeIntervalSeconds()); + } + for (var llap : spec.llapClusters()) { + if (llap.isEnabled() && llap.autoscaling().isEnabled()) { + min = Math.min(min, llap.autoscaling().metricsScrapeIntervalSeconds()); + } + if (llap.isEnabled() && spec.tezAm().isEnabled() && llap.tezAm().autoscaling().isEnabled()) { + min = Math.min(min, 
llap.tezAm().autoscaling().metricsScrapeIntervalSeconds()); + } + } + return min == Integer.MAX_VALUE ? 10 : min; + } + + private void patchReplicas(KubernetesClient client, HiveCluster resource, + String component, int replicas) { + String namespace = resource.getMetadata().getNamespace(); + // Component keys use prefixes: "llap-{name}" → workload "{cluster}-{name}", + // "tezam-{name}" → workload "{cluster}-tezam-{name}". + String workloadName; + if (component.startsWith(ConfigUtils.COMPONENT_LLAP + "-")) { + String llapName = component.substring(ConfigUtils.COMPONENT_LLAP.length() + 1); + workloadName = resource.getMetadata().getName() + "-" + llapName; + } else if (component.startsWith(ConfigUtils.COMPONENT_TEZAM + "-")) { + String llapName = component.substring(ConfigUtils.COMPONENT_TEZAM.length() + 1); + workloadName = resource.getMetadata().getName() + "-tezam-" + llapName; + } else { + workloadName = resource.getMetadata().getName() + "-" + component; + } + try { + if (component.startsWith(ConfigUtils.COMPONENT_LLAP + "-") + || component.startsWith(ConfigUtils.COMPONENT_TEZAM + "-")) { + client.apps().statefulSets().inNamespace(namespace).withName(workloadName).scale(replicas); + } else { + client.apps().deployments().inNamespace(namespace).withName(workloadName).scale(replicas); + } + LOG.info("Scaled {}/{} to {} replicas", namespace, workloadName, replicas); + } catch (Exception e) { + LOG.debug("Could not scale {}/{}: {}", namespace, workloadName, e.getMessage()); + } + } + + private void patchSuspendSpec(KubernetesClient client, HiveCluster resource, boolean suspend) { + String ns = resource.getMetadata().getNamespace(); + String name = resource.getMetadata().getName(); + client.resources(HiveCluster.class).inNamespace(ns).withName(name) + .edit(hc -> { + // Records are immutable so we build a new spec with the updated suspend value + HiveClusterSpec oldSpec = hc.getSpec(); + HiveClusterSpec newSpec = new HiveClusterSpec( + oldSpec.image(), 
oldSpec.imagePullPolicy(), oldSpec.metastore(), + oldSpec.hiveServer2(), oldSpec.llapClusters(), oldSpec.llapClusterRouting(), + oldSpec.tezAm(), oldSpec.zookeeper(), + oldSpec.hadoop(), oldSpec.envVars(), oldSpec.externalJars(), + oldSpec.volumes(), oldSpec.volumeMounts(), oldSpec.autoSuspend(), suspend); + hc.setSpec(newSpec); + return hc; + }); + LOG.info("Patched spec.suspend={} on {}/{}", suspend, ns, name); + } + + // --- Imperative LLAP cluster management --- + + /** + * Creates or updates LLAP cluster resources (ConfigMap, Service, StatefulSet, PDB) + * imperatively via server-side apply. Also garbage-collects resources for removed clusters. + */ + private void reconcileLlapClusters(HiveCluster resource, KubernetesClient client) { + String ns = resource.getMetadata().getNamespace(); + String clusterName = resource.getMetadata().getName(); + Set desiredNames = new HashSet<>(); + + for (LlapSpec llapSpec : resource.getSpec().llapClusters()) { + if (!llapSpec.isEnabled()) { + continue; + } + desiredNames.add(llapSpec.name()); + int replicas = resolveLlapReplicaCount(resource, llapSpec, ns, clusterName); + + // --- LLAP resources --- + client.configMaps().inNamespace(ns) + .resource(LlapResourceBuilder.buildConfigMap(resource, llapSpec)) + .serverSideApply(); + client.services().inNamespace(ns) + .resource(LlapResourceBuilder.buildService(resource, llapSpec)) + .serverSideApply(); + + // Always include replicas in SSA with forceConflicts to avoid the + // brief scale-up-then-down on first create (K8s defaults to 1 if omitted). + // resolveLlapReplicaCount already reads the autoscaler's managed value, + // so this is always the correct replica count. 
+ client.apps().statefulSets().inNamespace(ns) + .resource(LlapResourceBuilder.buildStatefulSet(resource, llapSpec, replicas)) + .forceConflicts() + .serverSideApply(); + if (llapSpec.autoscaling().isEnabled()) { + client.policy().v1().podDisruptionBudget().inNamespace(ns) + .resource(LlapResourceBuilder.buildPdb(resource, llapSpec)) + .serverSideApply(); + } + + // --- Per-LLAP TezAM resources (one TezAM per LLAP cluster) --- + if (resource.getSpec().tezAm().isEnabled()) { + int tezAmReplicas = resolveTezAmReplicaCount(resource, ns, clusterName, llapSpec); + client.configMaps().inNamespace(ns) + .resource(LlapResourceBuilder.buildTezAmConfigMap(resource, llapSpec)) + .serverSideApply(); + client.services().inNamespace(ns) + .resource(LlapResourceBuilder.buildTezAmService(resource, llapSpec)) + .serverSideApply(); + client.apps().statefulSets().inNamespace(ns) + .resource(LlapResourceBuilder.buildTezAmStatefulSet(resource, llapSpec, tezAmReplicas)) + .forceConflicts() + .serverSideApply(); + if (llapSpec.tezAm().autoscaling().isEnabled()) { + client.policy().v1().podDisruptionBudget().inNamespace(ns) + .resource(LlapResourceBuilder.buildTezAmPdb(resource, llapSpec)) + .serverSideApply(); + } + } + } + + garbageCollectLlapResources(client, ns, clusterName, desiredNames); + } + + /** + * Resolves the replica count for a LLAP cluster, respecting autoscaler-managed values + * and suspend state. 
+ */ + private int resolveLlapReplicaCount(HiveCluster resource, + LlapSpec llapSpec, String ns, String clusterName) { + if (resource.getSpec().suspend()) { + return 0; + } + String componentKey = ConfigUtils.llapComponentKey(llapSpec.name()); + Integer managed = HiveClusterAutoscaler.getManagedReplicas(ns, clusterName, componentKey); + if (managed != null) { + return managed; + } + // First reconcile before autoscaler runs: start at minReplicas if autoscaling enabled + if (llapSpec.autoscaling().isEnabled()) { + return llapSpec.autoscaling().minReplicas(); + } + return llapSpec.replicas(); + } + + /** + * Resolves the replica count for a per-LLAP TezAM cluster. + * TezAM follows its paired LLAP cluster's lifecycle. + */ + private int resolveTezAmReplicaCount(HiveCluster resource, + String ns, String clusterName, LlapSpec llapSpec) { + if (resource.getSpec().suspend()) { + return 0; + } + LlapSpec.LlapTezAmSpec tezAmSpec = llapSpec.tezAm(); + // Check if autoscaler has a managed value for this specific TezAM + String tezAmComponentKey = ConfigUtils.tezAmComponentKey(llapSpec.name()); + Integer tezAmManaged = HiveClusterAutoscaler.getManagedReplicas(ns, clusterName, tezAmComponentKey); + if (tezAmManaged != null) { + return tezAmManaged; + } + // TezAM follows LLAP's autoscaling gate: only run if LLAP is running. + String llapComponentKey = ConfigUtils.llapComponentKey(llapSpec.name()); + Integer llapManaged = HiveClusterAutoscaler.getManagedReplicas(ns, clusterName, llapComponentKey); + if (llapManaged != null && llapManaged == 0) { + return 0; + } + if (llapSpec.autoscaling().isEnabled() && llapManaged == null) { + // First reconcile, LLAP starts at minReplicas (likely 0) — TezAM matches + return llapSpec.autoscaling().minReplicas() > 0 + ? tezAmSpec.replicas() : 0; + } + return tezAmSpec.replicas(); + } + + /** + * Deletes LLAP and per-LLAP TezAM resources that belong to this HiveCluster + * but are no longer in the desired set of LLAP cluster names. 
+ */ + private void garbageCollectLlapResources(KubernetesClient client, String ns, + String clusterName, Set desiredNames) { + Map llapSelector = Map.of( + Labels.MANAGED_BY, Labels.MANAGED_BY_VALUE, + Labels.APP_INSTANCE, clusterName, + Labels.APP_COMPONENT, ConfigUtils.COMPONENT_LLAP); + + // Find StatefulSets owned by this cluster with component=llap + client.apps().statefulSets().inNamespace(ns).withLabels(llapSelector).list().getItems() + .stream() + .filter(ss -> { + String llapName = ss.getMetadata().getLabels().get(Labels.LLAP_CLUSTER); + return llapName != null && !desiredNames.contains(llapName); + }) + .forEach(ss -> { + String llapName = ss.getMetadata().getLabels().get(Labels.LLAP_CLUSTER); + LOG.info("Garbage-collecting LLAP cluster '{}' resources in {}/{}", llapName, ns, clusterName); + client.apps().statefulSets().inNamespace(ns).withName(ss.getMetadata().getName()).delete(); + client.services().inNamespace(ns).withName(ss.getMetadata().getName()).delete(); + client.configMaps().inNamespace(ns) + .withName(ss.getMetadata().getName() + "-config").delete(); + client.policy().v1().podDisruptionBudget().inNamespace(ns) + .withName(ss.getMetadata().getName() + "-pdb").delete(); + }); + + // Garbage-collect per-LLAP TezAM resources + Map tezamSelector = Map.of( + Labels.MANAGED_BY, Labels.MANAGED_BY_VALUE, + Labels.APP_INSTANCE, clusterName, + Labels.APP_COMPONENT, ConfigUtils.COMPONENT_TEZAM); + + client.apps().statefulSets().inNamespace(ns).withLabels(tezamSelector).list().getItems() + .stream() + .filter(ss -> { + String llapName = ss.getMetadata().getLabels().get(Labels.LLAP_CLUSTER); + return llapName != null && !desiredNames.contains(llapName); + }) + .forEach(ss -> { + String llapName = ss.getMetadata().getLabels().get(Labels.LLAP_CLUSTER); + LOG.info("Garbage-collecting TezAM for LLAP cluster '{}' in {}/{}", llapName, ns, clusterName); + client.apps().statefulSets().inNamespace(ns).withName(ss.getMetadata().getName()).delete(); + 
client.services().inNamespace(ns).withName(ss.getMetadata().getName()).delete(); + client.configMaps().inNamespace(ns) + .withName(ss.getMetadata().getName() + "-config").delete(); + client.policy().v1().podDisruptionBudget().inNamespace(ns) + .withName(ss.getMetadata().getName() + "-pdb").delete(); + }); + } + + // --- Auto-Suspend / Wake --- + + enum SuspendAction { RUNNING, IDLE_START, IDLE_WAITING, SUSPEND_NOW, STAY_SUSPENDED, WAKE } + + private SuspendAction evaluateSuspendState(HiveCluster resource, + HiveClusterStatus existingStatus, KubernetesClient client) { + + // 1. Manual suspend: spec.suspend = true → suspend immediately + if (resource.getSpec().suspend()) { + if (existingStatus != null && "Suspended".equals(existingStatus.getClusterPhase())) { + return SuspendAction.STAY_SUSPENDED; + } + return SuspendAction.SUSPEND_NOW; + } + + // 2. Currently suspended and spec.suspend = false → wake + if (existingStatus != null && "Suspended".equals(existingStatus.getClusterPhase())) { + return SuspendAction.WAKE; + } + + // 3. Auto-suspend evaluation (only if enabled and all autoscaling is on) + AutoSuspendSpec autoSuspend = resource.getSpec().autoSuspend(); + if (!autoSuspend.isEnabled()) { + LOG.debug("Auto-suspend disabled"); + return SuspendAction.RUNNING; + } + if (!allAutoscalingEnabled(resource.getSpec())) { + LOG.debug("Auto-suspend skipped: not all components have autoscaling enabled"); + return SuspendAction.RUNNING; + } + + // 4. Check idle conditions + boolean allIdle = isClusterIdle(resource, client); + if (!allIdle) { + return SuspendAction.RUNNING; + } + + // 5. Check idle duration + String idleSince = existingStatus != null ? 
existingStatus.getIdleSince() : null; + if (idleSince == null) { + return SuspendAction.IDLE_START; + } + + Instant idleStart = Instant.parse(idleSince); + if (Duration.between(idleStart, Instant.now()).toMinutes() >= autoSuspend.idleTimeoutMinutes()) { + return SuspendAction.SUSPEND_NOW; + } + + return SuspendAction.IDLE_WAITING; + } + + + private boolean isClusterIdle(HiveCluster resource, KubernetesClient client) { + HiveClusterSpec spec = resource.getSpec(); + String ns = resource.getMetadata().getNamespace(); + String name = resource.getMetadata().getName(); + + // All LLAP clusters must be at minReplicas + for (var llap : spec.llapClusters()) { + if (llap.isEnabled() + && !isAtMinReplicas(client, ns, name + "-" + llap.name(), true, + llap.autoscaling().minReplicas())) { + return false; + } + } + if (spec.tezAm().isEnabled()) { + for (var llap : spec.llapClusters()) { + if (llap.isEnabled() + && !isAtMinReplicas(client, ns, name + "-tezam-" + llap.name(), true, + llap.tezAm().autoscaling().minReplicas())) { + return false; + } + } + } + if (!isAtMinReplicas(client, ns, name + "-" + ConfigUtils.COMPONENT_HIVESERVER2, false, + Math.max(1, spec.hiveServer2().autoscaling().minReplicas()))) { + return false; + } + + // HS2 must have 0 open sessions. + // If metrics scrape fails (empty list), assume NOT idle to prevent accidental suspend. 
+ HiveClusterAutoscaler scaler = getOrCreateAutoscaler(client); + List hs2Metrics = scaler.getHs2MetricsFromCache(resource); + if (hs2Metrics.isEmpty()) { + LOG.debug("Idle check: HS2 metrics unavailable, assuming not idle"); + return false; + } + int totalSessions = hs2Metrics.stream() + .mapToInt(pm -> pm.metrics().getOrDefault("hs2_open_sessions", 0.0).intValue()) + .sum(); + if (totalSessions > 0) { + LOG.debug("Idle check failed: HS2 has {} open sessions", totalSessions); + return false; + } + + // HMS must be at minReplicas (only checked if includeMetastore=true) + if (spec.metastore().isEnabled() && spec.autoSuspend().includeMetastore() + && !isAtMinReplicas(client, ns, name + "-" + ConfigUtils.COMPONENT_METASTORE, false, + Math.max(1, spec.metastore().autoscaling().minReplicas()))) { + return false; + } + + return true; + } + + /** Returns true if the workload is absent or its replicas <= minReplicas. */ + private boolean isAtMinReplicas(KubernetesClient client, String ns, + String workloadName, boolean statefulSet, int minReplicas) { + try { + Integer currentReplicas = null; + if (statefulSet) { + var ss = client.apps().statefulSets().inNamespace(ns).withName(workloadName).get(); + if (ss != null && ss.getSpec() != null) { + currentReplicas = ss.getSpec().getReplicas(); + } + } else { + var deploy = client.apps().deployments().inNamespace(ns).withName(workloadName).get(); + if (deploy != null && deploy.getSpec() != null) { + currentReplicas = deploy.getSpec().getReplicas(); + } + } + if (currentReplicas != null && currentReplicas > minReplicas) { + LOG.debug("Idle check failed: {} replicas {} > min {}", workloadName, currentReplicas, minReplicas); + return false; + } + return true; + } catch (Exception e) { + LOG.debug("Idle check: could not read {}: {}", workloadName, e.getMessage()); + return true; + } + } + + private void suspendCluster(HiveCluster resource) { + String ns = resource.getMetadata().getNamespace(); + String name = 
resource.getMetadata().getName(); + HiveClusterSpec spec = resource.getSpec(); + + // Set MANAGED_REPLICAS to 0 so autoscaler doesn't fight the suspend. + // Actual scaling to 0 is handled by the DependentResources which check + // spec.suspend() in resolveReplicaCount(). + HiveClusterAutoscaler.setManagedReplicas(ns, name, ConfigUtils.COMPONENT_HIVESERVER2, 0); + if (spec.metastore().isEnabled() && spec.autoSuspend().includeMetastore()) { + HiveClusterAutoscaler.setManagedReplicas(ns, name, ConfigUtils.COMPONENT_METASTORE, 0); + } + for (var llap : spec.llapClusters()) { + if (llap.isEnabled()) { + HiveClusterAutoscaler.setManagedReplicas(ns, name, ConfigUtils.llapComponentKey(llap.name()), 0); + } + } + if (spec.tezAm().isEnabled()) { + for (var llap : spec.llapClusters()) { + if (llap.isEnabled()) { + HiveClusterAutoscaler.setManagedReplicas(ns, name, + ConfigUtils.tezAmComponentKey(llap.name()), 0); + } + } + } + + LOG.info("Cluster {}/{} suspended", ns, name); + } + + private void wakeCluster(HiveCluster resource) { + HiveClusterSpec spec = resource.getSpec(); + String ns = resource.getMetadata().getNamespace(); + String name = resource.getMetadata().getName(); + + // Set MANAGED_REPLICAS to wake values. The JOSDK workflow will recreate + // the dependent resources (Deployments/StatefulSets) on the next reconcile + // and use these values for spec.replicas. We don't call patchReplicas() + // because the workloads may have been garbage-collected while suspended. 
+ int hs2Min = Math.max(1, spec.hiveServer2().autoscaling().minReplicas()); + HiveClusterAutoscaler.setManagedReplicas(ns, name, ConfigUtils.COMPONENT_HIVESERVER2, hs2Min); + + if (spec.metastore().isEnabled() && spec.autoSuspend().includeMetastore()) { + int hmsMin = Math.max(1, spec.metastore().autoscaling().minReplicas()); + HiveClusterAutoscaler.setManagedReplicas(ns, name, ConfigUtils.COMPONENT_METASTORE, hmsMin); + } + + for (var llap : spec.llapClusters()) { + if (llap.isEnabled()) { + int llapWake = llap.autoscaling().minReplicas(); + HiveClusterAutoscaler.setManagedReplicas(ns, name, ConfigUtils.llapComponentKey(llap.name()), llapWake); + } + } + + if (spec.tezAm().isEnabled()) { + for (var llap : spec.llapClusters()) { + if (llap.isEnabled()) { + int tezWake = llap.tezAm().autoscaling().minReplicas(); + HiveClusterAutoscaler.setManagedReplicas(ns, name, + ConfigUtils.tezAmComponentKey(llap.name()), tezWake); + } + } + } + + LOG.info("Cluster {}/{} woken up — restored to minReplicas", ns, name); + } + + private static boolean allAutoscalingEnabled(HiveClusterSpec spec) { + if (!spec.hiveServer2().autoscaling().isEnabled()) { + return false; + } + // Skip HMS check if includeMetastore=false (HMS doesn't participate in suspend) + if (spec.metastore().isEnabled() && spec.autoSuspend().includeMetastore() + && !spec.metastore().autoscaling().isEnabled()) { + return false; + } + for (var llap : spec.llapClusters()) { + if (llap.isEnabled() && !llap.autoscaling().isEnabled()) { + return false; + } + if (llap.isEnabled() && spec.tezAm().isEnabled() && !llap.tezAm().autoscaling().isEnabled()) { + return false; + } + } + return true; + } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveWorkflowSpec.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveWorkflowSpec.java new file mode 100644 index 000000000000..c40b432d0dd9 --- /dev/null +++ 
b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/reconciler/HiveWorkflowSpec.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.kubernetes.operator.reconciler; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.javaoperatorsdk.operator.api.config.dependent.DependentResourceSpec; +import io.javaoperatorsdk.operator.api.config.workflow.WorkflowSpec; +import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; +import org.apache.hive.kubernetes.operator.dependent.HiveConfigMapDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServer2DeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.HivePdbDependent; +import org.apache.hive.kubernetes.operator.dependent.HiveServiceDependent; +import org.apache.hive.kubernetes.operator.dependent.MetastoreDeploymentDependent; +import org.apache.hive.kubernetes.operator.dependent.SchemaInitJobDependent; +import org.apache.hive.kubernetes.operator.dependent.ScratchPvcDependent; +import 
org.apache.hive.kubernetes.operator.model.HiveCluster; + +/** + * Programmatic workflow specification for the Hive Kubernetes Operator. + * Replaces the annotation-based {@code @Workflow} on the reconciler with + * explicit {@link DependentResourceSpec} entries and inline lambda conditions. + * This eliminates 12 single-method condition wrapper classes. + */ +public final class HiveWorkflowSpec implements WorkflowSpec { + + // Dependent resource spec names (used as identifiers and dependency references) + private static final String HADOOP_CONFIGMAP = "hadoop-configmap"; + private static final String METASTORE_CONFIGMAP = "metastore-configmap"; + private static final String HIVESERVER2_CONFIGMAP = "hiveserver2-configmap"; + private static final String SCHEMA_INIT_JOB = "schema-init-job"; + private static final String METASTORE_DEPLOYMENT = "metastore-deployment"; + private static final String METASTORE_SERVICE = "metastore-service"; + private static final String HIVESERVER2_DEPLOYMENT = "hiveserver2-deployment"; + private static final String HIVESERVER2_SERVICE = "hiveserver2-service"; + private static final String SCRATCH_PVC = "scratch-pvc"; + private static final String HS2_PDB = "hs2-pdb"; + private static final String METASTORE_PDB = "metastore-pdb"; + + private static final Condition METASTORE_ENABLED = + (dr, primary, ctx) -> primary.getSpec().metastore().isEnabled(); + + private static final Condition METASTORE_AUTOSCALING = + (dr, primary, ctx) -> primary.getSpec().metastore().isEnabled() + && primary.getSpec().metastore().autoscaling().isEnabled(); + + private static final Condition HS2_AUTOSCALING = + (dr, primary, ctx) -> primary.getSpec().hiveServer2().autoscaling().isEnabled(); + + private static final Condition TEZAM_ENABLED = + (dr, primary, ctx) -> primary.getSpec().tezAm().isEnabled(); + + + // SPECS must be declared AFTER all conditions to avoid static init order issues. 
+ private static final List SPECS = buildSpecs(); + + @SuppressWarnings({"rawtypes", "unchecked"}) + private static List buildSpecs() { + List specs = new ArrayList<>(); + + // --- ConfigMap dependents --- + specs.add(new DependentResourceSpec( + HiveConfigMapDependent.Hadoop.class, HADOOP_CONFIGMAP, + Set.of(), null, null, null, null, null)); + + specs.add(new DependentResourceSpec( + HiveConfigMapDependent.Metastore.class, METASTORE_CONFIGMAP, + Set.of(), null, null, null, METASTORE_ENABLED, null)); + + specs.add(new DependentResourceSpec( + HiveConfigMapDependent.HiveServer2.class, HIVESERVER2_CONFIGMAP, + Set.of(), null, null, null, null, null)); + + // --- Job dependents --- + specs.add(new DependentResourceSpec( + SchemaInitJobDependent.class, SCHEMA_INIT_JOB, + Set.of(METASTORE_CONFIGMAP, HADOOP_CONFIGMAP), + schemaJobCompleted(), null, null, METASTORE_ENABLED, null)); + + // --- Deployment dependents --- + specs.add(new DependentResourceSpec( + MetastoreDeploymentDependent.class, METASTORE_DEPLOYMENT, + Set.of(SCHEMA_INIT_JOB), + metastoreReady(), null, null, METASTORE_ENABLED, null)); + + // --- Service dependents --- + specs.add(new DependentResourceSpec( + HiveServiceDependent.Metastore.class, METASTORE_SERVICE, + Set.of(METASTORE_CONFIGMAP), + null, null, null, METASTORE_ENABLED, null)); + + // --- Shared Scratch PVC (Required for HS2 to TezAM communication) --- + specs.add(new DependentResourceSpec( + ScratchPvcDependent.class, SCRATCH_PVC, + Set.of(), null, null, null, TEZAM_ENABLED, null)); + + specs.add(new DependentResourceSpec( + HiveServer2DeploymentDependent.class, HIVESERVER2_DEPLOYMENT, + Set.of(HIVESERVER2_CONFIGMAP, HADOOP_CONFIGMAP), + null, hs2Precondition(), null, null, null)); + + specs.add(new DependentResourceSpec( + HiveServiceDependent.HiveServer2.class, HIVESERVER2_SERVICE, + Set.of(HIVESERVER2_CONFIGMAP), + null, null, null, null, null)); + + // --- Autoscaling: PodDisruptionBudgets (conditional) --- + specs.add(new 
DependentResourceSpec( + HivePdbDependent.HiveServer2.class, HS2_PDB, + Set.of(HIVESERVER2_DEPLOYMENT), + null, HS2_AUTOSCALING, null, null, null)); + + specs.add(new DependentResourceSpec( + HivePdbDependent.Metastore.class, METASTORE_PDB, + Set.of(METASTORE_DEPLOYMENT), + null, METASTORE_AUTOSCALING, null, null, null)); + + return Collections.unmodifiableList(specs); + } + + /** + * Ready postcondition: schema initialization Job must complete successfully + * before the Metastore Deployment is created. + */ + private static Condition schemaJobCompleted() { + return (dependentResource, primary, context) -> { + if (!primary.getSpec().metastore().isEnabled()) { + return true; + } + return dependentResource.getSecondaryResource(primary, context) + .map(job -> { + var j = (io.fabric8.kubernetes.api.model.batch.v1.Job) job; + return j.getStatus() != null + && j.getStatus().getSucceeded() != null + && j.getStatus().getSucceeded() >= 1; + }) + .orElse(false); + }; + } + + /** + * Ready postcondition: Metastore Deployment must have the desired number + * of ready replicas before downstream dependents proceed. 
+ */ + private static Condition metastoreReady() { + return (dependentResource, primary, context) -> { + if (!primary.getSpec().metastore().isEnabled()) { + return true; + } + int desiredReplicas; + if (primary.getSpec().metastore().autoscaling().isEnabled()) { + desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas()); + } else { + desiredReplicas = primary.getSpec().metastore().replicas(); + } + return dependentResource.getSecondaryResource(primary, context) + .map(resource -> { + var deployment = (Deployment) resource; + return deployment.getStatus() != null + && deployment.getStatus().getReadyReplicas() != null + && deployment.getStatus().getReadyReplicas() >= desiredReplicas; + }) + .orElse(false); + }; + } + + /** + * Reconcile precondition for HiveServer2: if Metastore is managed, + * wait for it to be ready before reconciling HS2. + */ + private static Condition hs2Precondition() { + return (dependentResource, primary, context) -> { + if (!primary.getSpec().metastore().isEnabled()) { + return true; + } + int desiredReplicas; + if (primary.getSpec().metastore().autoscaling().isEnabled()) { + desiredReplicas = Math.max(1, primary.getSpec().metastore().autoscaling().minReplicas()); + } else { + desiredReplicas = primary.getSpec().metastore().replicas(); + } + return context.getSecondaryResources(Deployment.class).stream() + .filter(d -> d.getMetadata().getName().equals( + primary.getMetadata().getName() + "-metastore")) + .findFirst() + .map(deployment -> deployment.getStatus() != null + && deployment.getStatus().getReadyReplicas() != null + && deployment.getStatus().getReadyReplicas() >= desiredReplicas) + .orElse(false); + }; + } + + @Override + public List getDependentResourceSpecs() { + return SPECS; + } + + @Override + public boolean isExplicitInvocation() { + return false; + } + + @Override + public boolean handleExceptionsInReconciler() { + return true; + } +} diff --git 
a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java index 0f86201817e7..6dbc38f9b672 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/ConfigUtils.java @@ -25,6 +25,22 @@ public final class ConfigUtils { private ConfigUtils() { } + // Component names used across the operator for labels, resource naming, and autoscaling keys. + public static final String COMPONENT_HIVESERVER2 = "hiveserver2"; + public static final String COMPONENT_METASTORE = "metastore"; + public static final String COMPONENT_LLAP = "llap"; + public static final String COMPONENT_TEZAM = "tezam"; + + /** Returns the autoscaler component key for a specific LLAP cluster (e.g., "llap-llap0"). */ + public static String llapComponentKey(String llapName) { + return COMPONENT_LLAP + "-" + llapName; + } + + /** Returns the autoscaler component key for a per-LLAP TezAM (e.g., "tezam-llap0"). */ + public static String tezAmComponentKey(String llapName) { + return COMPONENT_TEZAM + "-" + llapName; + } + public static final String METASTORE_THRIFT_PORT_KEY = "metastore.thrift.port"; public static final String METASTORE_THRIFT_PORT_HIVE_KEY = "hive.metastore.port"; public static final int METASTORE_THRIFT_PORT_DEFAULT = 9083; @@ -58,6 +74,12 @@ private ConfigUtils() { public static final String HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE_KEY = "hive.server2.tez.external.sessions.namespace"; + /** + * ZK path prefix that Tez's STANDALONE_ZOOKEEPER mode prepends to tez.am.registry.namespace + * when creating the session availability node. Must match the Docker template convention. 
+ */ + public static final String TEZ_EXTERNAL_SESSIONS_ZK_PREFIX = "/tez-external-sessions"; + public static final String HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_REGISTRY_CLASS_KEY = "hive.server2.tez.external.sessions.registry.class"; @@ -67,20 +89,80 @@ private ConfigUtils() { public static final String HIVE_LLAP_EXECUTION_MODE_KEY = "hive.llap.execution.mode"; + public static final String HIVE_LLAP_CLUSTER_ROUTING_RULES_KEY = "hive.llap.cluster.routing.rules"; + public static final String HIVE_LLAP_CLUSTER_PREFIX = "hive.llap.cluster."; + public static final String HIVE_LLAP_CLUSTER_SESSIONS_NS_SUFFIX = ".sessions.namespace"; + public static final String HIVE_LLAP_CLUSTER_REGISTRY_NS_SUFFIX = ".registry.namespace"; + public static final String HIVE_LLAP_CLUSTER_SERVICE_HOSTS_SUFFIX = ".service.hosts"; + public static final String HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY = "hive.llap.daemon.service.hosts"; public static final String HIVE_LLAP_DAEMON_MEMORY_MB_KEY = "hive.llap.daemon.memory.per.instance.mb"; public static final String HIVE_LLAP_DAEMON_NUM_EXECUTORS_KEY = "hive.llap.daemon.num.executors"; + public static final String HIVE_LLAP_DAEMON_RPC_PORT_KEY = "hive.llap.daemon.rpc.port"; + public static final int HIVE_LLAP_DAEMON_RPC_PORT_DEFAULT = 15001; + + public static final String HIVE_LLAP_MANAGEMENT_RPC_PORT_KEY = "hive.llap.management.rpc.port"; + public static final int HIVE_LLAP_MANAGEMENT_RPC_PORT_DEFAULT = 15004; + + public static final String HIVE_LLAP_DAEMON_SHUFFLE_PORT_KEY = "hive.llap.daemon.yarn.shuffle.port"; + public static final int HIVE_LLAP_DAEMON_SHUFFLE_PORT_DEFAULT = 15551; + + public static final String HIVE_LLAP_DAEMON_WEB_PORT_KEY = "hive.llap.daemon.web.port"; + public static final int HIVE_LLAP_DAEMON_WEB_PORT_DEFAULT = 15002; + + public static final String HIVE_LLAP_DAEMON_OUTPUT_SERVICE_PORT_KEY = "hive.llap.daemon.output.service.port"; + public static final int HIVE_LLAP_DAEMON_OUTPUT_SERVICE_PORT_DEFAULT = 15003; + + public static 
final String HIVE_LLAP_DAEMON_UMBILICAL_PORT_KEY = "hive.llap.daemon.umbilical.port"; + public static final String HIVE_LLAP_DAEMON_UMBILICAL_PORT_DEFAULT = "0"; + + public static final String METASTORE_SERVER_TRANSPORT_MODE_KEY = "metastore.server.thrift.transport.mode"; + public static final String METASTORE_SERVER_TRANSPORT_MODE_DEFAULT = "http"; + + public static final String METASTORE_SERVER_HTTP_PATH_KEY = "metastore.server.thrift.http.path"; + public static final String METASTORE_SERVER_HTTP_PATH_DEFAULT = "metastore"; + + public static final String METASTORE_CLIENT_TRANSPORT_MODE_KEY = "hive.metastore.client.thrift.transport.mode"; + public static final String METASTORE_CLIENT_TRANSPORT_MODE_DEFAULT = "http"; + + public static final String METASTORE_CLIENT_HTTP_PATH_KEY = "metastore.client.thrift.http.path"; + public static final String METASTORE_CLIENT_HTTP_PATH_DEFAULT = "metastore"; + + public static final String METASTORE_SERVER_MAX_THREADS_KEY = "metastore.server.max.threads"; + public static final String METASTORE_SERVER_MAX_THREADS_HIVE_KEY = "hive.metastore.server.max.threads"; + public static final int METASTORE_SERVER_MAX_THREADS_DEFAULT = 1000; + + public static final String METASTORE_REST_HTTP_PORT_KEY = "metastore.rest.http.port"; + public static final int METASTORE_REST_HTTP_PORT_DEFAULT = 9001; + public static final String HIVE_METASTORE_URIS_KEY = "hive.metastore.uris"; + public static final String HIVE_SERVER2_THRIFT_PORT_KEY = "hive.server2.thrift.port"; public static final int HIVE_SERVER2_THRIFT_PORT_DEFAULT = 10000; + public static final String HIVE_SERVER2_THRIFT_HTTP_PORT_KEY = "hive.server2.thrift.http.port"; + public static final int HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT = 10001; + + public static final String HIVE_SERVER2_THRIFT_HTTP_PATH_KEY = "hive.server2.thrift.http.path"; + public static final String HIVE_SERVER2_THRIFT_HTTP_PATH_DEFAULT = "cliservice"; + + public static final String HIVE_SERVER2_TRANSPORT_MODE_KEY = 
"hive.server2.transport.mode"; + public static final String HIVE_SERVER2_TRANSPORT_MODE_DEFAULT = "http"; + public static final String HIVE_SERVER2_WEBUI_PORT_KEY = "hive.server2.webui.port"; public static final int HIVE_SERVER2_WEBUI_PORT_DEFAULT = 10002; + /** Port for the Prometheus JMX Exporter agent (serves /metrics in text format). */ + public static final int PROMETHEUS_JMX_EXPORTER_PORT = 9404; + + /** Default URL for the Prometheus JMX Exporter javaagent JAR. */ + public static final String JMX_EXPORTER_JAR_URL = + "https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/1.0.1/jmx_prometheus_javaagent-1.0.1.jar"; + public static final String TEZ_AM_SESSION_MODE_KEY = "tez.am.mode.session"; public static final String TEZ_IGNORE_LIB_URIS_KEY = "tez.ignore.lib.uris"; @@ -115,4 +197,15 @@ public static int getInt(Map overrides, } return defaultVal; } + + public static boolean getBoolean(Map overrides, + String key, boolean defaultVal) { + if (overrides != null) { + String val = overrides.get(key); + if (val != null) { + return Boolean.parseBoolean(val); + } + } + return defaultVal; + } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java index 5db24e95d3f3..cfcfbacc8886 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/HiveConfigBuilder.java @@ -20,6 +20,7 @@ import java.util.LinkedHashMap; import java.util.Map; +import java.util.Optional; import org.apache.hive.kubernetes.operator.model.HiveCluster; import org.apache.hive.kubernetes.operator.model.HiveClusterSpec; @@ -57,9 +58,20 @@ public static Map getHiveServer2HiveSite( if (metastoreUri != null && !metastoreUri.isEmpty()) { props.put(ConfigUtils.METASTORE_URIS_KEY, metastoreUri); } + // 
Client-side HTTP transport mode to match metastore server config. + props.put(ConfigUtils.METASTORE_CLIENT_TRANSPORT_MODE_KEY, + ConfigUtils.METASTORE_CLIENT_TRANSPORT_MODE_DEFAULT); + props.put(ConfigUtils.METASTORE_CLIENT_HTTP_PATH_KEY, + ConfigUtils.METASTORE_CLIENT_HTTP_PATH_DEFAULT); props.put(ConfigUtils.HIVE_METASTORE_WAREHOUSE_KEY, spec.metastore().warehouseDir()); props.put(ConfigUtils.HIVE_SERVER2_ENABLE_DOAS_KEY, "false"); + props.put(ConfigUtils.HIVE_SERVER2_TRANSPORT_MODE_KEY, + ConfigUtils.HIVE_SERVER2_TRANSPORT_MODE_DEFAULT); + props.put(ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_KEY, + String.valueOf(ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PORT_DEFAULT)); + props.put(ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PATH_KEY, + ConfigUtils.HIVE_SERVER2_THRIFT_HTTP_PATH_DEFAULT); props.put(ConfigUtils.HIVE_TEZ_EXEC_INPLACE_PROGRESS_KEY, "false"); props.put(ConfigUtils.HIVE_TEZ_EXEC_SUMMARY_KEY, "true"); props.put(ConfigUtils.HIVE_JAR_DIRECTORY_KEY, "/tmp"); @@ -71,23 +83,32 @@ public static Map getHiveServer2HiveSite( if (tezAmEnabled) { props.put(ConfigUtils.HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS_KEY, "true"); + // Default external sessions namespace points to first LLAP cluster's TezAM. 
+ // Client routes to other clusters by overriding both properties in JDBC URL: + // hive.server2.tez.external.sessions.namespace=/ + // tez.am.registry.namespace=/ + // + // Path relationship (matches Docker template convention): + // tez.am.registry.namespace = / + // Tez registers session node at: // + // hive.server2.tez.external.sessions.namespace = / + String defaultRegistryNs = defaultLlapCluster(spec) + .map(llap -> "/" + llap.name()).orElse("/default"); + String defaultNamespace = ConfigUtils.TEZ_EXTERNAL_SESSIONS_ZK_PREFIX + defaultRegistryNs; props.put(ConfigUtils.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE_KEY, - "/tez-external-sessions/tez_am/server"); + defaultNamespace); props.put(ConfigUtils.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_REGISTRY_CLASS_KEY, "org.apache.hadoop.hive.ql.exec.tez.ZookeeperExternalSessionsRegistryClient"); props.put(ConfigUtils.HIVE_ZOOKEEPER_QUORUM_KEY, zkQuorum); - // tez.am.framework.mode, tez.am.registry.namespace, tez.am.zookeeper.quorum - // are only in Tez 1.0.0+ props.put(ConfigUtils.TEZ_AM_FRAMEWORK_MODE_KEY, "STANDALONE_ZOOKEEPER"); - props.put(ConfigUtils.TEZ_AM_REGISTRY_NAMESPACE_KEY, "/tez_am/server"); + props.put(ConfigUtils.TEZ_AM_REGISTRY_NAMESPACE_KEY, defaultRegistryNs); props.put(ConfigUtils.TEZ_AM_ZOOKEEPER_QUORUM_KEY, zkQuorum); - LlapSpec llap = spec.llap(); - if (llap.isEnabled()) { + defaultLlapCluster(spec).ifPresent(llap -> { props.put(ConfigUtils.HIVE_EXECUTION_MODE_KEY, "llap"); props.put(ConfigUtils.HIVE_LLAP_EXECUTION_MODE_KEY, "all"); props.put(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY, llap.serviceHosts()); - } + }); } else { props.put(ConfigUtils.HIVE_SERVER2_TEZ_USE_EXTERNAL_SESSIONS_KEY, "false"); props.put(ConfigUtils.TEZ_LOCAL_MODE_KEY, "true"); @@ -95,14 +116,46 @@ public static Map getHiveServer2HiveSite( props.put("mapreduce.framework.name", "local"); } + // Server-side LLAP cluster routing: emit per-cluster definitions and routing rules. 
+ if (spec.llapClusterRouting() != null && !spec.llapClusterRouting().isEmpty()) { + props.put(ConfigUtils.HIVE_LLAP_CLUSTER_ROUTING_RULES_KEY, spec.llapClusterRouting()); + for (LlapSpec llap : spec.llapClusters()) { + if (!llap.isEnabled()) { + continue; + } + String sessionsNs = ConfigUtils.TEZ_EXTERNAL_SESSIONS_ZK_PREFIX + "/" + llap.name(); + String registryNs = "/" + llap.name(); + props.put(ConfigUtils.HIVE_LLAP_CLUSTER_PREFIX + llap.name() + + ConfigUtils.HIVE_LLAP_CLUSTER_SESSIONS_NS_SUFFIX, sessionsNs); + props.put(ConfigUtils.HIVE_LLAP_CLUSTER_PREFIX + llap.name() + + ConfigUtils.HIVE_LLAP_CLUSTER_REGISTRY_NS_SUFFIX, registryNs); + props.put(ConfigUtils.HIVE_LLAP_CLUSTER_PREFIX + llap.name() + + ConfigUtils.HIVE_LLAP_CLUSTER_SERVICE_HOSTS_SUFFIX, llap.serviceHosts()); + } + } + + // Enable JMX metrics when autoscaling is active for HS2, OR if LLAP/TezAM rely on them. + boolean llapOrTezAmAutoscales = spec.llapClusters().stream().anyMatch( + l -> l.isEnabled() && (l.autoscaling().isEnabled() + || (spec.tezAm().isEnabled() && l.tezAm().autoscaling().isEnabled()))); + if (spec.hiveServer2().autoscaling().isEnabled() || llapOrTezAmAutoscales) { + props.put("hive.server2.metrics.enabled", "true"); + props.put("hive.server2.metrics.reporter", "JMX"); + } + if (spec.hiveServer2().configOverrides() != null) { props.putAll(spec.hiveServer2().configOverrides()); } return props; } - /** Builds tez-site.xml properties for HiveServer2 and TezAM. */ + /** Builds tez-site.xml properties for HiveServer2 (uses first LLAP cluster as default). */ public static Map getTezSite(HiveClusterSpec spec) { + return getTezSite(spec, defaultLlapCluster(spec).orElse(null)); + } + + /** Builds tez-site.xml properties for a specific LLAP cluster's TezAM. 
*/ + public static Map getTezSite(HiveClusterSpec spec, LlapSpec llap) { boolean tezAmEnabled = spec.tezAm().isEnabled(); String zkQuorum = spec.zookeeper().quorum(); @@ -117,17 +170,26 @@ public static Map getTezSite(HiveClusterSpec spec) { if (tezAmEnabled) { tezProps.put(ConfigUtils.TEZ_LOCAL_MODE_KEY, "false"); tezProps.put(ConfigUtils.TEZ_AM_FRAMEWORK_MODE_KEY, "STANDALONE_ZOOKEEPER"); - tezProps.put(ConfigUtils.TEZ_AM_REGISTRY_NAMESPACE_KEY, "/tez_am/server"); + // Per-LLAP-cluster TezAM: each registers under its own ZK namespace. + // tez.am.registry.namespace is the path WITHIN Tez's "tez-external-sessions" + // Curator namespace (chroot). The absolute ZK path becomes: + // /tez-external-sessions// + String registryNamespace = llap != null + ? "/" + llap.name() : "/default"; + tezProps.put(ConfigUtils.TEZ_AM_REGISTRY_NAMESPACE_KEY, registryNamespace); } else { tezProps.put(ConfigUtils.TEZ_LOCAL_MODE_KEY, "true"); } - LlapSpec llap = spec.llap(); - if (llap.isEnabled()) { + if (llap != null) { tezProps.put(ConfigUtils.HIVE_LLAP_DAEMON_SERVICE_HOSTS_KEY, llap.serviceHosts()); } + // Required by LlapTaskCommunicator — Tez's Configuration doesn't get HiveConf defaults + tezProps.put(ConfigUtils.HIVE_LLAP_DAEMON_UMBILICAL_PORT_KEY, + ConfigUtils.HIVE_LLAP_DAEMON_UMBILICAL_PORT_DEFAULT); + if (spec.tezAm().configOverrides() != null) { tezProps.putAll(spec.tezAm().configOverrides()); } @@ -149,6 +211,13 @@ public static Map getMetastoreSite(HiveClusterSpec spec) { MetastoreSpec metastore = spec.metastore(); Map props = new LinkedHashMap<>(); + // HTTP transport mode: stateless connections allow safe scale-down + // without breaking active client connections. 
+ props.put(ConfigUtils.METASTORE_SERVER_TRANSPORT_MODE_KEY, + ConfigUtils.METASTORE_SERVER_TRANSPORT_MODE_DEFAULT); + props.put(ConfigUtils.METASTORE_SERVER_HTTP_PATH_KEY, + ConfigUtils.METASTORE_SERVER_HTTP_PATH_DEFAULT); + props.put(ConfigUtils.METASTORE_WAREHOUSE_KEY, metastore.warehouseDir()); @@ -165,15 +234,22 @@ public static Map getMetastoreSite(HiveClusterSpec spec) { } } + // Enable JMX metrics when autoscaling is active. + // The Prometheus JMX Exporter agent reads JMX MBeans and exposes them + // in Prometheus text format at /metrics on the metrics port. + if (metastore.autoscaling().isEnabled()) { + props.put("metastore.metrics.enabled", "true"); + props.put("metastore.metrics.reporter", "JMX"); + } + if (metastore.configOverrides() != null) { props.putAll(metastore.configOverrides()); } return props; } - /** Builds llap-daemon-site.xml properties. */ - public static Map getLlapDaemonSite(HiveClusterSpec spec) { - LlapSpec llap = spec.llap(); + /** Builds llap-daemon-site.xml properties for a specific LLAP cluster. */ + public static Map getLlapDaemonSite(HiveClusterSpec spec, LlapSpec llap) { Map props = new LinkedHashMap<>(); props.put(ConfigUtils.HIVE_LLAP_DAEMON_MEMORY_MB_KEY, @@ -190,4 +266,11 @@ public static Map getLlapDaemonSite(HiveClusterSpec spec) { } return props; } + + /** Returns the first enabled LLAP cluster (used as the default for HS2/TezAM config). 
*/ + private static Optional defaultLlapCluster(HiveClusterSpec spec) { + return spec.llapClusters().stream() + .filter(LlapSpec::isEnabled) + .findFirst(); + } } diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java index dcf0cc43b3c6..965ccbbf115f 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java +++ b/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/util/Labels.java @@ -31,6 +31,8 @@ public final class Labels { public static final String APP_COMPONENT = "app.kubernetes.io/component"; public static final String MANAGED_BY = "app.kubernetes.io/managed-by"; public static final String MANAGED_BY_VALUE = "hive-kubernetes-operator"; + public static final String LLAP_CLUSTER = "hive.apache.org/llap-cluster"; + private static final String APP_NAME_VALUE = "apache-hive"; private Labels() { } @@ -45,7 +47,7 @@ private Labels() { public static Map forComponent(HiveCluster hc, String component) { Map labels = new LinkedHashMap<>(); - labels.put(APP_NAME, "apache-hive"); + labels.put(APP_NAME, APP_NAME_VALUE); labels.put(APP_INSTANCE, hc.getMetadata().getName()); labels.put(APP_COMPONENT, component); labels.put(MANAGED_BY, MANAGED_BY_VALUE); @@ -66,4 +68,53 @@ public static Map selectorForComponent(HiveCluster hc, selector.put(APP_COMPONENT, component); return selector; } + + /** + * Returns the full label set for a specific LLAP cluster instance. + * Includes the per-cluster discriminator label. 
+ */ + public static Map forLlapCluster(HiveCluster hc, String llapName) { + Map labels = new LinkedHashMap<>(); + labels.put(APP_NAME, APP_NAME_VALUE); + labels.put(APP_INSTANCE, hc.getMetadata().getName()); + labels.put(APP_COMPONENT, ConfigUtils.COMPONENT_LLAP); + labels.put(MANAGED_BY, MANAGED_BY_VALUE); + labels.put(LLAP_CLUSTER, llapName); + return labels; + } + + /** + * Returns the selector labels for a specific LLAP cluster instance. + */ + public static Map selectorForLlapCluster(HiveCluster hc, String llapName) { + Map selector = new LinkedHashMap<>(); + selector.put(APP_INSTANCE, hc.getMetadata().getName()); + selector.put(APP_COMPONENT, ConfigUtils.COMPONENT_LLAP); + selector.put(LLAP_CLUSTER, llapName); + return selector; + } + + /** + * Returns the full label set for a per-LLAP-cluster TezAM instance. + */ + public static Map forTezAmCluster(HiveCluster hc, String llapName) { + Map labels = new LinkedHashMap<>(); + labels.put(APP_NAME, APP_NAME_VALUE); + labels.put(APP_INSTANCE, hc.getMetadata().getName()); + labels.put(APP_COMPONENT, ConfigUtils.COMPONENT_TEZAM); + labels.put(MANAGED_BY, MANAGED_BY_VALUE); + labels.put(LLAP_CLUSTER, llapName); + return labels; + } + + /** + * Returns the selector labels for a per-LLAP-cluster TezAM instance. 
+ */ + public static Map selectorForTezAmCluster(HiveCluster hc, String llapName) { + Map selector = new LinkedHashMap<>(); + selector.put(APP_INSTANCE, hc.getMetadata().getName()); + selector.put(APP_COMPONENT, ConfigUtils.COMPONENT_TEZAM); + selector.put(LLAP_CLUSTER, llapName); + return selector; + } } diff --git a/parser/src/test/org/apache/hadoop/hive/ql/parse/TestParseCreateView.java b/parser/src/test/org/apache/hadoop/hive/ql/parse/TestParseCreateView.java new file mode 100644 index 000000000000..22c31f74d0b3 --- /dev/null +++ b/parser/src/test/org/apache/hadoop/hive/ql/parse/TestParseCreateView.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.ql.parse; + +import org.junit.Test; + +import static org.junit.Assert.assertTrue; + +public class TestParseCreateView { + + private final ParseDriver parseDriver = new ParseDriver(); + + @Test + public void testParseCreateViewWithTableProperties() throws Exception { + ASTNode tree = + parseDriver + .parse( + "create view v1 tblproperties ('view-format'='iceberg') as select * from t", null) + .getTree(); + assertTrue(tree.dump(), tree.toStringTree().contains("tok_createview")); + assertTrue(tree.dump(), tree.toStringTree().contains("tok_tableproperties")); + assertTrue(tree.dump(), tree.toStringTree().contains("view-format")); + assertTrue(tree.dump(), tree.toStringTree().contains("iceberg")); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/llap/LlapHiveUtils.java b/ql/src/java/org/apache/hadoop/hive/llap/LlapHiveUtils.java index ba62b8d89c22..76034ca69a57 100644 --- a/ql/src/java/org/apache/hadoop/hive/llap/LlapHiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/llap/LlapHiveUtils.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.io.CacheTag; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.tez.DagUtils; import org.apache.hadoop.hive.ql.io.HdfsUtils; @@ -74,18 +75,21 @@ public static PartitionDesc partitionDescForPath(Path path, Map>>> entities; + public record PartitionSpec(Map spec) {} + public record CatalogDb(String catalog, String database){} + private final Map>> entities; - private Request(Map>>> entities) { + private Request(Map>> entities) { this.entities = entities; } - public Map>>> getEntities() { + public Map>> getEntities() { return entities; } @@ -172,15 +172,8 @@ public boolean isEmpty() { return entities.isEmpty(); } - /** - * Request often times only contains tables/partitions of 1 DB only. 
- * @return the single DB name, null if the count of DBs present is not exactly 1. - */ - public String getSingleDbName() { - if (entities.size() == 1) { - return entities.keySet().stream().findFirst().get(); - } - return null; + public boolean hasDatabaseName(String catalogName, String dbName) { + return entities.containsKey(new CatalogDb(catalogName, dbName)); } /** @@ -188,41 +181,39 @@ public String getSingleDbName() { * @return list of request instances ready to be sent over protobuf. */ public List toProtoRequests() { - - List protoRequests = new LinkedList<>(); - - for (Map.Entry>>> dbEntry : entities.entrySet()) { - String dbName = dbEntry.getKey(); - Map>> tables = dbEntry.getValue(); - - LlapDaemonProtocolProtos.EvictEntityRequestProto.Builder requestBuilder = - LlapDaemonProtocolProtos.EvictEntityRequestProto.newBuilder(); - LlapDaemonProtocolProtos.TableProto.Builder tableBuilder = null; - - requestBuilder.setDbName(dbName.toLowerCase()); - for (Map.Entry>> tableEntry : tables.entrySet()) { - String tableName = tableEntry.getKey(); - tableBuilder = LlapDaemonProtocolProtos.TableProto.newBuilder(); - tableBuilder.setTableName(tableName.toLowerCase()); - - Set> partitions = tableEntry.getValue(); - Set partitionKeys = null; - - for (Map partitionSpec : partitions) { - if (partitionKeys == null) { + return entities.entrySet().stream() + .map(entry -> { + CatalogDb catalogDb = entry.getKey(); + Map> tables = entry.getValue(); + LlapDaemonProtocolProtos.EvictEntityRequestProto.Builder requestBuilder = + LlapDaemonProtocolProtos.EvictEntityRequestProto.newBuilder(); + + requestBuilder.setCatalogName(catalogDb.catalog().toLowerCase()); + requestBuilder.setDbName(catalogDb.database().toLowerCase()); + + tables.forEach((tableName, partitions) -> { + LlapDaemonProtocolProtos.TableProto.Builder tableBuilder = + LlapDaemonProtocolProtos.TableProto.newBuilder(); + + tableBuilder.setTableName(tableName.toLowerCase()); + + Set partitionKeys = null; + + for 
(PartitionSpec partitionSpec : partitions) { + if (partitionKeys == null) { + partitionKeys = new LinkedHashSet<>(partitionSpec.spec().keySet()); + tableBuilder.addAllPartKey(partitionKeys); + } + for (String partKey : tableBuilder.getPartKeyList()) { + tableBuilder.addPartVal(partitionSpec.spec().get(partKey)); + } + } // For a given table the set of partition columns (keys) should not change. - partitionKeys = new LinkedHashSet<>(partitionSpec.keySet()); - tableBuilder.addAllPartKey(partitionKeys); - } - for (String partKey : tableBuilder.getPartKeyList()) { - tableBuilder.addPartVal(partitionSpec.get(partKey)); - } - } - requestBuilder.addTable(tableBuilder.build()); - } - protoRequests.add(requestBuilder.build()); - } - return protoRequests; + requestBuilder.addTable(tableBuilder.build()); + }); + return requestBuilder.build(); + }) + .toList(); } /** @@ -233,19 +224,19 @@ public List toProtoRequests() * @return true if cacheTag matches and the related buffer is eligible for proactive eviction, false otherwise. */ public boolean isTagMatch(CacheTag cacheTag) { - String db = getSingleDbName(); - if (db == null) { - // Number of DBs in the request was not exactly 1. - throw new UnsupportedOperationException("Predicate only implemented for 1 DB case."); - } - TableName tagTableName = TableName.fromString(cacheTag.getTableName(), null, null); - - // Check against DB. - if (!db.equals(tagTableName.getDb())) { + // Parse the tag once and derive catalog/db from the parsed result, so that 2-part + // (db.table), 3-part (catalog.db.table), 4-part (catalog.db.table.metaTable) and + // snapshot-ref names are all interpreted consistently. + TableName tagTableName = parseCacheTagTableName(cacheTag.getTableName()); + String catalog = tagTableName.getCat(); + String db = tagTableName.getDb(); + + // Check that the tag's catalog and database is present in the eviction request. 
+ if (!entities.containsKey(new CatalogDb(catalog, db))) { return false; } - Map>> tables = entities.get(db); + Map> tables = entities.getOrDefault(new CatalogDb(catalog, db), Map.of()); // If true, must be a drop DB event and this cacheTag matches. if (tables.isEmpty()) { @@ -257,31 +248,60 @@ public boolean isTagMatch(CacheTag cacheTag) { tagPartDescMap = ((CacheTag.PartitionCacheTag) cacheTag).getPartitionDescMap(); } + String tagDbTable = tagTableName.getNotEmptyDbTable(); // Check against table name. - for (String tableAndDbName : tables.keySet()) { - if (tableAndDbName.equals(tagTableName.getNotEmptyDbTable())) { - - Set> partDescs = tables.get(tableAndDbName); - - // If true, must be a drop table event, and this cacheTag matches. - if (partDescs == null) { - return true; + for (Map.Entry> tableEntry : tables.entrySet()) { + String tableAndDbName = tableEntry.getKey(); + Set partDescs = tableEntry.getValue(); + if (!tableAndDbName.equals(tagDbTable)) { + // Drop-table requests use db.table; Iceberg metadata tables are tagged db.table.metaTable. + if (partDescs != null || !tagDbTable.startsWith(tableAndDbName + ".")) { + continue; } + return true; + } - // Check against partition keys and values and alas for drop partition event. - if (!(cacheTag instanceof CacheTag.PartitionCacheTag)) { - throw new IllegalArgumentException("CacheTag has no partition information, while trying" + - " to evict due to (and based on) a drop partition DDL statement.."); - } + // If true, must be a drop table event, and this cacheTag matches. + if (partDescs == null) { + return true; + } - if (partDescs.contains(tagPartDescMap)) { - return true; - } + // Check against partition keys and values and alas for drop partition event. 
+ if (!(cacheTag instanceof CacheTag.PartitionCacheTag)) { + throw new IllegalArgumentException("CacheTag has no partition information, while trying" + + " to evict due to (and based on) a drop partition DDL statement.."); + } + + if (partDescs.contains(new PartitionSpec(tagPartDescMap))) { + return true; } } return false; } + /** + * Parses a cache-tag table name into a {@link TableName}. Supports legacy {@code db.table}, + * catalog-qualified {@code catalog.db.table}, and Iceberg metadata tables + * {@code catalog.db.table.metaTable}. + */ + private static TableName parseCacheTagTableName(String fullTableName) { + String[] names = fullTableName.split("\\."); + switch (names.length) { + case 2: + return new TableName(Warehouse.DEFAULT_CATALOG_NAME, names[0], names[1], null); + case 3: + if (TableName.SNAPSHOT_REF.matcher(names[2]).matches()) { + return new TableName(Warehouse.DEFAULT_CATALOG_NAME, names[0], names[1], names[2]); + } + return new TableName(names[0], names[1], names[2], null); + case 4: + return new TableName(names[0], names[1], names[2], names[3]); + default: + throw new UnsupportedOperationException( + "Cache tag table name must have 2-4 dot-separated components: " + fullTableName); + } + } + @Override public String toString() { return "Request { entities = " + entities + " }"; @@ -292,7 +312,7 @@ public String toString() { */ public static final class Builder { - private final Map>>> entities; + private final Map>> entities; private Builder() { this.entities = new HashMap<>(); @@ -302,45 +322,64 @@ public static Builder create() { return new Builder(); } - public Builder addPartitionOfATable(String db, String tableName, LinkedHashMap partSpec) { - ensureDb(db); - ensureTable(db, tableName); - entities.get(db).get(tableName).add(partSpec); + /** + * Add a partition of a table scoped to the given catalog. 
+ */ + public Builder addPartitionOfATable(String catalog, String db, String tableName, + Map partSpec) { + ensureTable(catalog, db, tableName); + entities.get(new CatalogDb(catalog, db)).get(tableName).add(new PartitionSpec(partSpec)); return this; } + /** + * Add a partition of a table scoped to the default catalog. + */ + public Builder addPartitionOfATable(String db, String tableName, Map partSpec) { + return addPartitionOfATable(Warehouse.DEFAULT_CATALOG_NAME, db, tableName, partSpec); + } + + /** + * Add a database scoped to the given catalog. + */ + public Builder addDb(String catalog, String db) { + ensureDb(catalog, db); + return this; + } + + /** + * Add a database scoped to the default catalog. + */ public Builder addDb(String db) { - ensureDb(db); + return addDb(Warehouse.DEFAULT_CATALOG_NAME, db); + } + + /** + * Add a table scoped to the given catalog. + */ + public Builder addTable(String catalog, String db, String table) { + ensureTable(catalog, db, table); return this; } + /** + * Add a table scoped to the default catalog. 
+ */ public Builder addTable(String db, String table) { - ensureDb(db); - ensureTable(db, table); - return this; + return addTable(Warehouse.DEFAULT_CATALOG_NAME, db, table); } public Request build() { return new Request(entities); } - private void ensureDb(String dbName) { - Map>> tables = entities.get(dbName); - if (tables == null) { - tables = new HashMap<>(); - entities.put(dbName, tables); - } + private void ensureDb(String catalogName, String dbName) { + entities.computeIfAbsent(new CatalogDb(catalogName, dbName), k -> new HashMap<>()); } - private void ensureTable(String dbName, String tableName) { - ensureDb(dbName); - Map>> tables = entities.get(dbName); - - Set> partitions = tables.get(tableName); - if (partitions == null) { - partitions = new HashSet<>(); - tables.put(tableName, partitions); - } + private void ensureTable(String catalogName, String dbName, String tableName) { + ensureDb(catalogName, dbName); + entities.get(new CatalogDb(catalogName, dbName)).computeIfAbsent(tableName, k -> new HashSet<>()); } /** @@ -350,9 +389,10 @@ private void ensureTable(String dbName, String tableName) { */ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto protoRequest) { entities.clear(); + String catalogName = protoRequest.getCatalogName().toLowerCase(); String dbName = protoRequest.getDbName().toLowerCase(); - Map>> entitiesInDb = new HashMap<>(); + Map> entitiesInDb = new HashMap<>(); List tables = protoRequest.getTableList(); if (tables != null && !tables.isEmpty()) { @@ -364,8 +404,8 @@ public Builder fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto entitiesInDb.put(dbAndTableName, null); continue; } - Set> partitions = new HashSet<>(); - LinkedHashMap partDesc = new LinkedHashMap<>(); + Set partitions = new HashSet<>(); + Map partDesc = new HashMap<>(); for (int valIx = 0; valIx < table.getPartValCount(); ++valIx) { int keyIx = valIx % table.getPartKeyCount(); @@ -373,15 +413,15 @@ public Builder 
fromProtoRequest(LlapDaemonProtocolProtos.EvictEntityRequestProto partDesc.put(table.getPartKey(keyIx).toLowerCase(), table.getPartVal(valIx)); if (keyIx == table.getPartKeyCount() - 1) { - partitions.add(partDesc); - partDesc = new LinkedHashMap<>(); + partitions.add(new PartitionSpec(partDesc)); + partDesc = new HashMap<>(); } } entitiesInDb.put(dbAndTableName, partitions); } } - entities.put(dbName, entitiesInDb); + entities.put(new CatalogDb(catalogName, dbName), entitiesInDb); return this; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseAnalyzer.java index 11f8917334b1..256b3456ff79 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseAnalyzer.java @@ -67,6 +67,9 @@ public void analyzeInternal(ASTNode root) throws SemanticException { if (database == null) { return; } + if (catalogName == null) { + catalogName = database.getCatalogName(); + } // if cascade=true, then we need to authorize the drop table action as well, and add the tables to the outputs boolean isDbLevelLock = true; if (cascade) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java index b544b7b4a24b..fe4d153f1d28 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/database/drop/DropDatabaseOperation.java @@ -53,7 +53,7 @@ public int execute() throws HiveException { if (LlapHiveUtils.isLlapMode(context.getConf())) { ProactiveEviction.Request.Builder llapEvictRequestBuilder = ProactiveEviction.Request.Builder.create(); - llapEvictRequestBuilder.addDb(dbName); // TODO catalog. add catalog for the cache. 
Depend on HIVE-29281 + llapEvictRequestBuilder.addDb(catName, dbName); ProactiveEviction.evict(context.getConf(), llapEvictRequestBuilder.build()); } // Unregister the functions as well diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/AbstractFunctionAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/AbstractFunctionAnalyzer.java index 4d8cae0a7caa..ee90b1af1050 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/AbstractFunctionAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/AbstractFunctionAnalyzer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.hooks.Entity.Type; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -44,6 +45,10 @@ public AbstractFunctionAnalyzer(QueryState queryState) throws SemanticException super(queryState); } + protected AbstractFunctionAnalyzer(QueryState queryState, Hive db) throws SemanticException { + super(queryState, db); + } + /** * Add write entities to the semantic analyzer to restrict function creation to privileged users. 
*/ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/drop/DropFunctionAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/drop/DropFunctionAnalyzer.java index 23e76d897713..a5dd6188e630 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/drop/DropFunctionAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/function/drop/DropFunctionAnalyzer.java @@ -24,14 +24,19 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.FunctionUtils; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory.DDLType; import org.apache.hadoop.hive.ql.ddl.function.AbstractFunctionAnalyzer; import org.apache.hadoop.hive.ql.ddl.DDLWork; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.SemanticException; +import java.util.List; + /** * Analyzer for function dropping commands. */ @@ -41,6 +46,10 @@ public DropFunctionAnalyzer(QueryState queryState) throws SemanticException { super(queryState); } + public DropFunctionAnalyzer(QueryState queryState, Hive db) throws SemanticException { + super(queryState, db); + } + @Override public void analyzeInternal(ASTNode root) throws SemanticException { String functionName = root.getChild(0).getText(); @@ -50,10 +59,16 @@ public void analyzeInternal(ASTNode root) throws SemanticException { FunctionInfo info = FunctionRegistry.getFunctionInfo(functionName); if (info == null) { - if (throwException) { + // getFunctionInfo returns null when the function's JAR resource cannot be loaded (e.g. the + // HDFS file was deleted). 
For permanent functions fall back to a direct metastore lookup so + // that an orphaned definition can still be removed without the JAR being present. + if (!isTemporary && functionExistsInMetastore(functionName)) { + LOG.warn("Function {} has unavailable resources; proceeding with drop using metastore metadata only.", + functionName); + } else if (throwException) { throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName)); } else { - return; // Fail silently + return; } } else if (info.isBuiltIn()) { throw new SemanticException(ErrorMsg.DROP_NATIVE_FUNCTION.getMsg(functionName)); @@ -62,6 +77,17 @@ public void analyzeInternal(ASTNode root) throws SemanticException { DropFunctionDesc desc = new DropFunctionDesc(functionName, isTemporary, null); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc))); - addEntities(functionName, info.getClassName(), isTemporary, null); + String className = info != null ? info.getClassName() : null; + addEntities(functionName, className, isTemporary, null); + } + + private boolean functionExistsInMetastore(String functionName) { + try { + String[] parts = FunctionUtils.getQualifiedFunctionNameParts(functionName.toLowerCase()); + List functions = db.getFunctions(parts[0], parts[1]); + return functions != null && functions.contains(parts[1]); + } catch (HiveException e) { + return false; + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java index 289479b7ee79..cd40dc919668 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/show/ShowColumnsOperation.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.session.SessionState; +import 
org.apache.hadoop.hive.ql.udf.UDFLike; /** * Operation process of showing the columns. @@ -77,10 +78,9 @@ private Matcher getMatcher() { if (columnPattern == null) { columnPattern = "*"; } - columnPattern = columnPattern.toLowerCase(); - columnPattern = columnPattern.replaceAll("\\*", ".*"); - Pattern pattern = Pattern.compile(columnPattern); + String regex = UDFLike.likePatternToRegExp(columnPattern, false, true); + Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); return pattern.matcher(""); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java index b253ec5df5ff..7e6c93ebdcad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/drop/DropTableOperation.java @@ -115,7 +115,7 @@ public int execute() throws HiveException { if (LlapHiveUtils.isLlapMode(context.getConf())) { TableName tableName = HiveTableName.of(table); ProactiveEviction.Request.Builder llapEvictRequestBuilder = ProactiveEviction.Request.Builder.create(); - llapEvictRequestBuilder.addTable(tableName.getDb(), tableName.getTable()); + llapEvictRequestBuilder.addTable(table.getCatName(), tableName.getDb(), tableName.getTable()); ProactiveEviction.evict(context.getConf(), llapEvictRequestBuilder.build()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java index a0eda1ab4eff..89ad090efb57 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/drop/AlterTableDropPartitionOperation.java @@ -147,7 +147,8 @@ private void dropPartitions(boolean isRepl) throws HiveException { 
DDLUtils.addIfAbsentByName(new WriteEntity(partition, WriteEntity.WriteType.DDL_NO_LOCK), context); if (llapEvictRequestBuilder != null) { - llapEvictRequestBuilder.addPartitionOfATable(tableName.getDb(), tableName.getTable(), partition.getSpec()); + llapEvictRequestBuilder.addPartitionOfATable( + tableName.getCat(), tableName.getDb(), tableName.getTable(), partition.getSpec()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewAnalyzer.java index 60b123dbb7bd..38d4081f5208 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewAnalyzer.java @@ -24,7 +24,9 @@ import java.util.List; import java.util.Map; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -34,6 +36,7 @@ import org.apache.hadoop.hive.ql.ddl.DDLUtils; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -41,12 +44,16 @@ import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.StorageFormat; /** * Analyzer for create view commands. 
*/ @DDLType(types = HiveParser.TOK_CREATEVIEW) public class CreateViewAnalyzer extends AbstractCreateViewAnalyzer { + + private static final String VIEW_FORMAT_TABLE_PROPERTY = "view-format"; + public CreateViewAnalyzer(QueryState queryState) throws SemanticException { super(queryState); } @@ -75,6 +82,14 @@ public void analyzeInternal(ASTNode root) throws SemanticException { List partitionColumnNames = children.containsKey(HiveParser.TOK_VIEWPARTCOLS) ? getColumnNames((ASTNode) children.remove(HiveParser.TOK_VIEWPARTCOLS).getChild(0)) : null; + String storageHandlerClassFromTableProps = getViewStorageHandlerClassFromTableProps(properties); + String storageHandlerClass = resolveViewStorageHandlerClass(storageHandlerClassFromTableProps); + + if (storageHandlerClassFromTableProps != null && storageHandlerClass == null) { + throw new SemanticException( + ErrorMsg.VIEW_STORAGE_HANDLER_UNSUPPORTED.format(storageHandlerClassFromTableProps)); + } + assert children.isEmpty(); if (ifNotExists && orReplace) { @@ -94,7 +109,7 @@ public void analyzeInternal(ASTNode root) throws SemanticException { List partitionColumns = getPartitionColumns(partitionColumnNames); setColumnAccessInfo(analyzer.getColumnAccessInfo()); CreateViewDesc desc = new CreateViewDesc(fqViewName, schema, comment, properties, partitionColumnNames, - ifNotExists, orReplace, originalText, expandedText, partitionColumns); + ifNotExists, orReplace, originalText, expandedText, partitionColumns, storageHandlerClass); validateCreateView(desc, analyzer); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc))); @@ -191,6 +206,52 @@ private List getPartitionColumns(List partitionColumnNames) return partitionColumnsCopy; } + /** + * Returns the FQCN of the storage handler that should own external logical view metadata, or {@code null} for a + * classic HMS virtual view. 
Uses {@code storageHandlerClassFromTableProps} when non-null (from the + * {@code view-format} table property); otherwise the Hive config {@code hive.default.storage.handler.class}. + */ + private String resolveViewStorageHandlerClass(String storageHandlerClassFromTableProps) + throws SemanticException { + + String storageHandlerClassFromConfig = + StringUtils.trimToNull(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER)); + + String storageHandlerClass = + storageHandlerClassFromTableProps != null ? storageHandlerClassFromTableProps : storageHandlerClassFromConfig; + + if (StringUtils.isBlank(storageHandlerClass)) { + return null; + } + + try { + HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(conf, storageHandlerClass); + + if (storageHandler != null && storageHandler.supportsExternalViewCatalog()) { + return storageHandlerClass; + } + } catch (HiveException e) { + throw new SemanticException(e); + } + + return null; + } + + private static String getViewStorageHandlerClassFromTableProps(Map properties) + throws SemanticException { + if (properties == null) { + return null; + } + for (Map.Entry e : properties.entrySet()) { + if (e.getKey() != null + && VIEW_FORMAT_TABLE_PROPERTY.equalsIgnoreCase(e.getKey()) + && StringUtils.isNotBlank(e.getValue())) { + return StorageFormat.resolveStorageHandlerClassName(e.getValue().trim()); + } + } + return null; + } + private void validateCreateView(CreateViewDesc desc, SemanticAnalyzer analyzer) throws SemanticException { try { validateTablesUsed(analyzer); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewDesc.java index e71cbce773f1..bcbe536bf559 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewDesc.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.Map; +import 
org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.parse.ReplicationSpec; @@ -40,13 +41,14 @@ public class CreateViewDesc extends AbstractCreateViewDesc { private final boolean ifNotExists; private final boolean replace; private final List partitionColumns; + private final String storageHandlerClass; private ReplicationSpec replicationSpec = null; private String ownerName = null; public CreateViewDesc(String viewName, List schema, String comment, Map properties, List partitionColumnNames, boolean ifNotExists, boolean replace, String originalText, - String expandedText, List partitionColumns) { + String expandedText, List partitionColumns, String storageHandlerClass) { super(viewName, schema, originalText, expandedText); this.comment = comment; this.properties = properties; @@ -54,6 +56,7 @@ public CreateViewDesc(String viewName, List schema, String comment, this.ifNotExists = ifNotExists; this.replace = replace; this.partitionColumns = partitionColumns; + this.storageHandlerClass = storageHandlerClass; } @Explain(displayName = "partition columns") @@ -89,6 +92,19 @@ public boolean isReplace() { return replace; } + /** + * @return FQCN of the {@link org.apache.hadoop.hive.ql.metadata.HiveStorageHandler} that stores view metadata in an + * external catalog, or {@code null} for a classic HMS-only virtual view. + */ + @Explain(displayName = "external logical view storage handler", displayOnlyOnTrue = true) + public String getStorageHandlerClass() { + return storageHandlerClass; + } + + public boolean usesStorageHandler() { + return !StringUtils.isBlank(storageHandlerClass); + } + /** * @param replicationSpec Sets the replication spec governing this create. * This parameter will have meaningful values only for creates happening as a result of a replication. 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewOperation.java index b6a41a8d4fb8..fe58ff0aa644 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateViewOperation.java @@ -82,6 +82,10 @@ public int execute() throws HiveException { if (desc.getProperties() != null) { oldview.getTTable().getParameters().putAll(desc.getProperties()); } + if (!desc.usesStorageHandler()) { + // External view is replaced with a native Hive view + clearStorageHandlerProp(oldview); + } oldview.setPartCols(desc.getPartitionColumns()); oldview.checkValidity(null); @@ -105,6 +109,18 @@ public int execute() throws HiveException { return 0; } + private void clearStorageHandlerProp(Table oldview) { + Map params = oldview.getParameters(); + if (params == null) { + return; + } + String fqcn = params.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + if (fqcn == null) { + return; + } + params.remove(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + } + private Table createViewObject() throws HiveException { TableName name = HiveTableName.of(desc.getViewName()); Table view = new Table(name.getDb(), name.getTable()); @@ -129,6 +145,13 @@ private Table createViewObject() throws HiveException { StorageFormat storageFormat = new StorageFormat(context.getConf()); storageFormat.fillDefaultStorageFormat(false, false); + if (desc.usesStorageHandler()) { + storageFormat.setStorageHandler(desc.getStorageHandlerClass()); + view.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + desc.getStorageHandlerClass().trim()); + } + view.setInputFormatClass(storageFormat.getInputFormat()); view.setOutputFormatClass(storageFormat.getOutputFormat()); diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index a5bc66733f46..b6d198264fab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -26,6 +26,7 @@ import com.google.common.collect.Sets; import java.util.Comparator; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -239,7 +240,7 @@ public class DDLPlanUtils { + TABLE_NAME + "> PARTITION <" + PARTITION_NAME + "> FOR COLUMN <" + COLUMN_NAME + "> BUT IT IS NOT SUPPORTED YET. THE BASE64 VALUE FOR THE HISTOGRAM IS <" + BASE_64_VALUE + "> "; - + /** * Returns the create database query for a give database name. * @@ -516,13 +517,14 @@ public String getAlterTableStmtCol(ColumnStatisticsData columnStatisticsData, St * Parses the ColumnStatistics for all the columns in a given table and adds the alter table update * statistics command for each column. 
* + * @param conf * @param tbl */ - public List getAlterTableStmtTableStatsColsAll(Table tbl) + public List getAlterTableStmtTableStatsColsAll(HiveConf conf, Table tbl) throws HiveException { List alterTblStmt = new ArrayList<>(); List accessedColumns = getTableColumnNames(tbl); - List tableColumnStatistics = Hive.get().getTableColumnStatistics( + List tableColumnStatistics = Hive.get(conf).getTableColumnStatistics( tbl, accessedColumns, true); ColumnStatisticsObj[] columnStatisticsObj = tableColumnStatistics.toArray(new ColumnStatisticsObj[0]); @@ -648,24 +650,24 @@ public String getAlterTableStmtPartitionStatsBasic(Partition pt) { return command.render(); } - public List getDDLPlanForPartitionWithStats(Table table, + public List getDDLPlanForPartitionWithStats(HiveConf conf, Table table, Map> tableToPartitionList ) throws HiveException { - List alterTableStmt = new ArrayList(); + List alterTableStmt = new ArrayList<>(); String tableName = table.getTableName(); for (Partition pt : tableToPartitionList.get(tableName)) { alterTableStmt.add(getAlterTableAddPartition(pt)); alterTableStmt.add(getAlterTableStmtPartitionStatsBasic(pt)); } String databaseName = table.getDbName(); - List partNames = new ArrayList(); + List partNames = new ArrayList<>(); //TODO : Check if only Accessed Column Statistics Can be Retrieved From the HMS. 
List columnNames = getTableColumnNames(table); tableToPartitionList.get(tableName).forEach(p -> partNames.add(p.getName())); Map> partitionColStats = - Hive.get().getPartitionColumnStatistics(databaseName, - tableName, partNames, columnNames, - true); + Hive.get(conf).getPartitionColumnStatistics(databaseName, + tableName, partNames, columnNames, + true); Map partitionToActualName = new HashMap<>(); tableToPartitionList.get(tableName).forEach(p -> partitionToActualName.put(p.getName(), getPartitionActualName(p))); partitionColStats.keySet().stream().sorted().forEach(partitionName -> diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 703b92fa6270..b83e0a88a5e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -475,9 +475,9 @@ public void addStats(Table table,List alterTableStmt ,Map explainStmt, DDLPlanUtils ddlPl public void getDDLPlan(PrintStream out) throws Exception { DDLPlanUtils ddlPlanUtils = new DDLPlanUtils(); - Set createDatabase = new TreeSet(); - List tableCreateStmt = new LinkedList(); - List tableBasicDef = new LinkedList(); - List createViewList = new LinkedList(); - List alterTableStmt = new LinkedList(); - List explainStmt = new LinkedList(); + Set createDatabase = new TreeSet<>(); + List tableCreateStmt = new LinkedList<>(); + List tableBasicDef = new LinkedList<>(); + List createViewList = new LinkedList<>(); + List alterTableStmt = new LinkedList<>(); + List explainStmt = new LinkedList<>(); Map tableMap = new HashMap<>(); Map> tablePartitionsMap = new HashMap<>(); for (ReadEntity ent : work.getInputs()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index a29e532b113b..f58fafc85566 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -770,8 +770,7 @@ public static TableDesc getTableDesc(Table tbl) { if (tbl.getSnapshotRef() != null) { props.put(SNAPSHOT_REF, tbl.getSnapshotRef()); } - return (new TableDesc(tbl.getInputFormatClass(), tbl - .getOutputFormatClass(), props)); + return new TableDesc(tbl.getInputFormatClass(), tbl.getOutputFormatClass(), props, tbl.getCatName()); } // column names and column types are all delimited by comma diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java index 9287fd75e766..787cadd256b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java @@ -547,8 +547,9 @@ public static Task createViewTask(MetaData metaData, String dbNameToLoadIn, H // texts using new DB name. Currently it refers to the source database name. } - CreateViewDesc desc = new CreateViewDesc(dbDotView, table.getCols(), null, table.getParameters(), - table.getPartColNames(), false, false, viewOriginalText, viewExpandedText, table.getPartCols()); + CreateViewDesc desc = new CreateViewDesc(dbDotView, table.getCols(), null, table.getParameters(), + table.getPartColNames(), false, false, viewOriginalText, viewExpandedText, + table.getPartCols(), null); desc.setReplicationSpec(metaData.getReplicationSpec()); desc.setOwnerName(table.getOwner()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ExternalSessionsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ExternalSessionsRegistry.java index 7f279c3648d8..8f822c2585c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ExternalSessionsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ExternalSessionsRegistry.java @@ -40,4 +40,11 @@ public interface ExternalSessionsRegistry { * Closes the external session registry. 
*/ void close(); + + /** + * Returns true if this registry instance currently holds a claim on the given AM. + */ + default boolean isClaimed(String appId) { + return true; // Non-ZK registries case is always true + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezExternalSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezExternalSessionState.java index b3103d3f5918..bb4cc106d5b5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezExternalSessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezExternalSessionState.java @@ -19,17 +19,26 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.io.IOException; +import java.util.concurrent.TimeUnit; +import com.google.protobuf.ServiceException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.client.TezClient; +import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; +import org.apache.tez.dag.api.client.DAGClient; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC; +import org.apache.tez.dag.api.records.DAGProtos; import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor; /** @@ -97,6 +106,12 @@ protected void openInternal(String[] additionalFilesNotFromConf, boolean llapMode = isLlapMode(); TezConfiguration tezConfig = new TezConfiguration(defaultTezConfiguration); + // Propagate per-session tez.am.registry.namespace override 
(set via JDBC URL) + // so the TezClient locates the correct AM for the target LLAP cluster. + String registryNs = conf.get("tez.am.registry.namespace"); + if (registryNs != null && !registryNs.isEmpty()) { + tezConfig.set("tez.am.registry.namespace", registryNs); + } setupSessionAcls(tezConfig, conf); ServicePluginsDescriptor spd = createServicePluginDescriptor(llapMode, tezConfig); Credentials llapCredentials = createLlapCredentials(llapMode, tezConfig); @@ -133,6 +148,11 @@ public void close(boolean keepDagFilesDir) throws Exception { // We never close external sessions that don't have errors. try { if (externalAppId != null) { + LOG.debug("Returning external session with appID: {}", externalAppId); + SessionState sessionState = SessionState.get(); + if (sessionState != null) { + sessionState.setTezSession(null); + } registry.returnSession(externalAppId); } } catch (Exception e) { @@ -181,4 +201,73 @@ public boolean killQuery(String reason) throws HiveException { killQuery.killQuery(queryId, reason, conf, false); return true; } + + @Override + public DAGClient submitDAG(DAG dag) throws TezException, IOException { + if (!registry.isClaimed(externalAppId)) { + throw new TezException("Cannot submit DAG as the Tez Session no-longer owns the AM: " + externalAppId); + } + try { + return getTezClient().submitDAG(dag); + } catch (TezException e) { + if (e.getMessage() == null || !e.getMessage().contains("App master already running a DAG")) { + throw e; + } + tryKillRunningDAGs(getTezClient()); + return getTezClient().submitDAG(dag); + } + } + + private void tryKillRunningDAGs(TezClient session) throws TezException { + if (!registry.isClaimed(externalAppId)) { + throw new TezException("Cannot kill running DAG as the Tez Session no-longer owns the AM: " + externalAppId); + } + LOG.info("External session has an AM which is already running a DAG on app ID {}", externalAppId); + DAGClientAMProtocolBlockingPB proxy = session.sendAMHeartbeat(null); + if (proxy == null) { + 
throw new TezException("Error while trying to connect to AM for app ID " + externalAppId); + } + long killTimeoutMs = TimeUnit.SECONDS.toMillis( + HiveConf.getIntVar(conf, ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_WAIT_MAX_ATTEMPTS)); + try { + DAGClientAMProtocolRPC.GetAllDAGsResponseProto allDAGSResponse = + proxy.getAllDAGs(null, DAGClientAMProtocolRPC.GetAllDAGsRequestProto.newBuilder().build()); + for (String dagId : allDAGSResponse.getDagIdList()) { + LOG.info("External session: attempting to kill dagId {} on app ID {}", dagId, externalAppId); + proxy.tryKillDAG(null, DAGClientAMProtocolRPC.TryKillDAGRequestProto.newBuilder().setDagId(dagId).build()); + waitForDagTerminal(proxy, dagId, killTimeoutMs); + } + } catch (Exception e) { + throw new TezException("Error while trying to kill existing DAG running on app ID " + externalAppId, e); + } + } + + private void waitForDagTerminal(DAGClientAMProtocolBlockingPB proxy, String dagId, long timeoutMs) + throws TezException, ServiceException { + long startTimeMs = System.currentTimeMillis(); + long pollIntervalMs = conf.getTimeVar(ConfVars.TEZ_DAG_STATUS_CHECK_INTERVAL, TimeUnit.MILLISECONDS); + while (System.currentTimeMillis() - startTimeMs < timeoutMs) { + long remainingMs = timeoutMs - (System.currentTimeMillis() - startTimeMs); + DAGClientAMProtocolRPC.GetDAGStatusResponseProto response = proxy.getDAGStatus(null, + DAGClientAMProtocolRPC.GetDAGStatusRequestProto.newBuilder() + .setDagId(dagId) + .setTimeout(Math.min(pollIntervalMs, remainingMs)) + .build()); + if (response.hasDagStatus() && response.getDagStatus().hasState() + && isTerminalDagState(response.getDagStatus().getState())) { + LOG.info("External session: dagId {} on app ID {} reached terminal state {}", dagId, externalAppId, + response.getDagStatus().getState()); + return; + } + } + throw new TezException("Timed out after " + timeoutMs + " ms waiting for orphan DAG " + dagId + + " on app ID " + externalAppId + " to reach terminal state after 
kill"); + } + + private static boolean isTerminalDagState(DAGProtos.DAGStatusStateProto state) { + return switch (state) { + case DAG_SUCCEEDED, DAG_KILLED, DAG_FAILED, DAG_ERROR -> true; + default -> false; + }; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSession.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSession.java index 68844bd81728..2f64ec41a58f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSession.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSession.java @@ -34,7 +34,9 @@ import org.apache.hadoop.hive.ql.wm.WmContext; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.tez.client.TezClient; +import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.TezException; +import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGStatus; /** @@ -86,6 +88,7 @@ public String toString() { HiveConf getConf(); TezClient getTezClient(); + DAGClient submitDAG(DAG dag) throws TezException, IOException; boolean isOpen(); boolean isOpening(); boolean getDoAsEnabled(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java index 605a92ebc8f5..9b9fbfebd9f4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java @@ -353,6 +353,20 @@ void returnSession(TezSession tezSessionState) { + " belongs to the pool. 
Put it back in"); defaultSessionPool.returnSession((TezSessionPoolSession)tezSessionState); } + + if (useExternalSessions && !tezSessionState.isDefault()) { + if (tezSessionState.getTezClient() != null + && tezSessionState.getTezClient().getAppMasterApplicationId() != null) { + try { + tezSessionState.close(false); + } catch (Exception ex) { + LOG.warn("Failed to return external Tez session {}", tezSessionState.getSessionId(), ex); + } + } else { + LOG.warn("Not returning session '{}' as tez client or app id is null", tezSessionState.getSessionId()); + } + } + // non default session nothing changes. The user can continue to use the existing // session in the SessionState } finally { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolSession.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolSession.java index 415072f221da..a473b32e8879 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolSession.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolSession.java @@ -34,7 +34,9 @@ import org.apache.hadoop.hive.registry.impl.TezAmInstance; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.tez.client.TezClient; +import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.TezException; +import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -337,6 +339,11 @@ public TezClient getTezClient() { return baseSession.getTezClient(); } + @Override + public DAGClient submitDAG(DAG dag) throws TezException, IOException { + return baseSession.submitDAG(dag); + } + @Override public boolean isOpening() { return baseSession.isOpening(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java index 2924416ad480..b50c15cf9b63 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java @@ -76,11 +76,13 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.client.TezClient; import org.apache.tez.common.TezUtils; +import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.PreWarmVertex; import org.apache.tez.dag.api.SessionNotRunning; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.UserPayload; +import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGStatus; import org.apache.tez.dag.api.client.Progress; import org.apache.tez.mapreduce.hadoop.DeprecatedKeys; @@ -820,6 +822,11 @@ public TezClient getTezClient() { return session; } + @Override + public DAGClient submitDAG(DAG dag) throws TezException, IOException { + return getTezClient().submitDAG(dag); + } + @Override public LocalResource getAppJarLr() { return appJarLr; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index baba623567d2..38cd117a92dd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -476,9 +476,8 @@ void ensureSessionHasResources( TezSession session, String[] nonConfResources) throws Exception { TezClient client = session.getTezClient(); // TODO null can also mean that this operation was interrupted. Should we really try to re-create the session in that case ? - if (client == null) { - // Note: the only sane case where this can happen is the non-pool one. 
We should get rid - // of it, in non-pool case perf doesn't matter so we might as well open at get time + if (client == null || !session.isOpen()) { + // Note: We should get rid of it, in non-pool case perf doesn't matter so we might as well open at get time // and then call update like we do in the else. // Can happen if the user sets the tez flag after the session was established. LOG.info("Tez session hasn't been created yet. Opening session"); @@ -696,7 +695,7 @@ DAGClient submit(DAG dag, Ref sessionStateRef) throws Exception { private DAGClient submitInternal(DAG dag, TezSession sessionState) throws TezException, IOException { runtimeContext.init(sessionState); - return sessionState.getTezClient().submitDAG(dag); + return sessionState.submitDAG(dag); } private void sessionDestroyOrReturnToPool(Ref sessionStateRef, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ZookeeperExternalSessionsRegistryClient.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ZookeeperExternalSessionsRegistryClient.java index 550c77e573ab..08df3bc922ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ZookeeperExternalSessionsRegistryClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ZookeeperExternalSessionsRegistryClient.java @@ -21,6 +21,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; +import java.util.concurrent.TimeUnit; import com.google.common.annotations.VisibleForTesting; import org.apache.curator.framework.CuratorFramework; @@ -31,9 +32,14 @@ import org.apache.curator.framework.recipes.cache.CuratorCacheListener; import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; +import org.apache.curator.framework.recipes.locks.InterProcessMutex; +import org.apache.curator.framework.state.ConnectionState; import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,15 +48,20 @@ // TODO: tez should provide this registry public class ZookeeperExternalSessionsRegistryClient implements ExternalSessionsRegistry { private static final Logger LOG = LoggerFactory.getLogger(ZookeeperExternalSessionsRegistryClient.class); + private static final String PATH_SEPARATOR = "/"; private final HiveConf initConf; private final Set available = new HashSet<>(); private final Set taken = new HashSet<>(); private final Object lock = new Object(); private final int maxAttempts; - + private CuratorFramework client; private CuratorCache cache; + private CuratorCache claimsCache; + private InterProcessMutex globalQueue; + private String claimsPath; private volatile boolean isInitialized; + private volatile boolean zkConnectionHealthy = false; public ZookeeperExternalSessionsRegistryClient(final HiveConf initConf) { @@ -59,25 +70,91 @@ public ZookeeperExternalSessionsRegistryClient(final HiveConf initConf) { } private static String getApplicationId(final ChildData childData) { - return childData.getPath().substring(childData.getPath().lastIndexOf("/") + 1); + return childData.getPath().substring(childData.getPath().lastIndexOf(PATH_SEPARATOR) + 1); } private void init() { String zkServer = HiveConf.getVar(initConf, ConfVars.HIVE_ZOOKEEPER_QUORUM); + int sessionTimeoutMs = (int) HiveConf.getTimeVar(initConf, ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, + TimeUnit.MILLISECONDS); + int connectionTimeoutMs = (int) HiveConf.getTimeVar(initConf, ConfVars.HIVE_ZOOKEEPER_CONNECTION_TIMEOUT, + TimeUnit.MILLISECONDS); + int baseSleepTimeMs = (int) HiveConf.getTimeVar(initConf, ConfVars.HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME, + TimeUnit.MILLISECONDS); + int maxRetries = HiveConf.getIntVar(initConf, 
ConfVars.HIVE_ZOOKEEPER_CONNECTION_MAX_RETRIES); String zkNamespace = HiveConf.getVar(initConf, ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE); String effectivePath = normalizeZkPath(zkNamespace); - CuratorFramework client = CuratorFrameworkFactory.newClient(zkServer, new ExponentialBackoffRetry(1000, 3)); + this.claimsPath = effectivePath + "-claims"; + // After connection state changes to SUSPENDED, the client has already consumed ~2/3 of the negotiated session + // timeout. Use 33% of the remaining window so LOST aligns with when the ZK server expires the session and drops + // ephemeral claim nodes. For Ref: Curator TN14 + this.client = CuratorFrameworkFactory.builder() + .connectString(zkServer) + .sessionTimeoutMs(sessionTimeoutMs) + .connectionTimeoutMs(connectionTimeoutMs) + .simulatedSessionExpirationPercent(33) + .retryPolicy(new ExponentialBackoffRetry(baseSleepTimeMs, maxRetries)) + .build(); + synchronized (lock) { + client.getConnectionStateListenable().addListener((curatorClient, newState) -> { + if (newState == ConnectionState.CONNECTED || newState == ConnectionState.RECONNECTED) { + zkConnectionHealthy = true; + } else if (newState == ConnectionState.LOST) { + zkConnectionHealthy = false; + Set sessionsToKill; + synchronized (lock) { + LOG.error("ZK connection state has changed to lost; killing running DAGs on claimed AMs: {}", taken); + sessionsToKill = new HashSet<>(taken); + taken.clear(); + } + sessionsToKill.forEach(TezJobMonitor::killRunningDAGsForApplication); + } + }); client.start(); + + this.globalQueue = new InterProcessMutex(client, effectivePath + "-queue"); this.cache = CuratorCache.build(client, effectivePath); CuratorCacheListener listener = CuratorCacheListener.builder() .forPathChildrenCache(effectivePath, client, new ExternalSessionsPathListener()) .build(); cache.listenable().addListener(listener); cache.start(); + + this.claimsCache = CuratorCache.build(client, claimsPath); + CuratorCacheListener claimsListener = 
CuratorCacheListener.builder().forCreates( + childData -> { + if (childData == null) { + return; + } + String applicationId = getApplicationId(childData); + synchronized (lock) { + available.remove(applicationId); + } + }).forDeletes( + childData -> { + if (childData == null) { + return; + } + String applicationId = getApplicationId(childData); + synchronized (lock) { + if (!taken.contains(applicationId)) { + if (cache.get(effectivePath + PATH_SEPARATOR + applicationId).isPresent()) { + available.add(applicationId); + lock.notifyAll(); + } else { + LOG.info("Ignoring AM claim removal for {} because the base AM node no longer exists.", + applicationId); + } + } + } + }).build(); + claimsCache.listenable().addListener(claimsListener); + claimsCache.start(); + cache.stream() .filter(childData -> childData.getPath() != null - && childData.getPath().startsWith(effectivePath + "/")) + && childData.getPath().startsWith(effectivePath + PATH_SEPARATOR)) .forEach(childData -> available.add(getApplicationId(childData))); LOG.info("Initial external sessions: {}", available); isInitialized = true; @@ -86,27 +163,54 @@ private void init() { @VisibleForTesting static String normalizeZkPath(String zkNamespace) { - return (zkNamespace.startsWith("/") ? zkNamespace : "/" + zkNamespace); + return (zkNamespace.startsWith(PATH_SEPARATOR) ? 
zkNamespace : PATH_SEPARATOR + zkNamespace); } @Override public String getSession() throws Exception { - synchronized (lock) { - if (!isInitialized) { - init(); - } - long endTimeNs = System.nanoTime() + (1000000000L * maxAttempts); - while (available.isEmpty() && ((endTimeNs - System.nanoTime()) > 0)) { - lock.wait(1000L); + if (!isInitialized) { + synchronized (lock) { + if (!isInitialized) { + init(); + } } - Iterator iter = available.iterator(); - if (!iter.hasNext()) { - throw new IOException("Cannot get a session after " + maxAttempts + " attempts"); + } + + long startTimeNs = System.nanoTime(); + long timeoutNs = TimeUnit.SECONDS.toNanos(maxAttempts); + long queueWaitTimeMs = Math.max(0, (timeoutNs - (System.nanoTime() - startTimeNs)) / 1000000L); + if (!globalQueue.acquire(queueWaitTimeMs, TimeUnit.MILLISECONDS)) { + throw new IOException("Cannot get a session (timed out in queue) after " + maxAttempts + " seconds"); + } + try { + synchronized (lock) { + while (System.nanoTime() - startTimeNs < timeoutNs) { + Iterator iter = available.iterator(); + + while (iter.hasNext()) { + String appId = iter.next(); + try { + String claimNodePath = claimsPath + PATH_SEPARATOR + appId; + client.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath(claimNodePath); + iter.remove(); + taken.add(appId); + return appId; + } catch (KeeperException.NodeExistsException e) { + iter.remove(); + } + } + long remainingTimeNs = timeoutNs - (System.nanoTime() - startTimeNs); + if (remainingTimeNs > 0) { + // Add one to the remaining-time milliseconds computation to prevent the case where + // (remainingTimeNs / 1000000L) is 0, since wait(0) blocks until notified and ignores the timeout.
+ long waitTimeMs = Math.min(1000L, (remainingTimeNs / 1000000L) + 1); + lock.wait(waitTimeMs); + } + } + throw new IOException("Cannot get a session after waiting for " + maxAttempts + " seconds (timeout exhausted)"); } - String appId = iter.next(); - iter.remove(); - taken.add(appId); - return appId; + } finally { + globalQueue.release(); } } @@ -119,6 +223,16 @@ public void returnSession(String appId) { if (!taken.remove(appId)) { return; // Session has been removed from ZK. } + + try { + client.delete().guaranteed().forPath(claimsPath + PATH_SEPARATOR + appId); + } catch (KeeperException.NoNodeException e) { + // If the claim Node has already been deleted, we can ignore it. + LOG.debug("Claim Node has already been deleted for the session {}", appId, e); + } catch (Exception e) { + LOG.warn("Failed to delete claim node for session {}", appId, e); + } + available.add(appId); lock.notifyAll(); } @@ -126,9 +240,15 @@ public void returnSession(String appId) { @Override public void close() { + if (claimsCache != null) { + claimsCache.close(); + } if (cache != null) { cache.close(); } + if (client != null) { + client.close(); + } } private final class ExternalSessionsPathListener implements PathChildrenCacheListener { @@ -146,23 +266,38 @@ public void childEvent(final CuratorFramework client, final PathChildrenCacheEve synchronized (lock) { switch (event.getType()) { - case CHILD_UPDATED, CHILD_ADDED: - if (available.contains(applicationId) || taken.contains(applicationId)) { - return; // We do not expect updates to existing sessions; ignore them for now. - } - available.add(applicationId); - break; - case CHILD_REMOVED: - if (taken.remove(applicationId)) { - LOG.warn("The session in use has disappeared from the registry ({})", applicationId); - } else if (!available.remove(applicationId)) { - LOG.warn("An unknown session has been removed ({})", applicationId); - } - break; - default: - // Ignore all the other events; logged above. 
+ case CHILD_UPDATED, CHILD_ADDED: + if (available.contains(applicationId) || taken.contains(applicationId)) { + return; // We do not expect updates to existing sessions; ignore them for now. + } + if (claimsCache != null && claimsCache.get(claimsPath + PATH_SEPARATOR + applicationId).isPresent()) { + LOG.info("Ignoring newly added AM {} because it is already claimed by another session.", applicationId); + return; + } + available.add(applicationId); + lock.notifyAll(); + break; + case CHILD_REMOVED: + if (taken.remove(applicationId)) { + LOG.warn("The session in use has disappeared from the registry ({})", applicationId); + } else if (!available.remove(applicationId)) { + LOG.warn("An unknown session has been removed ({})", applicationId); + } + break; + default: + // Ignore all the other events; logged above. } } } } + + @Override + public boolean isClaimed(String appId) { + if (!zkConnectionHealthy) { + return false; + } + synchronized (lock) { + return taken.contains(appId); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java index 92844f4d5716..871258837af6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java @@ -499,6 +499,20 @@ public static void killRunningJobs() { } } + public static void killRunningDAGsForApplication(String applicationId) { + synchronized (shutdownList) { + for (DAGClient c : shutdownList) { + try { + if (applicationId.equals(c.getSessionIdentifierString())) { + c.tryKillDAG(); + } + } catch (Exception e) { + LOG.error("Error while trying to kill running DAG on tez session {}", applicationId, e); + } + } + } + } + static long getCounterValueByGroupName(TezCounters vertexCounters, String groupNamePattern, String counterName) { TezCounter tezCounter = 
vertexCounters.getGroup(groupNamePattern).findCounter(counterName); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index d46eded36e61..9248897bf745 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -275,7 +275,7 @@ private VectorizedOrcAcidRowBatchReader(JobConf conf, OrcSplit orcSplit, Reporte } PartitionDesc partitionDesc = LlapHiveUtils.partitionDescForPath(orcSplit.getPath(), mapWork.getPathToPartitionInfo()); - cacheTag = LlapHiveUtils.getDbAndTableNameForMetrics(orcSplit.getPath(), true, partitionDesc); + cacheTag = LlapHiveUtils.getCacheTag(orcSplit.getPath(), true, partitionDesc); } else { cacheTag = null; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index 2a3bccb6d9a5..c6bd5fc87815 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -15,44 +15,34 @@ import java.io.IOException; import java.util.List; -import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.io.DataCache; import org.apache.hadoop.hive.common.io.FileMetadataCache; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport; -import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import 
org.apache.hadoop.hive.ql.io.LlapCacheOnlyInputFormatInterface; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.JobConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; - import org.apache.parquet.hadoop.ParquetInputFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.mapreduce.lib.input.FileInputFormat.LIST_STATUS_NUM_THREADS; /** - * - * A Parquet InputFormat for Hive (with the deprecated package mapred) - * - * NOTE: With HIVE-9235 we removed "implements VectorizedParquetInputFormat" since all data types - * are not currently supported. Removing the interface turns off vectorization. + * A Parquet InputFormat for Hive (with the deprecated package mapred). */ public class MapredParquetInputFormat extends FileInputFormat implements InputFormatChecker, VectorizedInputFormatInterface, LlapCacheOnlyInputFormatInterface { @@ -72,6 +62,19 @@ protected MapredParquetInputFormat(final ParquetInputFormat input vectorizedSelf = new VectorizedParquetInputFormat(); } + /** + * Parallelize split-generation file listing by sizing Hadoop's {@code LocatedFileStatusFetcher} + * from {@code hive.compute.splits.num.threads}. We set it on the job conf here because that + * property's cluster default does not reach the conf split generation uses; a value of 1 stays + * serial. 
+ */ + @Override + protected FileStatus[] listStatus(JobConf job) throws IOException { + job.setInt(LIST_STATUS_NUM_THREADS, + HiveConf.getIntVar(job, HiveConf.ConfVars.HIVE_COMPUTE_SPLITS_NUM_THREADS)); + return super.listStatus(job); + } + @SuppressWarnings({ "unchecked", "rawtypes" }) @Override public org.apache.hadoop.mapred.RecordReader getRecordReader( @@ -115,6 +118,6 @@ public boolean validateInput(FileSystem fs, HiveConf conf, List file @Override public VectorizedSupport.Support[] getSupportedFeatures() { - return null; + return new VectorizedSupport.Support[] { VectorizedSupport.Support.DECIMAL_64 }; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java index 348406d9cfac..c94dd493ae87 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java @@ -104,6 +104,32 @@ public interface ParquetDataColumnReader { */ byte[] readDecimal(); + /** + * True when this reader can supply DECIMAL_64 values as raw unscaled longs at the column's scale, + * with no per-row HiveDecimal/byte[] conversion -- i.e. an INT32/INT64- or byte-array-backed decimal + * whose file scale equals the requested Hive scale and whose value fits a long. When true, the + * long-backed reader may call {@link #readDecimal64()} / {@link #readDecimal64(int)} instead of + * {@link #readDecimal()} / {@link #readDecimal(int)}. + */ + default boolean isFastDecimal64() { + return false; + } + + /** + * @return the next value as a raw unscaled decimal64 long. Only valid when {@link #isFastDecimal64()}. + * {@link #isValid()} is set false when the value does not fit the Hive precision (caller -> NULL). 
+ */ + default long readDecimal64() { + throw new UnsupportedOperationException(); + } + + /** + * @return the dictionary value at {@code id} as a raw unscaled decimal64 long. See {@link #readDecimal64()}. + */ + default long readDecimal64(int id) { + throw new UnsupportedOperationException(); + } + /** * @return the next Double from the page */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java index 6b44459b8064..472deeca41f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java @@ -382,6 +382,19 @@ byte[] validatedScaledDecimal(int inpScale) { } } + // Validate a raw unscaled decimal64 long (already at the Hive scale) against the Hive precision. + // Sets isValid; returns the value unchanged when in range, else 0 (caller marks the entry NULL). + // Used by the Decimal64 identity fast path; bounds via HiveDecimalWritable to avoid hand-rolling. + long validatedDecimal64(long unscaledValue) { + long absMax = HiveDecimalWritable.getDecimal64AbsMax(hivePrecision); + if (unscaledValue >= -absMax && unscaledValue <= absMax) { + this.isValid = true; + return unscaledValue; + } + this.isValid = false; + return 0; + } + /** * Helper function to validate double data. Sets the isValid to true if the data is valid * for the type it will be read in, otherwise false. @@ -1304,17 +1317,21 @@ private static byte[] convertToBytes(Timestamp value) { * and returned as per the type defined in HMS. */ public static class TypesFromDecimalPageReader extends DefaultParquetDataColumnReader { - private short scale; + // Parquet file decimal precision and scale. 
+ private final int precision; + private final short scale; - public TypesFromDecimalPageReader(ValuesReader realReader, int length, short scale, + public TypesFromDecimalPageReader(ValuesReader realReader, int length, int precision, short scale, int hivePrecision, int hiveScale) { super(realReader, length, hivePrecision, hiveScale); + this.precision = precision; this.scale = scale; } - public TypesFromDecimalPageReader(Dictionary dict, int length, short scale, int hivePrecision, - int hiveScale) { + public TypesFromDecimalPageReader(Dictionary dict, int length, int precision, short scale, + int hivePrecision, int hiveScale) { super(dict, length, hivePrecision, hiveScale); + this.precision = precision; this.scale = scale; } @@ -1460,6 +1477,47 @@ public byte[] readDecimal(int id) { hiveDecimalWritable.set(dict.decodeToBinary(id).getBytesUnsafe(), scale); return super.validatedScaledDecimal(scale); } + + /** + * Decimal64 identity fast path for FIXED_LEN_BYTE_ARRAY/BINARY decimals: the stored big-endian + * two's-complement unscaled value is read straight into a long with only a bounds check, instead + * of the per-row HiveDecimal materialization {@link #readDecimal()} performs via + * {@code fastSetFromBigIntegerBytesAndScale}. Rescaled or wider decimals fall back to + * {@link #readDecimal()}. + */ + @Override + public boolean isFastDecimal64() { + // Valid only with no rescale (file scale == Hive scale) and precision <= 18, so the unscaled + // value fits a long. + return scale == hiveScale && HiveDecimalWritable.isPrecisionDecimal64(precision); + } + + @Override + public long readDecimal64() { + return validatedDecimal64(binaryToUnscaledLong(valuesReader.readBytes())); + } + + @Override + public long readDecimal64(int id) { + return validatedDecimal64(binaryToUnscaledLong(dict.decodeToBinary(id))); + } + + /** + * Big-endian two's-complement bytes -> unscaled long (the low 64 bits). 
The fast path runs only + * for file precision <= 18 (see isFastDecimal64), so a value conforming to that precision fits a + * long; the leading bytes of a wider-than-8-byte array are sign extension and shift out losslessly. + */ + private static long binaryToUnscaledLong(Binary value) { + ByteBuffer buf = value.toByteBuffer(); + int pos = buf.position(); + int len = buf.remaining(); + // Sign-extend from the most significant byte so negative values are reconstructed correctly. + long v = (len > 0 && buf.get(pos) < 0) ? -1L : 0L; + for (int i = 0; i < len; i++) { + v = (v << 8) | (buf.get(pos + i) & 0xFF); + } + return v; + } } /** @@ -1469,7 +1527,7 @@ public byte[] readDecimal(int id) { * and returned as per the type defined in HMS. */ public static class TypesFromInt32DecimalPageReader extends DefaultParquetDataColumnReader { - private short scale; + private final short scale; public TypesFromInt32DecimalPageReader(ValuesReader realReader, int length, short scale, int hivePrecision, int hiveScale) { @@ -1622,6 +1680,23 @@ public byte[] readDecimal(int id) { hiveDecimalWritable.set(hiveDecimal); return super.validatedScaledDecimal(scale); } + + @Override + public boolean isFastDecimal64() { + // Identity fast path: the file scale equals the Hive scale, so the stored unscaled value IS the + // Decimal64 value -- no rescale/rounding, only a precision bounds check. + return scale == hiveScale; + } + + @Override + public long readDecimal64() { + return validatedDecimal64(valuesReader.readInteger()); + } + + @Override + public long readDecimal64(int id) { + return validatedDecimal64(dict.decodeToInt(id)); + } } /** @@ -1631,7 +1706,7 @@ public byte[] readDecimal(int id) { * and returned as per the type defined in HMS. 
*/ public static class TypesFromInt64DecimalPageReader extends DefaultParquetDataColumnReader { - private short scale; + private final short scale; public TypesFromInt64DecimalPageReader(ValuesReader realReader, int length, short scale, int hivePrecision, int hiveScale) { @@ -1784,6 +1859,23 @@ public byte[] readDecimal(int id) { hiveDecimalWritable.set(hiveDecimal); return super.validatedScaledDecimal(scale); } + + @Override + public boolean isFastDecimal64() { + // Identity fast path: the file scale equals the Hive scale, so the stored unscaled long IS the + // Decimal64 value -- no rescale/rounding, only a precision bounds check. + return scale == hiveScale; + } + + @Override + public long readDecimal64() { + return validatedDecimal64(valuesReader.readLong()); + } + + @Override + public long readDecimal64(int id) { + return validatedDecimal64(dict.decodeToLong(id)); + } } /** @@ -1956,15 +2048,16 @@ private static ParquetDataColumnReader getConvertorFromBinary(boolean isDict, } Optional reader = parquetType.getLogicalTypeAnnotation() - .accept(new LogicalTypeAnnotationVisitor() { + .accept(new LogicalTypeAnnotationVisitor<>() { @Override public Optional visit( DecimalLogicalTypeAnnotation logicalTypeAnnotation) { + final int precision = logicalTypeAnnotation.getPrecision(); final short scale = (short) logicalTypeAnnotation.getScale(); return isDict ? 
Optional - .of(new TypesFromDecimalPageReader(dictionary, length, scale, hivePrecision, - hiveScale)) : Optional - .of(new TypesFromDecimalPageReader(valuesReader, length, scale, hivePrecision, - hiveScale)); + .of(new TypesFromDecimalPageReader(dictionary, length, precision, scale, + hivePrecision, hiveScale)) : Optional + .of(new TypesFromDecimalPageReader(valuesReader, length, precision, scale, + hivePrecision, hiveScale)); } @Override public Optional visit( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java index fbffab44a64c..c346331325df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -343,7 +343,7 @@ public static CacheTag cacheTagOfParquetFile(Path path, Configuration cacheConf, return null; } PartitionDesc partitionDesc = LlapHiveUtils.partitionDescForPath(path, mapWork.getPathToPartitionInfo()); - return LlapHiveUtils.getDbAndTableNameForMetrics(path, true, partitionDesc); + return LlapHiveUtils.getCacheTag(path, true, partitionDesc); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java index c25475733ee4..be2622153bbe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java @@ -16,6 +16,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -25,6 +26,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.page.PageReader; @@ -136,7 +138,11 @@ private void readBatchHelper( readFloats(num, (DoubleColumnVector) column, rowId); break; case DECIMAL: - readDecimal(num, (DecimalColumnVector) column, rowId); + if (column instanceof Decimal64ColumnVector) { + readDecimal64(num, (Decimal64ColumnVector) column, rowId); + } else { + readDecimal(num, (DecimalColumnVector) column, rowId); + } break; case TIMESTAMP: readTimestamp(num, (TimestampColumnVector) column, rowId); @@ -153,10 +159,7 @@ private static void setNullValue(ColumnVector c, int rowId) { c.noNulls = false; } - private void readDictionaryIDs( - int total, - LongColumnVector c, - int rowId) throws IOException { + private void readDictionaryIDs(int total, LongColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -172,10 +175,7 @@ private void readDictionaryIDs( } } - private void readIntegers( - int total, - LongColumnVector c, - int rowId) throws IOException { + private void readIntegers(int total, LongColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -196,10 +196,7 @@ private void readIntegers( } } - private void readSmallInts( - int total, - LongColumnVector c, - int rowId) throws IOException { + private void readSmallInts(int total, LongColumnVector c, int rowId) { int left = total; while (left > 0) { 
readRepetitionAndDefinitionLevels(); @@ -220,10 +217,7 @@ private void readSmallInts( } } - private void readTinyInts( - int total, - LongColumnVector c, - int rowId) throws IOException { + private void readTinyInts(int total, LongColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -244,10 +238,7 @@ private void readTinyInts( } } - private void readDoubles( - int total, - DoubleColumnVector c, - int rowId) throws IOException { + private void readDoubles(int total, DoubleColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -268,10 +259,7 @@ private void readDoubles( } } - private void readBooleans( - int total, - LongColumnVector c, - int rowId) throws IOException { + private void readBooleans(int total, LongColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -287,10 +275,7 @@ private void readBooleans( } } - private void readLongs( - int total, - LongColumnVector c, - int rowId) throws IOException { + private void readLongs(int total, LongColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -311,10 +296,7 @@ private void readLongs( } } - private void readFloats( - int total, - DoubleColumnVector c, - int rowId) throws IOException { + private void readFloats(int total, DoubleColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -335,17 +317,9 @@ private void readFloats( } } - private void readDecimal( - int total, - DecimalColumnVector c, - int rowId) throws IOException { - - DecimalLogicalTypeAnnotation decimalLogicalType = null; - if (type.getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation) { - decimalLogicalType = (DecimalLogicalTypeAnnotation) type.getLogicalTypeAnnotation(); - } - byte[] decimalData = null; - fillDecimalPrecisionScale(decimalLogicalType, c); + private void 
readDecimal(int total, DecimalColumnVector c, int rowId) { + byte[] decimalData; + fillDecimalPrecisionScale(c); int left = total; while (left > 0) { @@ -367,10 +341,7 @@ private void readDecimal( } } - private void readString( - int total, - BytesColumnVector c, - int rowId) throws IOException { + private void readString(int total, BytesColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -387,10 +358,7 @@ private void readString( } } - private void readChar( - int total, - BytesColumnVector c, - int rowId) throws IOException { + private void readChar(int total, BytesColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -407,10 +375,7 @@ private void readChar( } } - private void readVarchar( - int total, - BytesColumnVector c, - int rowId) throws IOException { + private void readVarchar(int total, BytesColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -427,10 +392,7 @@ private void readVarchar( } } - private void readBinaries( - int total, - BytesColumnVector c, - int rowId) throws IOException { + private void readBinaries(int total, BytesColumnVector c, int rowId) { int left = total; while (left > 0) { readRepetitionAndDefinitionLevels(); @@ -447,10 +409,7 @@ private void readBinaries( } } - private void readDate( - int total, - DateColumnVector c, - int rowId) throws IOException { + private void readDate(int total, DateColumnVector c, int rowId) { c.setUsingProlepticCalendar(true); int left = total; while (left > 0) { @@ -651,14 +610,21 @@ private void decodeDictionaryIds( } break; case DECIMAL: - DecimalLogicalTypeAnnotation decimalLogicalType = null; - if (type.getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation) { - decimalLogicalType = (DecimalLogicalTypeAnnotation) type.getLogicalTypeAnnotation(); + if (column instanceof Decimal64ColumnVector dec64) { + 
fillDecimal64PrecisionScale(dec64); + boolean fast = dictionary.isFastDecimal64(); + short valueScale = (short) getDecimalTypeInfo().getScale(); + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNull[i]) { + setDecimal64Value(dec64, i, fast, dictionary, (int) dictionaryIds.vector[i], valueScale); + } + } + break; } DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column); - byte[] decimalData = null; + byte[] decimalData; - fillDecimalPrecisionScale(decimalLogicalType, decimalColumnVector); + fillDecimalPrecisionScale(decimalColumnVector); for (int i = rowId; i < rowId + num; ++i) { if (!column.isNull[i]) { @@ -687,26 +653,99 @@ private void decodeDictionaryIds( } /** - * The decimal precision and scale is filled into decimalColumnVector. If the data in - * Parquet is in decimal, the precision and scale will come in from decimalLogicalType. If parquet - * is not in decimal, then this call is made because HMS shows the type as decimal. So, the - * precision and scale are picked from hiveType. - * - * @param decimalLogicalType - * @param decimalColumnVector + * Fill a {@link DecimalColumnVector} at the {@link #getDecimalTypeInfo()} precision/scale: the scale + * {@link #readDecimal} reads the unscaled bytes at, carried per row by the HiveDecimal. + */ + private void fillDecimalPrecisionScale(DecimalColumnVector c) { + DecimalTypeInfo dti = getDecimalTypeInfo(); + c.precision = (short) dti.getPrecision(); + c.scale = (short) dti.getScale(); + } + + /** + * Fill a long-backed {@link Decimal64ColumnVector} at the Hive (table) scale -- the scale every + * consumer reads {@code c.vector} at, and the only scale at which the unscaled value fits the long. + * NOT the Parquet file scale from {@link #getDecimalTypeInfo()}: under schema evolution that + * scale can be larger (e.g. a DECIMAL(38,37) file read as DECIMAL(16,8)) and would overflow.
+ */ + private void fillDecimal64PrecisionScale(Decimal64ColumnVector c) { + DecimalTypeInfo dti = hiveType instanceof DecimalTypeInfo hiveDti ? hiveDti : getDecimalTypeInfo(); + c.precision = (short) dti.getPrecision(); + c.scale = (short) dti.getScale(); + } + + /** + * Decimal precision/scale for this column: from the Parquet decimal logical type when present, + * otherwise from the Hive type (Parquet stores it as a non-decimal physical type but HMS reports + * decimal). */ - private void fillDecimalPrecisionScale(DecimalLogicalTypeAnnotation decimalLogicalType, - DecimalColumnVector decimalColumnVector) { - if (decimalLogicalType != null) { - decimalColumnVector.precision = (short) decimalLogicalType.getPrecision(); - decimalColumnVector.scale = (short) decimalLogicalType.getScale(); + private DecimalTypeInfo getDecimalTypeInfo() { + if (type.getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation d) { + return TypeInfoFactory.getDecimalTypeInfo(d.getPrecision(), d.getScale()); } else if (TypeInfoUtils.getBaseName(hiveType.getTypeName()) .equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) { - decimalColumnVector.precision = (short) ((DecimalTypeInfo) hiveType).getPrecision(); - decimalColumnVector.scale = (short) ((DecimalTypeInfo) hiveType).getScale(); + return (DecimalTypeInfo) hiveType; + } + throw new UnsupportedOperationException( + "The underlying Parquet type cannot be converted to Hive Decimal type: " + type); + } + + /** + * Decimal64 read path: store unscaled values in the long-backed vector, skipping the per-row HiveDecimal + * when the reader reports {@code isFastDecimal64()}. Only for columns the vectorizer tagged DECIMAL_64 + * (precision <= 18); higher precision uses {@link #readDecimal}.
+ */ + private void readDecimal64(int total, Decimal64ColumnVector c, int rowId) { + fillDecimal64PrecisionScale(c); + boolean fast = dataColumn.isFastDecimal64(); + short valueScale = (short) getDecimalTypeInfo().getScale(); + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + setDecimal64Value(c, rowId, fast, dataColumn, -1, valueScale); + if (!c.isNull[rowId]) { + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } + } else { + setNullValue(c, rowId); + } + rowId++; + left--; + } + } + + /** + * Store one decimal64 value into {@code c[rowId]} from {@code reader}, NULLing the entry when the + * value is out of range. {@code fast} selects the identity fast path (raw unscaled long) over the + * HiveDecimal/byte[] slow path. {@code id >= 0} reads that dictionary entry; a negative {@code id} + * reads the current page value. Shared by the page ({@link #readDecimal64}) and dictionary + * ({@link #decodeDictionaryIds}) decode loops. + */ + private void setDecimal64Value(Decimal64ColumnVector c, int rowId, boolean fast, + ParquetDataColumnReader reader, int id, short valueScale) { + c.isNull[rowId] = false; + boolean stored; + if (fast) { + // Identity fast path: store the raw unscaled long directly (no HiveDecimal/byte[] per row). + long v = id >= 0 ? reader.readDecimal64(id) : reader.readDecimal64(); + stored = reader.isValid(); + if (stored) { + c.vector[rowId] = v; + } } else { - throw new UnsupportedOperationException( - "The underlying Parquet type cannot be converted to Hive Decimal type: " + type); + // set() enforces the column precision/scale and marks the entry NULL if the value does not + // fit (e.g. schema-evolved data whose larger file scale can't be held at the column scale). + byte[] bytes = id >= 0 ? 
reader.readDecimal(id) : reader.readDecimal(); + stored = reader.isValid(); + if (stored) { + c.set(rowId, bytes, valueScale); + stored = !c.isNull[rowId]; + } + } + if (!stored) { + c.vector[rowId] = 0; + setNullValue(c, rowId); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/CreateExternalViewRequest.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/CreateExternalViewRequest.java new file mode 100644 index 000000000000..886313869993 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/CreateExternalViewRequest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.metadata; + +import java.io.Serializable; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.hive.metastore.api.FieldSchema; + +public final class CreateExternalViewRequest implements Serializable { + private static final long serialVersionUID = 1L; + + private final String databaseName; + private final String viewName; + private final List schema; + private final String expandedText; + private final Map properties; + private final String comment; + private final boolean replace; + private final boolean ifNotExists; + + public CreateExternalViewRequest( + String databaseName, + String viewName, + List schema, + String expandedText, + Map properties, + String comment, + boolean replace, + boolean ifNotExists) { + this.databaseName = databaseName; + this.viewName = viewName; + this.schema = schema; + this.expandedText = expandedText; + this.properties = properties == null ? null : Collections.unmodifiableMap(properties); + this.comment = comment; + this.replace = replace; + this.ifNotExists = ifNotExists; + } + + public String getDatabaseName() { + return databaseName; + } + + public String getViewName() { + return viewName; + } + + public List getSchema() { + return schema; + } + + public String getExpandedText() { + return expandedText; + } + + public Map getProperties() { + return properties; + } + + public String getComment() { + return comment; + } + + public boolean isReplace() { + return replace; + } + + public boolean isIfNotExists() { + return ifNotExists; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index cd33896807bc..31f695ed558b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -6094,8 +6094,7 @@ public HiveMetaHook getHook( } }; - HiveMetaStoreClientBuilder msClientBuilder = new 
HiveMetaStoreClientBuilder(conf) - .newClient(allowEmbedded) + HiveMetaStoreClientBuilder msClientBuilder = new HiveMetaStoreClientBuilder(conf, allowEmbedded) .enhanceWith(client -> HiveMetaStoreClientWithLocalCache.newClient(conf, client)) .enhanceWith(client -> diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java index 520c52a24a8f..c50f0b33b055 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java @@ -1028,6 +1028,14 @@ default void setMergeTaskDeleteProperties(TableDesc tableDesc) { throw new UnsupportedOperationException("Storage handler does not support getting custom delete merge schema."); } + /** + * @return {@code true} if this handler may store CREATE VIEW text and column metadata in an external catalog + * rather than only as a classic HMS virtual view. + */ + default boolean supportsExternalViewCatalog() { + return false; + } + default boolean supportsDefaultColumnValues(Map tblProps) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java index 8ea25a91a0bf..565479734b42 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SearchTransformer.java @@ -72,30 +72,44 @@ public SearchTransformer(RexBuilder rexBuilder, RexCall search, final RexUnknown this.unknownContext = unknownContext; } + /** + * Transforms the SEARCH expression into an equivalent RexNode expression. + * Warning: when called from a shuttle, callers of this method should consider flattening AND/OR expressions + * afterward, to get the same result as applying {@link SearchTransformer.Shuttle}. 
+ */ public RexNode transform() { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.SEARCH_TRANSFORMER); - RangeConverter consumer = new RangeConverter<>(rexBuilder, operandType, ref); - RangeSets.forEach(sarg.rangeSet, consumer); - List orList = new ArrayList<>(); if (sarg.nullAs == RexUnknownAs.TRUE && unknownContext != RexUnknownAs.TRUE) { orList.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref)); } - switch (consumer.inLiterals.size()) { - case 0: - break; - case 1: - orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0))); - break; - default: - List operands = new ArrayList<>(consumer.inLiterals.size() + 1); - operands.add(ref); - operands.addAll(consumer.inLiterals); - orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands)); + + if (sarg.isComplementedPoints()) { + // Generate 'ref <> value1 AND ... AND ref <> valueN' + List list = sarg.rangeSet.complement().asRanges().stream().map( + range -> rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref, + rexBuilder.makeLiteral(range.lowerEndpoint(), operandType, true, true))).toList(); + orList.add(RexUtil.composeConjunction(rexBuilder, list)); + } else { + RangeConverter consumer = new RangeConverter<>(rexBuilder, operandType, ref); + RangeSets.forEach(sarg.rangeSet, consumer); + + switch (consumer.inLiterals.size()) { + case 0: + break; + case 1: + orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0))); + break; + default: + List operands = new ArrayList<>(consumer.inLiterals.size() + 1); + operands.add(ref); + operands.addAll(consumer.inLiterals); + orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands)); + } + orList.addAll(consumer.nodes); } - orList.addAll(consumer.nodes); RexNode x = RexUtil.composeDisjunction(rexBuilder, orList); if (sarg.nullAs == RexUnknownAs.FALSE && unknownContext != RexUnknownAs.FALSE) { diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java index 176d27e07d91..eb148dda25fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableFunctionScan.java @@ -25,6 +25,7 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelInput; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.TableFunctionScan; import org.apache.calcite.rel.metadata.RelColumnMapping; import org.apache.calcite.rel.type.RelDataType; @@ -34,6 +35,9 @@ public class HiveTableFunctionScan extends TableFunctionScan implements HiveRelNode { + // Whether this is a LATERAL VIEW OUTER + private final boolean outer; + /** * @param cluster * cluster - Cluster that this relational expression belongs to @@ -49,28 +53,54 @@ public class HiveTableFunctionScan extends TableFunctionScan implements HiveRelN * rowType - Row type produced by function * @param columnMappings * columnMappings - Column mappings associated with this function + * @param outer + * outer - true if this is a LATERAL VIEW OUTER */ protected HiveTableFunctionScan(RelOptCluster cluster, RelTraitSet traitSet, List inputs, - RexNode rexCall, Type elementType, RelDataType rowType, Set columnMappings) { + RexNode rexCall, Type elementType, RelDataType rowType, Set columnMappings, + boolean outer) { super(cluster, traitSet, inputs, rexCall, elementType, rowType, columnMappings); + this.outer = outer; } public HiveTableFunctionScan(RelInput input) { super(input); + this.outer = input.getBoolean("outer", false); } public static HiveTableFunctionScan create(RelOptCluster cluster, RelTraitSet traitSet, List inputs, RexNode rexCall, Type elementType, RelDataType rowType, Set 
columnMappings) throws CalciteSemanticException { return new HiveTableFunctionScan(cluster, traitSet, inputs, rexCall, elementType, rowType, - columnMappings); + columnMappings, false); + } + + public static HiveTableFunctionScan create(RelOptCluster cluster, RelTraitSet traitSet, + List inputs, RexNode rexCall, Type elementType, RelDataType rowType, + Set columnMappings, boolean outer) throws CalciteSemanticException { + return new HiveTableFunctionScan(cluster, traitSet, inputs, rexCall, elementType, rowType, + columnMappings, outer); + } + + /** Returns true if this represents a LATERAL VIEW OUTER. */ + public boolean isOuter() { + return outer; + } + + @Override + public RelWriter explainTerms(RelWriter pw) { + super.explainTerms(pw); + if (outer) { + pw.item("outer", true); + } + return pw; } @Override public TableFunctionScan copy(RelTraitSet traitSet, List inputs, RexNode rexCall, Type elementType, RelDataType rowType, Set columnMappings) { return new HiveTableFunctionScan(getCluster(), traitSet, inputs, rexCall, - elementType, rowType, columnMappings); + elementType, rowType, columnMappings, outer); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 695196502ed1..f3e47f010646 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -703,14 +703,21 @@ private static QueryBlockInfo createASTLateralView(TableFunctionScan tfs, QueryB sel.add(selexpr.node()); // place the SELECT clause under the LATERAL VIEW clause - ASTBuilder lateralview = ASTBuilder.construct(HiveParser.TOK_LATERAL_VIEW, "TOK_LATERAL_VIEW"); - lateralview.add(sel.node()); + final boolean isOuterLateralView = tfs instanceof HiveTableFunctionScan htfs && htfs.isOuter(); + final int lateralViewToken = isOuterLateralView 
+ ? HiveParser.TOK_LATERAL_VIEW_OUTER + : HiveParser.TOK_LATERAL_VIEW; + final String lateralViewText = isOuterLateralView + ? "TOK_LATERAL_VIEW_OUTER" + : "TOK_LATERAL_VIEW"; + ASTBuilder lateralView = ASTBuilder.construct(lateralViewToken, lateralViewText); + lateralView.add(sel.node()); // finally, add the LATERAL VIEW clause under the left side source which is the base table. - lateralview.add(tableFunctionSource.ast); + lateralView.add(tableFunctionSource.ast); Schema outputSchema = new Schema(tableFunctionSource.schema, new Schema(alias, lvFields)); - return new QueryBlockInfo(outputSchema, lateralview.node()); + return new QueryBlockInfo(outputSchema, lateralView.node()); } private boolean isLateralView(RelNode relNode) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index b582c62997e9..2098f29a7a63 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -82,6 +82,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -214,6 +215,12 @@ public ExprNodeDesc visitCall(RexCall call) { && SqlTypeUtil.equalSansNullability(dTFactory, call.getType(), call.operands.get(0).getType())) { return args.get(0); + } else if (call.isA(SqlKind.AND)) { + // Make sure AND is flattened (we may have nested ANDs due to SearchTransformer conversion above) + return ExprNodeDescUtils.and(args); + } else if (call.isA(SqlKind.OR)) { + // Make 
sure OR is flattened (we may have nested ORs due to SearchTransformer conversion above) + return ExprNodeDescUtils.or(args); } else { GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), args.size()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java index 8ba2c51e8506..dea8f4a3b6c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java @@ -183,13 +183,12 @@ public static SemanticNodeProcessor getColumnProcessor() { return new ColumnExprProcessor(); } - private static boolean findSourceColumn( + private static boolean findSourceColumn(Operator inpOp, LineageCtx lctx, Predicate cond, String tabAlias, String alias) { for (Map.Entry topOpMap: lctx.getParseCtx().getTopOps().entrySet()) { TableScanOperator tableScanOp = topOpMap.getValue(); Table tbl = tableScanOp.getConf().getTableMetadata(); - if (tbl.getTableName().equals(tabAlias) - || tabAlias.equals(tableScanOp.getConf().getAlias())) { + if (isMatchingTableScan(inpOp, tabAlias, tableScanOp, tbl)) { for (FieldSchema column: tbl.getCols()) { if (column.getName().equals(alias)) { TableAliasInfo table = new TableAliasInfo(); @@ -208,6 +207,16 @@ private static boolean findSourceColumn( return false; } + private static boolean isMatchingTableScan(Operator inpOp, String tabAlias, + TableScanOperator tableScanOp, Table tbl) { + boolean operatorIdMatches = inpOp.getOperatorId().equals(tableScanOp.getOperatorId()); + + boolean tableNameMatches = tbl.getTableName().equals(tabAlias); + boolean aliasMatches = tabAlias.equals(tableScanOp.getConf().getAlias()); + + return operatorIdMatches && (tableNameMatches || aliasMatches); + } + /** * Get the expression string of an expression node. 
*/ @@ -241,7 +250,7 @@ public static String getExprString(RowSchema rs, ExprNodeDesc expr, } if (tabAlias != null && tabAlias.length() > 0 && !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) { - if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && dep != null) { + if (cond != null && !findSourceColumn(inpOp, lctx, cond, tabAlias, alias) && dep != null) { cond.getBaseCols().addAll(dep.getBaseCols()); } return tabAlias + "." + alias; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java index 91340b1b76ef..11f02e295b8a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java @@ -25,6 +25,7 @@ import java.util.Properties; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.ddl.DDLUtils; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -73,6 +75,9 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv throw new HiveException("Internal error : Partition Spec size, " + partSpec.size() + " doesn't match partition key definition size, " + partKeyTypes.length); } + String defaultPartitionName = HiveConf.getVar(SessionState.getSessionConf(), + HiveConf.ConfVars.DEFAULT_PARTITION_NAME); + // Create the 
row object List partNames = new ArrayList<>(); List partValues = new ArrayList<>(); @@ -82,9 +87,15 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv partNames.add(entry.getKey()); ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector (TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i++])); - partValues.add(ObjectInspectorConverters.getConverter( - PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi) - .convert(entry.getValue())); + + String partitionValue = entry.getValue(); + if (partitionValue.equals(defaultPartitionName)) { + partValues.add(null); // Null for default partition. + } else { + partValues.add(ObjectInspectorConverters.getConverter( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi) + .convert(partitionValue)); + } partObjectInspectors.add(oi); } StructObjectInspector partObjectInspector = ObjectInspectorFactory @@ -104,7 +115,7 @@ public static Pair prepareExpr( ExprNodeDesc expr, List partColumnNames, List partColumnTypeInfos) throws HiveException { // Create the row object - List partObjectInspectors = new ArrayList(); + List partObjectInspectors = new ArrayList<>(); for (int i = 0; i < partColumnNames.size(); i++) { partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( partColumnTypeInfos.get(i))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 7574ad5f6d24..8839256f3093 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -563,7 +563,7 @@ public static boolean prunePartitionNames(List partColumnNames, Warehouse.makeValsFromName(partName, values); List convertedValues = new ArrayList<>(values.size()); - for(int i=0; i(); - int rhssize = rightAliases.size(); + List scopedLeftAliases = 
new ArrayList<>(); + List scopedRightAliases = new ArrayList<>(); + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), - leftAliases, rightAliases, null, aliasToOpInfo); - String rhsAlias = null; + scopedLeftAliases, scopedRightAliases, null, aliasToOpInfo); - if (rightAliases.size() > rhssize) { // the new table is rhs table - rhsAlias = rightAliases.get(rightAliases.size() - 1); - } + String rhsAlias = scopedRightAliases.isEmpty() ? null : scopedRightAliases.get(0); + scopedLeftAliases.forEach(alias -> { + if (!leftAliases.contains(alias)) { + leftAliases.add(alias); + } + }); + scopedRightAliases.forEach(alias -> { + if (!rightAliases.contains(alias)) { + rightAliases.add(alias); + } + }); parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), leftAliases, rightAliases, fields1, aliasToOpInfo); @@ -11124,7 +11129,7 @@ boolean isCBOExecuted() { return false; } - boolean isCBOSupportedLateralView(ASTNode lateralView) { + boolean isCBOSupportedLateralView() { return false; } @@ -15527,4 +15532,14 @@ public void startAnalysis() { queryState.createHMSCache(); } } + + /** + * Utility method to determine if an AST node represents a lateral view or lateral view outer. + * @param node AST node + * @return true if node is of lateral view or lateral view outer; false otherwise. 
+ */ + public static boolean isASTNodeLateralView(ASTNode node) { + return node.getToken().getType() == HiveParser.TOK_LATERAL_VIEW + || node.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java index 2472ad44ad00..7fb5269ec25c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java @@ -88,6 +88,19 @@ public String outputFormat() { return outputFormat; } } + + public static String resolveStorageHandlerClassName(String formatType) throws SemanticException { + if (StringUtils.isBlank(formatType)) { + throw new SemanticException("Format type cannot be empty"); + } + for (StorageHandlerTypes type : StorageHandlerTypes.NON_DEFAULT_TYPES) { + if (type.name().equalsIgnoreCase(formatType.trim())) { + Objects.requireNonNull(type.className()); + return ensureClassExists(BaseSemanticAnalyzer.unescapeSQLString(type.className())); + } + } + return ensureClassExists(BaseSemanticAnalyzer.unescapeSQLString(formatType.trim())); + } public StorageFormat(Configuration conf) { this.conf = conf; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/relnodegen/LateralViewPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/relnodegen/LateralViewPlan.java index 1e206405eaae..2c585ce6262d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/relnodegen/LateralViewPlan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/relnodegen/LateralViewPlan.java @@ -28,7 +28,6 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; @@ -91,14 +90,15 @@ public class LateralViewPlan { public LateralViewPlan(ASTNode lateralView, RelOptCluster cluster, RelNode inputRel, RowResolver inputRR, UnparseTranslator unparseTranslator, - HiveConf conf, FunctionHelper functionHelper - ) throws SemanticException { + HiveConf conf, FunctionHelper functionHelper) throws SemanticException { // initialize global variables containing helper information this.cluster = cluster; this.unparseTranslator = unparseTranslator; this.conf = conf; this.functionHelper = functionHelper; + boolean isOuter = lateralView.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER; + // AST should have form of LATERAL_VIEW -> SELECT -> SELEXPR -> FUNCTION -> function info tree ASTNode selExprAST = (ASTNode) lateralView.getChild(0).getChild(0); ASTNode functionAST = (ASTNode) selExprAST.getChild(0); @@ -118,7 +118,7 @@ public LateralViewPlan(ASTNode lateralView, RelOptCluster cluster, RelNode input this.lateralViewRel = HiveTableFunctionScan.create(cluster, TraitsUtil.getDefaultTraitSet(cluster), ImmutableList.of(inputRel), udtfCall, - null, retType, createColumnMappings(inputRel)); + null, retType, createColumnMappings(inputRel), isOuter); } public static void validateLateralView(ASTNode lateralView) throws SemanticException { @@ -127,9 +127,9 @@ public static void validateLateralView(ASTNode lateralView) throws SemanticExcep " children."); } ASTNode next = (ASTNode) lateralView.getChild(1); - if (!TABLE_ALIAS_TOKEN_TYPES.contains(next.getToken().getType()) && - HiveParser.TOK_LATERAL_VIEW != next.getToken().getType()) { - throw new SemanticException(ASTErrorUtils.getMsg( + if (!TABLE_ALIAS_TOKEN_TYPES.contains(next.getToken().getType()) && + !SemanticAnalyzer.isASTNodeLateralView(next)) { + throw new SemanticException(ASTErrorUtils.getMsg( ErrorMsg.LATERAL_VIEW_INVALID_CHILD.getMsg(), lateralView)); } } diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 541ce20f5180..34d5f0ba0b38 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -64,6 +64,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Predicate; public class ExprNodeDescUtils { @@ -243,6 +244,21 @@ public static ExprNodeGenericFuncDesc and(List exps) { return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", flatExps); } + /** + * Creates a disjunction (OR) of the given expressions flattening nested disjunctions if possible. + *
+   * Input: AND(A, B), C, OR(D, OR(E, F))
+   * Output: OR(AND(A, B), C, D, E, F)
+   * 
+ */ + public static ExprNodeGenericFuncDesc or(List exps) { + List flatExps = new ArrayList<>(); + for (ExprNodeDesc e : exps) { + split(e, flatExps, FunctionRegistry::isOpOr); + } + return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPOr(), "or", flatExps); + } + /** * Create an expression for computing a murmur hash by recursively hashing given expressions by two: *
@@ -305,9 +321,17 @@ public static List split(ExprNodeDesc current) {
    * split predicates by AND op
    */
   public static List split(ExprNodeDesc current, List splitted) {
-    if (FunctionRegistry.isOpAnd(current)) {
+    return split(current, splitted, FunctionRegistry::isOpAnd);
+  }
+
+  /**
+   * split predicates by a certain condition
+   */
+  private static List split(ExprNodeDesc current, List splitted,
+      Predicate condition) {
+    if (condition.test(current)) {
       for (ExprNodeDesc child : current.getChildren()) {
-        split(child, splitted);
+        split(child, splitted, condition);
       }
       return splitted;
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 0dcfe72d7f5b..25f8afe6d6c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -258,6 +258,13 @@ public String getTableName() {
     return tableName;
   }
 
+  /**
+   * Returns the catalog name reported by {@code tableDesc}; assumes it is non-null (TODO confirm).
+   */
+  public String getCatalogName() {
+    return tableDesc.getCatalogName();
+  }
+
   @Explain(displayName = "input format", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
   public String getInputFileFormatClassName() {
     return getInputFileFormatClass().getName();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
index 58ce207c0c6c..b0e6feff7ec1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
@@ -20,6 +20,7 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.StringInternUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
@@ -52,6 +53,8 @@ public class TableDesc implements Serializable, Cloneable {
   public static final String SECRET_PREFIX = "TABLE_SECRET";
   public static final String SECRET_DELIMIT = "#";
 
+  private String catalogName;
+
   public TableDesc() {
   }
 
@@ -59,14 +62,31 @@ public TableDesc() {
    * @param inputFormatClass
    * @param outputFormatClass
    * @param properties must contain serde class name associate with this table.
+   * @param catalogName the catalog this table belongs to; stored as a dedicated field so it does
+   *                    not appear in EXPLAIN output. Pass {@code null} for internal/intermediate
+   *                    descriptors that are not backed by a real user table; {@code null} will be
+   *                    normalized to {@link Warehouse#DEFAULT_CATALOG_NAME}.
    */
   public TableDesc(
       final Class inputFormatClass,
-      final Class outputFormatClass, final Properties properties) {
+      final Class outputFormatClass, final Properties properties,
+      final String catalogName) {
     this.inputFileFormatClass = inputFormatClass;
     outputFileFormatClass = HiveFileFormatUtils
         .getOutputFormatSubstitute(outputFormatClass);
     setProperties(properties);
+    this.catalogName = catalogName == null ? Warehouse.DEFAULT_CATALOG_NAME : catalogName;
+  }
+
+  /**
+   * @param inputFormatClass input format class, passed on unchanged
+   * @param outputFormatClass output format class, passed on unchanged
+   * @param properties must contain serde class name associated with this table.
+   */
+  public TableDesc(
+      final Class inputFormatClass,
+      final Class outputFormatClass, final Properties properties) {
+    this(inputFormatClass, outputFormatClass, properties, null); // null -> default catalog
   }
 
   public Class getSerDeClass() {
@@ -199,6 +219,14 @@ public int getBucketingVersion() {
         properties.getProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION));
   }
 
+  public String getCatalogName() {
+    return catalogName == null ? Warehouse.DEFAULT_CATALOG_NAME : catalogName; // unset after TableDesc()
+  }
+
+  public void setCatalogName(String catalogName) {
+    this.catalogName = catalogName != null ? catalogName : Warehouse.DEFAULT_CATALOG_NAME; // null -> default
+  }
+
   @Override
   public Object clone() {
     TableDesc ret = new TableDesc();
@@ -215,6 +243,7 @@ public Object clone() {
     if (jobProperties != null) {
       ret.jobProperties = new LinkedHashMap(jobProperties);
     }
+    ret.catalogName = catalogName == null ? Warehouse.DEFAULT_CATALOG_NAME : catalogName;
     return ret;
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
index ec0dc52a7321..3fa2ba1fd652 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/command/CommandAuthorizerV2.java
@@ -19,13 +19,13 @@
 package org.apache.hadoop.hive.ql.security.authorization.command;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.Map.Entry;
 
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.DataConnector;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
@@ -105,6 +105,9 @@ private static List getHivePrivObjects(List baseTablesViaRegularView = buildBaseTablesViaRegularView(privObjects);
+
     for (Entity privObject : privObjects) {
       if (privObject.isDummy()) {
         //do not authorize dummy readEntity or writeEntity
@@ -154,12 +157,35 @@ private static List getHivePrivObjects(List buildBaseTablesViaRegularView(List entities) {
+    Set result = new HashSet<>();
+    for (Entity entity : entities) {
+      if (!(entity instanceof ReadEntity) || entity.getTyp() != Type.TABLE) {
+        continue;
+      }
+      ReadEntity re = (ReadEntity) entity;
+      Table t = re.getTable();
+      if (!re.isDirect() && t != null && !hasDeferredViewParent(re) && hasRegularViewParent(re)) {
+        result.add(t.getDbName() + "." + t.getTableName());
+      }
+    }
+    return result;
+  }
+
   /**
    * A deferred authorization view is view created by non-super user like spark-user. This view contains a parameter "Authorized"
    * set to false, so ranger will not authorize it during view creation. When a select statement is issued, then the ranger authorizes
@@ -168,13 +194,8 @@ private static List getHivePrivObjects(List params = t.getParameters();
       if (params != null && params.containsKey(authorizedKeyword)) {
         String authorizedValue = params.get(authorizedKeyword);
@@ -186,6 +207,59 @@ private static boolean isDeferredAuthView(Table t){
     return false;
   }
 
+  /**
+   * Returns true when a PARTITION or DUMMYPARTITION read entity is already covered by a regular
+   * view's TABLE_OR_VIEW object and therefore should not produce its own privilege object.
+   * @param baseTablesViaRegularView keys of the form dbName + "." + tableName to match against
+   */
+  private static boolean isPartitionAccessedViaRegularView(Entity entity,
+      Set<String> baseTablesViaRegularView) {
+    if (!(entity instanceof ReadEntity)
+        || (entity.getTyp() != Type.PARTITION && entity.getTyp() != Type.DUMMYPARTITION)) {
+      return false;
+    }
+    ReadEntity re = (ReadEntity) entity;
+    // Deferred-auth views must still authorize the underlying base table.
+    if (hasDeferredViewParent(re)) {
+      return false;
+    }
+    if (hasRegularViewParent(re)) {
+      return true;
+    }
+    Table partTable = re.getTable();
+    return partTable != null
+        && baseTablesViaRegularView.contains(partTable.getDbName() + "." + partTable.getTableName());
+  }
+
+  /**
+   * Tells whether at least one parent of {@code entity} is a deferred-authorization view.
+   *
+   * @param entity read entity whose parent set is inspected; a null or empty set yields false
+   * @return true if some TABLE-typed parent has a non-null table accepted by isDeferredAuthView
+   */
+  private static boolean hasDeferredViewParent(ReadEntity entity) {
+    Set<ReadEntity> parents = entity.getParents();
+    // A null parent set is handled like an empty one: no parent can match.
+    return parents != null && parents.stream().anyMatch(parent ->
+        parent.getTyp() == Type.TABLE && parent.getTable() != null
+            && isDeferredAuthView(parent.getTable()));
+  }
+
+  /**
+   * Tells whether at least one parent of {@code entity} is a view or materialized view that is
+   * not a deferred-authorization view.
+   *
+   * @param entity read entity whose parent set is inspected; a null or empty set yields false
+   * @return true if such a TABLE-typed parent with a non-null table exists
+   */
+  private static boolean hasRegularViewParent(ReadEntity entity) {
+    Set<ReadEntity> parents = entity.getParents();
+    return parents != null && parents.stream().anyMatch(parent ->
+        parent.getTyp() == Type.TABLE && parent.getTable() != null
+            && (parent.getTable().isView() || parent.getTable().isMaterializedView())
+            && !isDeferredAuthView(parent.getTable()));
+  }
+
   private static void addHivePrivObject(Entity privObject, Map> tableName2Cols,
       List hivePrivObjs, HiveOperationType hiveOpType) throws HiveException {
     HivePrivilegeObjectType privObjType = AuthorizationUtils.getHivePrivilegeObjectType(privObject.getType());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java
index 197fa1e76bd8..b8c6bb602451 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactoryImpl.java
@@ -40,6 +40,10 @@ public HiveMetastoreClientFactoryImpl(HiveConf conf) {
   public IMetaStoreClient getHiveMetastoreClient() throws HiveAuthzPluginException {
     String errMsg = "Error getting metastore client";
     try {
+      Hive db = Hive.getThreadLocal();
+      if (db != null) {
+        return db.getMSC();
+      }
       return Hive.get(hiveConf, false).getMSC();
     } catch (MetaException e) {
       throw new HiveAuthzPluginException(errMsg, e);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 55f9d0c1e158..e539f0ab9c4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -239,22 +239,33 @@ public static long getNumRows(HiveConf conf, List schema, Table tabl
     return aggregateStat.getNumRows();
   }
 
-  private static void estimateStatsForMissingCols(List neededColumns, List columnStats,
-      HiveConf conf, long nr, List schema) {
+  /**
+   * Estimates column statistics for columns specified in {@code neededColumnNames}
+   * that do not already have statistics in the {@code existingColStats} list.
+   *
+   * @return A {@link List} of {@link ColStatistics} objects containing
+   * both the provided existing statistics and the newly estimated ones.
+   */
+  static List estimateStatsForMissingCols(
+      List neededColumnNames, List existingColStats, HiveConf conf, long nr,
+      List schema) {
 
-    Set neededCols = new HashSet<>(neededColumns);
-    Set colsWithStats = new HashSet<>();
+    Set neededCols = new HashSet<>(neededColumnNames);
+    Set columnNamesWithStats = HashSet.newHashSet(existingColStats.size());
 
-    for (ColStatistics cstats : columnStats) {
-      colsWithStats.add(cstats.getColumnName());
+    for (ColStatistics cstats : existingColStats) {
+      columnNamesWithStats.add(cstats.getColumnName());
     }
 
-    List missingColStats = new ArrayList<>(Sets.difference(neededCols, colsWithStats));
+    List missingColumnNames = new ArrayList<>(Sets.difference(neededCols, columnNamesWithStats));
+    ArrayList combined = new ArrayList<>(existingColStats.size() + missingColumnNames.size());
+    combined.addAll(existingColStats);
 
-    if (!missingColStats.isEmpty()) {
-      columnStats.addAll(
-          estimateStats(schema, missingColStats, conf, nr));
+    if (!missingColumnNames.isEmpty()) {
+      combined.addAll(estimateStats(schema, missingColumnNames, conf, nr));
     }
+
+    return combined;
   }
 
   public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
@@ -300,7 +311,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p
       if (needColStats && !metaTable) {
         colStats = getTableColumnStats(table, neededColumns, colStatsCache, fetchColStats);
         if (estimateStats) {
-          estimateStatsForMissingCols(neededColumns, colStats, conf, nr, schema);
+          colStats = estimateStatsForMissingCols(neededColumns, colStats, conf, nr, schema);
         }
         // we should have stats for all columns (estimated or actual)
         if (neededColumns.size() == colStats.size()) {
@@ -386,7 +397,7 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p
         boolean statsRetrieved = aggrStats != null &&
             aggrStats.getColStats() != null && aggrStats.getColStatsSize() != 0;
         if (neededColumns.isEmpty() || (!neededColsToRetrieve.isEmpty() && !statsRetrieved)) {
-          estimateStatsForMissingCols(neededColsToRetrieve, columnStats, conf, nr, schema);
+          columnStats = estimateStatsForMissingCols(neededColsToRetrieve, columnStats, conf, nr, schema);
           // There are some partitions with no state (or we didn't fetch any state).
           // Update the stats with empty list to reflect that in the
           // state/initialize structures.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
index 5dc40b2ba9fc..1e5fe45f682f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
@@ -213,8 +213,10 @@ public void init(AtomicBoolean stop) throws Exception {
     checkInterval = conf.getTimeVar(HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
     metricsEnabled = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METRICS_ENABLED) &&
         MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON);
+    boolean isSupportAcid = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID);
     optimizers = Arrays.stream(MetastoreConf.getTrimmedStringsVar(conf,
             MetastoreConf.ConfVars.COMPACTOR_INITIATOR_TABLE_OPTIMIZERS))
+        .filter(e -> isSupportAcid || !e.equalsIgnoreCase(MetastoreConf.ACID_TABLE_OPTIMIZER_CLASS))
         .map(this::instantiateTableOptimizer).toList();
   }
   
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/service/AcidCompactionService.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/service/AcidCompactionService.java
index d1c7e3972d03..f513d289fa92 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/service/AcidCompactionService.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/service/AcidCompactionService.java
@@ -48,6 +48,7 @@
 import org.apache.hadoop.hive.ql.txn.compactor.CompactorFactory;
 import org.apache.hadoop.hive.ql.txn.compactor.CompactorPipeline;
 import org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil;
+import org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil.ThrowingRunnable;
 import org.apache.hadoop.hive.ql.txn.compactor.QueryCompactor;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hive.common.util.Ref;
@@ -125,57 +126,56 @@ public void cleanupResultDirs(CompactionInfo ci) {
   
   public Boolean compact(Table table, CompactionInfo ci) throws Exception {
 
-    try (CompactionTxn compactionTxn = new CompactionTxn()) {
-
-      if (ci.isRebalanceCompaction() && table.getSd().getNumBuckets() > 0) {
-        LOG.error("Cannot execute rebalancing compaction on bucketed tables.");
-        ci.errorMessage = "Cannot execute rebalancing compaction on bucketed tables.";
-        msc.markRefused(CompactionInfo.compactionInfoToStruct(ci));
-        return false;
-      }
+    if (ci.isRebalanceCompaction() && table.getSd().getNumBuckets() > 0) {
+      LOG.error("Cannot execute rebalancing compaction on bucketed tables.");
+      ci.errorMessage = "Cannot execute rebalancing compaction on bucketed tables.";
+      msc.markRefused(CompactionInfo.compactionInfoToStruct(ci));
+      return false;
+    }
 
-      if (!ci.type.equals(CompactionType.REBALANCE) && ci.numberOfBuckets > 0) {
-        if (LOG.isWarnEnabled()) {
-          LOG.warn("Only the REBALANCE compaction accepts the number of buckets clause (CLUSTERED INTO {N} BUCKETS). " +
-              "Since the compaction request is {}, it will be ignored.", ci.type);
-        }
+    if (!ci.type.equals(CompactionType.REBALANCE) && ci.numberOfBuckets > 0) {
+      if (LOG.isWarnEnabled()) {
+        LOG.warn("Only the REBALANCE compaction accepts the number of buckets clause (CLUSTERED INTO {N} BUCKETS). " +
+            "Since the compaction request is {}, it will be ignored.", ci.type);
       }
+    }
 
-      String fullTableName = TxnUtils.getFullTableName(table.getDbName(), table.getTableName());
+    String fullTableName = TxnUtils.getFullTableName(table.getDbName(), table.getTableName());
 
-      // Find the partition we will be working with, if there is one.
-      Partition p;
-      try {
-        p = CompactorUtil.resolvePartition(conf, msc, ci.dbname, ci.tableName, ci.partName,
-            CompactorUtil.METADATA_FETCH_MODE.REMOTE);
-        if (p == null && ci.partName != null) {
-          ci.errorMessage = "Unable to find partition " + ci.getFullPartitionName() + ", assuming it was dropped and moving on.";
-          LOG.warn(ci.errorMessage + " Compaction info: {}", ci);
-          msc.markRefused(CompactionInfo.compactionInfoToStruct(ci));
-          return false;
-        }
-      } catch (Exception e) {
-        LOG.error("Unexpected error during resolving partition.", e);
-        ci.errorMessage = e.getMessage();
-        msc.markFailed(CompactionInfo.compactionInfoToStruct(ci));
+    // Find the partition we will be working with, if there is one.
+    Partition p;
+    try {
+      p = CompactorUtil.resolvePartition(conf, msc, ci.dbname, ci.tableName, ci.partName,
+          CompactorUtil.METADATA_FETCH_MODE.REMOTE);
+      if (p == null && ci.partName != null) {
+        ci.errorMessage = "Unable to find partition " + ci.getFullPartitionName() + ", assuming it was dropped and moving on.";
+        LOG.warn(ci.errorMessage + " Compaction info: {}", ci);
+        msc.markRefused(CompactionInfo.compactionInfoToStruct(ci));
         return false;
       }
+    } catch (Exception e) {
+      LOG.error("Unexpected error during resolving partition.", e);
+      ci.errorMessage = e.getMessage();
+      msc.markFailed(CompactionInfo.compactionInfoToStruct(ci));
+      return false;
+    }
 
-      CompactorUtil.checkInterrupt(CLASS_NAME);
+    CompactorUtil.checkInterrupt(CLASS_NAME);
 
-      // Find the appropriate storage descriptor
-      sd =  CompactorUtil.resolveStorageDescriptor(table, p);
+    // Find the appropriate storage descriptor
+    sd =  CompactorUtil.resolveStorageDescriptor(table, p);
 
-      if (isTableSorted(sd, ci)) {
-        return false;
-      }
+    if (isTableSorted(sd, ci)) {
+      return false;
+    }
 
-      if (ci.runAs == null) {
-        ci.runAs = TxnUtils.findUserToRunAs(sd.getLocation(), table, conf);
-      }
+    if (ci.runAs == null) {
+      ci.runAs = TxnUtils.findUserToRunAs(sd.getLocation(), table, conf);
+    }
 
-      CompactorUtil.checkInterrupt(CLASS_NAME);
+    CompactorUtil.checkInterrupt(CLASS_NAME);
 
+    try (CompactionTxn compactionTxn = new CompactionTxn()) {
       /*
        * we cannot have Worker use HiveTxnManager (which is on ThreadLocal) since
        * then the Driver would already have the an open txn but then this txn would have
@@ -211,32 +211,30 @@ public Boolean compact(Table table, CompactionInfo ci) throws Exception {
 
       // Don't start compaction or cleaning if not necessary
       if (isDynPartAbort(table, ci)) {
-        msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci));
-        compactionTxn.wasSuccessful();
+        compactionTxn.commit(() ->
+            msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci)));
         return false;
       }
       dir = getAcidStateForWorker(ci, sd, tblValidWriteIds);
       if (!isEnoughToCompact(ci, dir, sd)) {
         if (needsCleaning(dir, sd)) {
-          msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci));
+          compactionTxn.commit(() ->
+              msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci)));
         } else {
           // do nothing
           ci.errorMessage = "None of the compaction thresholds met, compaction request is refused!";
           LOG.debug(ci.errorMessage + " Compaction info: {}", ci);
-          msc.markRefused(CompactionInfo.compactionInfoToStruct(ci));
+          compactionTxn.commit(() ->
+              msc.markRefused(CompactionInfo.compactionInfoToStruct(ci)));
         }
-        compactionTxn.wasSuccessful();
         return false;
       }
       if (!ci.isMajorCompaction() && !CompactorUtil.isMinorCompactionSupported(conf, table.getParameters(), dir)) {
         ci.errorMessage = "Query based Minor compaction is not possible for full acid tables having raw format " +
             "(non-acid) data in them.";
         LOG.error(ci.errorMessage + " Compaction info: {}", ci);
-        try {
-          msc.markRefused(CompactionInfo.compactionInfoToStruct(ci));
-        } catch (Throwable tr) {
-          LOG.error("Caught an exception while trying to mark compaction {} as failed: {}", ci, tr);
-        }
+        compactionTxn.commit(() ->
+            msc.markRefused(CompactionInfo.compactionInfoToStruct(ci)));
         return false;
       }
       CompactorUtil.checkInterrupt(CLASS_NAME);
@@ -244,12 +242,12 @@ public Boolean compact(Table table, CompactionInfo ci) throws Exception {
       try {
         failCompactionIfSetForTest();
 
-      /*
-      First try to run compaction via HiveQL queries.
-      Compaction for MM tables happens here, or run compaction for Crud tables if query-based compaction is enabled.
-      todo Find a more generic approach to collecting files in the same logical bucket to compact within the same
-      task (currently we're using Tez split grouping).
-      */
+        /*
+        First try to run compaction via HiveQL queries.
+        Compaction for MM tables happens here, or run compaction for Crud tables if query-based compaction is enabled.
+        todo Find a more generic approach to collecting files in the same logical bucket to compact within the same
+        task (currently we're using Tez split grouping).
+        */
         CompactorPipeline compactorPipeline = compactorFactory.getCompactorPipeline(table, conf, ci, msc);
         computeStats = (compactorPipeline.isMRCompaction() && collectMrStats) || collectGenericStats;
 
@@ -261,8 +259,8 @@ public Boolean compact(Table table, CompactionInfo ci) throws Exception {
 
         LOG.info("Completed " + ci.type.toString() + " compaction for " + ci.getFullPartitionName() + " in "
             + compactionTxn + ", marking as compacted.");
-        msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci));
-        compactionTxn.wasSuccessful();
+        compactionTxn.commit(() ->
+            msc.markCompacted(CompactionInfo.compactionInfoToStruct(ci)));
 
         AcidMetricService.updateMetricsFromWorker(ci.dbname, ci.tableName, ci.partName, ci.type,
             dir.getCurrentDirectories().size(), dir.getDeleteDeltas().size(), conf, msc);
@@ -272,9 +270,6 @@ public Boolean compact(Table table, CompactionInfo ci) throws Exception {
       }
 
       return true;
-    } catch (Exception e) {
-      LOG.error("Caught exception in " + CLASS_NAME + " while trying to compact " + ci, e);
-      throw e;
     }
   }
 
@@ -346,7 +341,8 @@ class CompactionTxn implements AutoCloseable {
     private long lockId = 0;
 
     private TxnStatus status = TxnStatus.UNKNOWN;
-    private boolean successfulCompaction = false;
+    private boolean rollbackOnly = true;
+    private ThrowingRunnable onCommitSuccess;
 
     /**
      * Try to open a new txn.
@@ -377,11 +373,14 @@ private LockRequest createLockRequest(CompactionInfo ci) {
       return CompactorUtil.createLockRequest(conf, ci, txnId, lockAndOpType.getKey(), lockAndOpType.getValue());
     }
 
-    /**
-     * Mark compaction as successful. This means the txn will be committed; otherwise it will be aborted.
-     */
-    void wasSuccessful() {
-      this.successfulCompaction = true;
+    /**
+     * Mark the txn to be committed, rather than aborted, when it is closed.
+     * Nothing is committed here: {@link #close()} performs the commit and then runs the post action.
+     * @param postAction action to run in {@link #close()} right after the commit; skipped if null
+     */
+    void commit(ThrowingRunnable postAction) {
+      this.rollbackOnly = false;
+      this.onCommitSuccess = postAction;
     }
 
     /**
@@ -396,8 +395,12 @@ public void close() throws Exception {
         //the transaction is about to close, we can stop heartbeating regardless of it's state
         CompactionHeartbeatService.getInstance(conf).stopHeartbeat(txnId);
       } finally {
-        if (successfulCompaction) {
+        if (!rollbackOnly) {
           commit();
+          // The post-commit action runs only if commit() above returned normally.
+          if (onCommitSuccess != null) {
+            onCommitSuccess.run();
+          }
         } else {
           abort();
         }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
index b455e91b452e..12becaa8e7cb 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
@@ -83,7 +83,8 @@ public static String likePatternToRegExp(String likePattern, boolean literalize,
 
       if (n == '_') {
         sb.append(".");
-      } else if (n == '%') {
+      } else if (n == '%' || (n == '*' && !literalize)) {
+        // Both % and (when not literalized) * become a regex wildcard: ".*" if greedyMatch, otherwise ".*?"
         sb.append(greedyMatch ? ".*" : ".*?");
       } else {
         sb.append(literalize ? Pattern.quote(Character.toString(n)) : n);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
index a6a9f568e497..820d14b7ade8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
@@ -42,32 +42,69 @@
 public class UDFUnhex extends UDF {
 
   /**
-   * Convert every two hex digits in s into.
-   *
+   * Convert every two hex digits in s into a byte.
+   * An odd-length input is decoded as if it had a leading "0": its first digit becomes a byte on its own.
+   *
+   * @param s the hex digits (0-9, a-f, A-F) to decode; may be null
+   * @return the decoded bytes, an empty array for empty input, or null if s is null or holds a non-hex character
    */
   public byte[] evaluate(Text s) {
     if (s == null) {
       return null;
     }
 
-    // append a leading 0 if needed
-    String str;
-    if (s.getLength() % 2 == 1) {
-      str = "0" + s.toString();
-    } else {
-      str = s.toString();
+    int len = s.getLength();
+    if (len == 0) {
+      return new byte[0];
     }
 
-    byte[] result = new byte[str.length() / 2];
-    for (int i = 0; i < str.length(); i += 2) {
-      try {
-        result[i / 2] = ((byte) Integer.parseInt(str.substring(i, i + 2), 16));
-      } catch (NumberFormatException e) {
-        // invalid character present, return null
+    // Only the first len bytes of the array returned by getBytes() are read below.
+    byte[] textBytes = s.getBytes();
+
+    // (len + 1) / 2 rounds up, so an odd-length input gets a byte for its unpaired leading digit
+    byte[] result = new byte[(len + 1) / 2];
+
+    int i = 0;
+    int resIdx = 0;
+
+    // If the length is odd, the first digit is decoded alone as the low nibble of the first byte,
+    // which avoids building a "0"-prefixed copy of the input
+    if (len % 2 != 0) {
+      int val = decodeHexChar(textBytes[i++]);
+      if (val == -1) {
+        return null;
+      }
+      result[resIdx++] = (byte) val;
+    }
+
+    while (i < len) {
+      int high = decodeHexChar(textBytes[i++]);
+      int low = decodeHexChar(textBytes[i++]);
+      if (high == -1 || low == -1) {
         return null;
       }
+      result[resIdx++] = (byte) ((high << 4) | low);
     }
 
     return result;
   }
+
+  /**
+   * Decode one ASCII hex digit.
+   *
+   * @param b byte holding the character to decode
+   * @return the digit's value (0-15), or -1 if b is not one of 0-9, a-f, A-F
+   */
+  private static int decodeHexChar(byte b) {
+    if (b >= '0' && b <= '9') {
+      return b - '0';
+    }
+    if (b >= 'a' && b <= 'f') {
+      return b - 'a' + 10;
+    }
+    if (b >= 'A' && b <= 'F') {
+      return b - 'A' + 10;
+    }
+    return -1;
+  }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index 01dd3d51f5fa..f547b9366632 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -118,7 +118,7 @@ protected String getTestDataDir() {
   }
 
   @Override
-  void initHiveConf() {
+  protected void initHiveConf() {
     super.initHiveConf();
     //TestTxnCommandsWithSplitUpdateAndVectorization has the vectorized version
     //of these tests.
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index a74d3170b6a2..cd135d93130d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -155,7 +155,7 @@ String getPartitionColumns() {
   public ExpectedException expectedException = ExpectedException.none();
   
   @Override
-  void initHiveConf() {
+  protected void initHiveConf() {
     super.initHiveConf();
     //TestTxnCommands2WithSplitUpdateAndVectorization has the vectorized version
     //of these tests.
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithAbortCleanupUsingCompactionCycle.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithAbortCleanupUsingCompactionCycle.java
index 628c0f979aaa..f653b9ff2635 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithAbortCleanupUsingCompactionCycle.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithAbortCleanupUsingCompactionCycle.java
@@ -30,7 +30,7 @@ public TestTxnCommands2WithAbortCleanupUsingCompactionCycle() {
   }
 
   @Override
-  void initHiveConf() {
+  protected void initHiveConf() {
     super.initHiveConf();
     MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.COMPACTOR_CLEAN_ABORTS_USING_CLEANER, false);
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java
index 55cf77b99d31..ce1c29e0a82b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java
@@ -34,7 +34,7 @@ public TestTxnCommands2WithSplitUpdateAndVectorization() {
   }
 
   @Override
-  void initHiveConf() {
+  protected void initHiveConf() {
     super.initHiveConf();
     hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java
index 5b243d2022b1..ad821ba74f68 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java
@@ -74,7 +74,7 @@ public String toString() {
   }
 
   @Override
-  void initHiveConf() {
+  protected void initHiveConf() {
     super.initHiveConf();
     HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVE_ACID_TRUNCATE_USE_BASE, false);
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsWithSplitUpdateAndVectorization.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsWithSplitUpdateAndVectorization.java
index a0132300257a..e05042794ef4 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsWithSplitUpdateAndVectorization.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsWithSplitUpdateAndVectorization.java
@@ -29,7 +29,7 @@ public TestTxnCommandsWithSplitUpdateAndVectorization() {
   }
 
   @Override
-  void initHiveConf() {
+  protected void initHiveConf() {
     super.initHiveConf();
     hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java
index ba093d0ac963..097f5751c646 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java
@@ -116,7 +116,7 @@ public void setUp() throws Exception {
       HiveMetaStoreClientWithLocalCache.init(hiveConf);
     }
   }
-  void initHiveConf() {
+  protected void initHiveConf() {
     hiveConf = new HiveConfForTest(this.getClass());
     // Multiple tests requires more than one buckets per write. Use a very small value for grouping size to create
     // multiple mapper instances with FileSinkOperators. The number of buckets are depends on the size of the data
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/ddl/function/drop/TestDropFunctionAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/ddl/function/drop/TestDropFunctionAnalyzer.java
new file mode 100644
index 000000000000..d3ae91b76417
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/ddl/function/drop/TestDropFunctionAnalyzer.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.function.drop;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.conf.HiveConfForTest;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.FunctionType;
+import org.apache.hadoop.hive.metastore.api.PrincipalType;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.MockedStatic;
+
+/**
+ * Tests for DropFunctionAnalyzer focusing on the case where the function's JAR resource is
+ * unavailable (e.g. deleted from HDFS after the function was registered).
+ */
+class TestDropFunctionAnalyzer {
+
+  private HiveConf conf;
+
+  @BeforeEach
+  void setUp() {
+    conf = new HiveConfForTest(getClass());
+    SessionState.start(conf);
+  }
+
+  @AfterEach
+  void tearDown() throws Exception {
+    SessionState ss = SessionState.get();
+    if (ss != null) {
+      ss.close();
+    }
+  }
+
+  private DropFunctionAnalyzer createAnalyzer(Hive mockDb) throws SemanticException {
+    QueryState queryState = QueryState.getNewQueryState(conf, null);
+    queryState.setCommandType(HiveOperation.DROPFUNCTION);
+    return new DropFunctionAnalyzer(queryState, mockDb);
+  }
+
+  /**
+   * When FunctionRegistry.getFunctionInfo returns null (JAR unavailable) but the function still
+   * exists in the metastore, DROP FUNCTION must proceed and emit a drop task so the orphaned
+   * definition is actually removed.
+   */
+  @Test
+  void testDropSucceedsWhenJarUnavailableButFunctionInMetastore() throws Exception {
+    Hive mockDb = mock(Hive.class);
+    Database msDatabase = new Database("default", "", "/tmp", Collections.emptyMap());
+    Function msFunction = new Function("dummy", "default", "com.example.DummyUDF",
+        "user", PrincipalType.USER, 0, FunctionType.JAVA, Collections.emptyList());
+
+    when(mockDb.getFunctions("default", "dummy")).thenReturn(List.of("dummy"));
+    when(mockDb.getFunction("default", "dummy")).thenReturn(msFunction);
+    when(mockDb.getDatabase("default")).thenReturn(msDatabase);
+    when(mockDb.getDatabase(any(), eq("default"))).thenReturn(msDatabase);
+
+    try (MockedStatic<FunctionRegistry> registry = mockStatic(FunctionRegistry.class)) {
+      registry.when(() -> FunctionRegistry.getFunctionInfo("dummy")).thenReturn(null);
+
+      DropFunctionAnalyzer analyzer = createAnalyzer(mockDb);
+      analyzer.analyzeInternal(parse("drop function dummy"));
+
+      assertEquals(1, analyzer.getRootTasks().size(), "Expected one DROP task even when JAR is unavailable");
+    }
+  }
+
+  /**
+   * When the function does not exist in either the session registry or the metastore, DROP FUNCTION
+   * (without IF EXISTS) must surface the error to the client.
+   */
+  @Test
+  void testDropThrowsWhenFunctionNotInMetastore() throws Exception {
+    conf.setBoolVar(ConfVars.DROP_IGNORES_NON_EXISTENT, false);
+    Hive mockDb = mock(Hive.class);
+    when(mockDb.getFunctions(anyString(), anyString())).thenReturn(Collections.emptyList());
+
+    try (MockedStatic<FunctionRegistry> registry = mockStatic(FunctionRegistry.class)) {
+      registry.when(() -> FunctionRegistry.getFunctionInfo("dummy")).thenReturn(null);
+
+      DropFunctionAnalyzer analyzer = createAnalyzer(mockDb);
+      assertThrows(SemanticException.class, () -> analyzer.analyzeInternal(parse("drop function dummy")),
+          "Expected SemanticException when function not in registry or metastore");
+    }
+  }
+
+  /**
+   * DROP FUNCTION IF EXISTS must silently succeed (no task, no exception) when the function is
+   * absent from both the session registry and the metastore.
+   */
+  @Test
+  void testDropIfExistsSilentWhenFunctionAbsent() throws Exception {
+    Hive mockDb = mock(Hive.class);
+    when(mockDb.getFunctions(anyString(), anyString())).thenReturn(Collections.emptyList());
+
+    try (MockedStatic<FunctionRegistry> registry = mockStatic(FunctionRegistry.class)) {
+      registry.when(() -> FunctionRegistry.getFunctionInfo("dummy")).thenReturn(null);
+
+      DropFunctionAnalyzer analyzer = createAnalyzer(mockDb);
+      analyzer.analyzeInternal(parse("drop function if exists dummy"));
+
+      assertEquals(0, analyzer.getRootTasks().size(), "Expected no tasks when function is absent and IF EXISTS is set");
+    }
+  }
+
+  private ASTNode parse(String sql) throws Exception {
+    return ParseUtils.parse(sql, new Context(conf));
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainDdlAcidTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainDdlAcidTable.java
new file mode 100644
index 000000000000..bc11d5adb586
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainDdlAcidTable.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.File;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.TxnCommandsBaseForTests;
+import org.junit.Test;
+
+public class TestExplainDdlAcidTable extends TxnCommandsBaseForTests {
+
+  private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
+      File.separator + TestExplainDdlAcidTable.class.getCanonicalName()
+      + "-" + System.currentTimeMillis()
+  ).getPath().replaceAll("\\\\", "/");
+
+  private static final String EXPLAIN_DDL = "EXPLAIN DDL SELECT * FROM " + Table.ACIDTBL;
+
+  @Override
+  protected void initHiveConf() {
+    super.initHiveConf();
+    HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.HIVE_IN_TEST, false);
+  }
+
+  @Override
+  protected String getTestDataDir() {
+    return TEST_DATA_DIR;
+  }
+
+  @Test
+  public void testExplainDdlAcidTableUnauthorized() throws Exception {
+    runExplainDdl(hiveConf);
+  }
+
+  /**
+   * {@link DDLPlanUtils} must use the query conf.
+   * HIVE-29330 repoints thread-local {@link org.apache.hadoop.hive.ql.metadata.Hive} conf via
+   * {@link org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactoryImpl}
+   */
+  @Test
+  public void testExplainDdlAcidTableAuthorized() throws Exception {
+    HiveConf queryConf = new HiveConf(hiveConf);
+    HiveConf.setBoolVar(queryConf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, true);
+    runExplainDdl(queryConf);
+  }
+
+  private void runExplainDdl(HiveConf queryConf) throws Exception {
+    Driver driver = new Driver(new QueryState.Builder().withHiveConf(queryConf).build(), null);
+    driver.setMaxRows(10000);
+    try {
+      driver.run(EXPLAIN_DDL);
+    } finally {
+      driver.close();
+      driver.destroy();
+    }
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
index fa71845ece27..34aa466084a1 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
@@ -180,7 +180,7 @@ public Edge answer(InvocationOnMock invocation) throws Throwable {
     sessionState = mock(TezSessionState.class);
     when(sessionState.getTezClient()).thenReturn(session);
     when(sessionState.reopen()).thenReturn(sessionState);
-    when(session.submitDAG(any(DAG.class)))
+    when(sessionState.submitDAG(any(DAG.class)))
       .thenThrow(new SessionNotRunning(""))
       .thenReturn(mock(DAGClient.class));
   }
@@ -229,7 +229,7 @@ public void testSubmit() throws Exception {
     task.submit(dag, Ref.from(sessionState));
     // validate close/reopen
     verify(sessionState, times(1)).reopen();
-    verify(session, times(2)).submitDAG(any(DAG.class));
+    verify(sessionState, times(2)).submitDAG(any(DAG.class));
   }
 
   @Test
@@ -241,14 +241,14 @@ public void testSubmitOnNonPoolSession() throws Exception {
     TezClient tezClient = mock(TezClient.class);
     when(tezSessionState.reopen()).thenThrow(new HiveException("Dag cannot be submitted"));
     when(tezSessionState.getTezClient()).thenReturn(tezClient);
-    when(tezClient.submitDAG(any(DAG.class))).thenThrow(new SessionNotRunning(""));
+    when(tezSessionState.submitDAG(any(DAG.class))).thenThrow(new SessionNotRunning(""));
     doNothing().when(tezSessionState).destroy();
     boolean isException = false;
     try {
       task.submit(dag, Ref.from(tezSessionState));
     } catch (Exception e) {
       isException = true;
-      verify(tezClient, times(1)).submitDAG(any(DAG.class));
+      verify(tezSessionState, times(1)).submitDAG(any(DAG.class));
       verify(tezSessionState, times(2)).reopen();
       verify(tezSessionState, times(1)).destroy();
       verify(tezSessionState, times(0)).returnToSessionManager();
@@ -266,13 +266,13 @@ public void testSubmitOnPoolSession() throws Exception {
     doNothing().when(tezSessionPoolSession).returnToSessionManager();
     when(tezSessionPoolSession.getTezClient()).thenReturn(tezClient);
     when(tezSessionPoolSession.isDefault()).thenReturn(true);
-    when(tezClient.submitDAG(any(DAG.class))).thenThrow(new SessionNotRunning(""));
+    when(tezSessionPoolSession.submitDAG(any(DAG.class))).thenThrow(new SessionNotRunning(""));
     boolean isException = false;
     try {
       task.submit(dag, Ref.from(tezSessionPoolSession));
     } catch (Exception e) {
       isException = true;
-      verify(tezClient, times(1)).submitDAG(any(DAG.class));
+      verify(tezSessionPoolSession, times(1)).submitDAG(any(DAG.class));
       verify(tezSessionPoolSession, times(2)).reopen();
       verify(tezSessionPoolSession, times(0)).destroy();
       verify(tezSessionPoolSession, times(1)).returnToSessionManager();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestZookeeperExternalSessionsRegistryClient.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestZookeeperExternalSessionsRegistryClient.java
index 8274e87187b0..aa25f0b2450d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestZookeeperExternalSessionsRegistryClient.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestZookeeperExternalSessionsRegistryClient.java
@@ -19,8 +19,10 @@
 package org.apache.hadoop.hive.ql.exec.tez;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import org.apache.curator.framework.CuratorFramework;
 import org.apache.curator.framework.CuratorFrameworkFactory;
@@ -28,8 +30,16 @@
 import org.apache.curator.test.TestingServer;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.zookeeper.KeeperException;
 import org.junit.Test;
 
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
 /**
  * Tests for {@link ZookeeperExternalSessionsRegistryClient}.
  */
@@ -128,5 +138,143 @@ public void testReuseSameSession() throws Exception {
       }
     }
   }
+
+  /**
+   * Tests that multiple registry clients (simulating multiple HS2 instances)
+   * respect the global distributed lock (claims) and do not claim the same session simultaneously.
+   */
+  @Test
+  public void testSessionClaimsFromDifferentRegistryClients() throws Exception {
+    CuratorFramework client = null;
+    ZookeeperExternalSessionsRegistryClient registry1 = null;
+    ZookeeperExternalSessionsRegistryClient registry2 = null;
+
+    try (TestingServer server = new TestingServer()) {
+      String connectString = server.getConnectString();
+
+      HiveConf conf = new HiveConf();
+      conf.setVar(ConfVars.HIVE_ZOOKEEPER_QUORUM, connectString);
+      conf.setVar(ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE, "/tez_ns_concurrent");
+      conf.setIntVar(ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_WAIT_MAX_ATTEMPTS, 5);
+
+      String namespace = HiveConf.getVar(conf, ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE);
+      String effectivePath = ZookeeperExternalSessionsRegistryClient.normalizeZkPath(namespace);
+
+      CuratorFrameworkFactory.Builder builder = CuratorFrameworkFactory.builder();
+      client = builder.connectString(connectString).retryPolicy(new RetryOneTime(1)).build();
+      client.start();
+
+      client.create().creatingParentsIfNeeded().forPath(effectivePath + "/app_1");
+      client.create().forPath(effectivePath + "/app_2");
+
+      registry1 = new ZookeeperExternalSessionsRegistryClient(conf);
+      registry2 = new ZookeeperExternalSessionsRegistryClient(conf);
+
+      String sessionFromRegistry1 = registry1.getSession();
+      String sessionFromRegistry2 = registry2.getSession();
+
+      assertNotNull("Registry 1 should have claimed a session", sessionFromRegistry1);
+      assertNotNull("Registry 2 should have claimed a session", sessionFromRegistry2);
+
+      assertNotEquals("The two registries should claim different sessions!",
+          sessionFromRegistry1, sessionFromRegistry2);
+
+      registry1.returnSession(sessionFromRegistry1);
+
+      String session3FromRegistry2 = registry2.getSession();
+      assertEquals("Registry 2 should be able to claim the newly released session",
+          sessionFromRegistry1, session3FromRegistry2);
+
+      registry2.returnSession(sessionFromRegistry2);
+      registry2.returnSession(session3FromRegistry2);
+    } finally {
+      if (registry1 != null) {
+        registry1.close();
+      }
+      if (registry2 != null) {
+        registry2.close();
+      }
+      if (client != null) {
+        client.close();
+      }
+    }
+  }
+
+  /**
+   * Tests that the InterProcessMutex enforces strict Global FIFO ordering: three registries queue up while
+   * no session znode exists and must then be served in arrival order as sessions are published or returned.
+   */
+  @Test
+  public void testFIFOSessionClaimsFromDifferentRegistries() throws Exception {
+    try (TestingServer server = new TestingServer()) {
+      String connectString = server.getConnectString();
+
+      HiveConf conf = new HiveConf();
+      conf.setVar(ConfVars.HIVE_ZOOKEEPER_QUORUM, connectString);
+      conf.setVar(ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE, "/tez_ns_fifo");
+      conf.setIntVar(ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_WAIT_MAX_ATTEMPTS, 5);
+
+      String namespace = HiveConf.getVar(conf, ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE);
+      String effectivePath = ZookeeperExternalSessionsRegistryClient.normalizeZkPath(namespace);
+
+      CuratorFrameworkFactory.Builder builder = CuratorFrameworkFactory.builder();
+      CuratorFramework client = builder.connectString(connectString).retryPolicy(new RetryOneTime(1)).build();
+      client.start();
+
+      ExecutorService executor = Executors.newFixedThreadPool(3);
+      ZookeeperExternalSessionsRegistryClient registry1 = new ZookeeperExternalSessionsRegistryClient(conf);
+      ZookeeperExternalSessionsRegistryClient registry2 = new ZookeeperExternalSessionsRegistryClient(conf);
+      ZookeeperExternalSessionsRegistryClient registry3 = new ZookeeperExternalSessionsRegistryClient(conf);
+      try {
+        // Submit getSession() for one registry at a time and wait for each to reach globalQueue which is visible
+        // as a sequential lock znode, before starting the next, so FIFO order matches registry1→2→3.
+        String queuePath = effectivePath + "-queue";
+        Future<String> future1 = executor.submit(registry1::getSession);
+        awaitMutexQueueSize(client, queuePath, 1);
+
+        Future<String> future2 = executor.submit(registry2::getSession);
+        awaitMutexQueueSize(client, queuePath, 2);
+
+        Future<String> future3 = executor.submit(registry3::getSession);
+        awaitMutexQueueSize(client, queuePath, 3);
+        // All three callers are queued by now; publish sessions one at a time and check who is served.
+        client.create().creatingParentsIfNeeded().forPath(effectivePath + "/app_first");
+        assertEquals("Registry 1 should get the first AM", "app_first", future1.get(5, TimeUnit.SECONDS));
+
+        client.create().forPath(effectivePath + "/app_second");
+        String session2 = future2.get(5, TimeUnit.SECONDS);
+
+        assertEquals("Registry 2 should get the second AM", "app_second", session2);
+        registry2.returnSession(session2);
+        // Only two AMs are published, so registry 3 can only receive the one registry 2 just returned.
+        assertEquals("Registry 3 should get the second AM", "app_second", future3.get(5, TimeUnit.SECONDS));
+      } finally {
+        registry1.close();
+        registry2.close();
+        registry3.close();
+        client.close();
+        executor.shutdownNow();
+      }
+    }
+  }
+
+  // Polls until queuePath has at least expectedSize children (a missing znode counts as none); fails after 30s.
+  private static void awaitMutexQueueSize(CuratorFramework client, String queuePath, int expectedSize)
+      throws Exception {
+    long deadlineNs = System.nanoTime() + TimeUnit.SECONDS.toNanos(30);
+    while (System.nanoTime() - deadlineNs < 0) {
+      List<String> childQueueNodes;
+      try {
+        childQueueNodes = client.getChildren().forPath(queuePath);
+      } catch (KeeperException.NoNodeException ignored) {
+        childQueueNodes = Collections.emptyList();
+      }
+      if (childQueueNodes.size() >= expectedSize) {
+        return;
+      }
+      Thread.sleep(100);
+    }
+    fail("Timed out waiting for " + expectedSize + " mutex queue participants under " + queuePath);
+  }
 }
 
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
index 35553d9cb445..85e6882d4d68 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java
@@ -54,7 +54,6 @@
  */
 class TestVectorMapJoinOuterGenerateResultOperator {
 
-  /** Concrete subclass that exposes the generateOuterNulls* methods to tests. */
   private static final class TestableOuterOp extends VectorMapJoinOuterGenerateResultOperator {
     @Override
     protected String getLoggingPrefix() {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
index 0a0867fff9f1..685e729834d4 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
@@ -119,6 +119,36 @@ public void decimalRead() throws Exception {
     stringReadDecimal(isDictionaryEncoding);
   }
 
+  @Test
+  public void testDecimal64Read() throws Exception {
+    decimal64Read(isDictionaryEncoding); // DECIMAL_64-tagged read; the flag picks the expected values
+  }
+
+  @Test
+  public void testDecimal64ReadInt32() throws Exception {
+    decimal64ReadInt32(); // INT32-backed decimal, written dictionary-encoded and plain
+  }
+
+  @Test
+  public void testDecimal64ReadInt64() throws Exception {
+    decimal64ReadInt64(); // INT64-backed decimal, written dictionary-encoded and plain
+  }
+
+  @Test
+  public void testDecimal64ReadScaleEvolution() throws Exception {
+    decimal64ReadScaleEvolution(); // file scale differs from table scale; INT32/INT64 x dictionary/plain
+  }
+
+  @Test
+  public void testDecimal64ReadPrecisionNarrowing() throws Exception {
+    decimal64ReadPrecisionNarrowing(); // file precision wider than table; INT32/INT64 x dictionary/plain
+  }
+
+  @Test
+  public void testDecimal64ReadFixedLenByteArray() throws Exception {
+    decimal64ReadFixedLenByteArray(); // 16-byte FIXED_LEN_BYTE_ARRAY decimal, dictionary and plain
+  }
+
   @Test
   public void verifyBatchOffsets() throws Exception {
     super.verifyBatchOffsets();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedDictionaryEncodingColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedDictionaryEncodingColumnReader.java
index 32d27d95477f..d80abc34d094 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedDictionaryEncodingColumnReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedDictionaryEncodingColumnReader.java
@@ -96,4 +96,19 @@ public void decimalRead() throws Exception {
     decimalRead(isDictionaryEncoding);
     stringReadDecimal(isDictionaryEncoding);
   }
+
+  @Test
+  public void testDecimal64Read() throws Exception {
+    decimal64Read(isDictionaryEncoding); // DECIMAL_64-tagged read; the flag picks the expected values
+  }
+
+  @Test
+  public void testDecimal64ReadInt32() throws Exception {
+    decimal64ReadInt32(); // NOTE(review): helper takes no encoding flag and runs both encodings itself
+  }
+
+  @Test
+  public void testDecimal64ReadInt64() throws Exception {
+    decimal64ReadInt64(); // NOTE(review): helper takes no encoding flag and runs both encodings itself
+  }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
index c9c858932550..5df190f8fab8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
@@ -25,8 +25,10 @@
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -60,9 +62,13 @@
 import org.apache.parquet.hadoop.example.GroupReadSupport;
 import org.apache.parquet.hadoop.example.GroupWriteSupport;
 import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
 
 import java.io.IOException;
+import java.math.BigInteger;
 import java.util.Arrays;
 import java.util.List;
 import java.util.TimeZone;
@@ -258,19 +264,36 @@ protected static boolean isNull(int index) {
 
   public static VectorizedParquetRecordReader createTestParquetReader(String schemaString, Configuration conf)
       throws IOException, InterruptedException, HiveException {
+    return createTestParquetReader(schemaString, conf, null); // null: no physical-variation tags are set
+  }
+
+  public static VectorizedParquetRecordReader createTestParquetReader(String schemaString, Configuration conf,
+      DataTypePhysicalVariation[] rowDataTypePhysicalVariations)
+      throws IOException, InterruptedException, HiveException {
+    return createTestParquetReader(schemaString, conf, rowDataTypePhysicalVariations, file); // shared 'file' fixture
+  }
+
+  public static VectorizedParquetRecordReader createTestParquetReader(String schemaString, Configuration conf,
+      DataTypePhysicalVariation[] rowDataTypePhysicalVariations, Path inputFile)
+      throws IOException, InterruptedException, HiveException {
     conf.set(PARQUET_READ_SCHEMA, schemaString);
     HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
     HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
     Job vectorJob = new Job(conf, "read vector");
-    ParquetInputFormat.setInputPaths(vectorJob, file);
-    initialVectorizedRowBatchCtx(conf);
-    return new VectorizedParquetRecordReader(getFileSplit(vectorJob), new JobConf(conf));
+    ParquetInputFormat.setInputPaths(vectorJob, inputFile); // caller-chosen file, not the shared fixture
+    initialVectorizedRowBatchCtx(conf, rowDataTypePhysicalVariations); // null: variations are left unset
+    return new VectorizedParquetRecordReader(getFileSplit(vectorJob, inputFile), new JobConf(conf));
   }
 
   protected static FileSplit getFileSplit(Job vectorJob) throws IOException, InterruptedException {
+    return getFileSplit(vectorJob, file); // split over the shared 'file' fixture
+  }
+
+  protected static FileSplit getFileSplit(Job vectorJob, Path inputFile)
+      throws IOException, InterruptedException { // inputFile from offset 0, sized by the job's first split
     ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
     InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
-    FileSplit fsplit = new FileSplit(file, 0L, split.getLength(), split.getLocations());
+    FileSplit fsplit = new FileSplit(inputFile, 0L, split.getLength(), split.getLocations());
     return fsplit;
   }
 
@@ -344,14 +367,459 @@ protected static void writeData(ParquetWriter writer, boolean isDictionar
   }
 
   protected static void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
+    initialVectorizedRowBatchCtx(conf, null); // null: no physical-variation tags are set
+  }
+
+  protected static void initialVectorizedRowBatchCtx(Configuration conf,
+      DataTypePhysicalVariation[] rowDataTypePhysicalVariations) throws HiveException {
     MapWork mapWork = new MapWork();
     VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
     rbCtx.init(createStructObjectInspector(conf), new String[0]);
+    if (rowDataTypePhysicalVariations != null) { // null: leave the context exactly as init() set it up
+      rbCtx.setRowDataTypePhysicalVariations(rowDataTypePhysicalVariations);
+    }
     mapWork.setVectorMode(true);
     mapWork.setVectorizedRowBatchCtx(rbCtx);
     Utilities.setMapWork(conf, mapWork);
   }
 
+  /**
+   * Verifies the Decimal64 read path: when the decimal column is tagged DECIMAL_64 (as the vectorizer does
+   * once {@code MapredParquetInputFormat} advertises it), the reader must fill a long-backed
+   * {@link Decimal64ColumnVector} whose unscaled longs match {@code getDecimal(isDictionaryEncoding, row)}.
+   */
+  protected void decimal64Read(boolean isDictionaryEncoding) throws Exception {
+    Configuration readerConf = new Configuration();
+    readerConf.set(IOConstants.COLUMNS, "value");
+    readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(5,2)");
+    readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    VectorizedParquetRecordReader reader = createTestParquetReader(
+        "message hive_schema { required value (DECIMAL(5,2));}", readerConf,
+        new DataTypePhysicalVariation[] { DataTypePhysicalVariation.DECIMAL_64 });
+    try {
+      VectorizedRowBatch previous = reader.createValue();
+      int c = 0;
+      while (reader.next(NullWritable.get(), previous)) {
+        assertTrue("expected Decimal64ColumnVector but got " + previous.cols[0].getClass().getSimpleName(),
+            previous.cols[0] instanceof Decimal64ColumnVector);
+        Decimal64ColumnVector vector = (Decimal64ColumnVector) previous.cols[0];
+        assertTrue(vector.noNulls);
+        assertEquals((short) 5, vector.precision);
+        assertEquals((short) 2, vector.scale);
+        // Walk only the rows this batch reports (previous.size), not the vector's full capacity.
+        for (int i = 0; i < previous.size; i++) {
+          if (c == nElements) {
+            break;
+          }
+          long expected = new HiveDecimalWritable(getDecimal(isDictionaryEncoding, c).setScale(2)).serialize64(2);
+          assertEquals("Check failed at pos " + c, expected, vector.vector[i]);
+          assertFalse(vector.isNull[i]);
+          c++;
+        }
+      }
+      assertEquals(nElements, c);
+    } finally {
+      reader.close();
+    }
+  }
+
+  // Unscaled values (scale=2) used by the INT32/INT64-backed Decimal64 tests. The element at
+  // DECIMAL64_NULL_INDEX is ignored because those rows are written as null. 1001 -> 10.01,
+  // 1234 -> 12.34, -550 -> -5.50, 9999 -> 99.99. The Decimal64 vector must hold the full unscaled
+  // long; at scale 2 that equals the literal value, which is what the tests assert.
+  private static final long[] DECIMAL64_UNSCALED = { 1001L, 1234L, -550L, 0L, 9999L };
+  private static final int DECIMAL64_NULL_INDEX = 3; // position in the value cycle that is written as null
+  private static final int DECIMAL64_ROWS = 200; // rows written to, and expected back from, each generated file
+  // Dedicated file so the per-test write never clobbers the class-level {@link #file} fixture that
+  // other @Test methods read via the @BeforeClass writeData(); JUnit @Test ordering is undefined.
+  private static final Path DECIMAL64_FILE =
+      new Path(System.getProperty("java.io.tmpdir"), "testDecimal64ParquetFile");
+
+  /**
+   * Writes a parquet file with a single DECIMAL(5,2) column physically stored as the given
+   * primitive ({@link PrimitiveTypeName#INT32} or {@link PrimitiveTypeName#INT64}), cycling
+   * through the unscaled values in {@link #DECIMAL64_UNSCALED} (index {@link #DECIMAL64_NULL_INDEX}
+   * is written as null), then reads it back tagged DECIMAL_64 and asserts the long-backed
+   * {@link Decimal64ColumnVector} holds the correct unscaled longs (NOT the truncated/zero values
+   * the buggy reader produced).
+   * <p>
+ * Runs with {@code dictionaryEncoding} both true and false so that BOTH reader paths are + * exercised: with dictionary encoding on (few distinct values, huge dictionary page) Parquet + * dictionary-encodes the column and the read goes through {@code decodeDictionaryIds}; with it off + * every value is plain-encoded and the read goes through {@code readDecimal64}. Both populate the + * {@link Decimal64ColumnVector} via {@link Decimal64ColumnVector#set(int, byte[], int)}. + */ + protected void decimal64ReadFromPrimitive(PrimitiveTypeName physical, boolean dictionaryEncoding) + throws Exception { + final int scale = 2; + final int precision = 5; + MessageType writeSchema = Types.buildMessage() + .optional(physical).as(LogicalTypeAnnotation.decimalType(scale, precision)).named("value") + .named("hive_schema"); + + FileSystem fs = DECIMAL64_FILE.getFileSystem(conf); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + GroupWriteSupport.setSchema(writeSchema, conf); + try (ParquetWriter writer = new ParquetWriter<>( + DECIMAL64_FILE, new GroupWriteSupport(), GZIP, 1024 * 1024, 1024, 1024 * 1024, + dictionaryEncoding, false, PARQUET_1_0, conf)) { + SimpleGroupFactory f = new SimpleGroupFactory(writeSchema); + for (int i = 0; i < DECIMAL64_ROWS; i++) { + int idx = i % DECIMAL64_UNSCALED.length; + Group group = f.newGroup(); + if (idx != DECIMAL64_NULL_INDEX) { + if (physical == PrimitiveTypeName.INT32) { + group.append("value", (int) DECIMAL64_UNSCALED[idx]); + } else { + group.append("value", DECIMAL64_UNSCALED[idx]); + } + } + writer.write(group); + } + } + + Configuration readerConf = new Configuration(); + readerConf.set(IOConstants.COLUMNS, "value"); + readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(" + precision + "," + scale + ")"); + readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = createTestParquetReader( 
+ writeSchema.toString(), readerConf, + new DataTypePhysicalVariation[] { DataTypePhysicalVariation.DECIMAL_64 }, DECIMAL64_FILE); + VectorizedRowBatch previous = reader.createValue(); + String label = physical + (dictionaryEncoding ? " (dict)" : " (plain)"); + try { + int c = 0; + boolean sawNull = false; + while (reader.next(NullWritable.get(), previous)) { + assertTrue("expected Decimal64ColumnVector but got " + previous.cols[0].getClass().getSimpleName(), + previous.cols[0] instanceof Decimal64ColumnVector); + Decimal64ColumnVector vector = (Decimal64ColumnVector) previous.cols[0]; + assertEquals((short) precision, vector.precision); + assertEquals((short) scale, vector.scale); + for (int i = 0; i < previous.size; i++) { + if (c == DECIMAL64_ROWS) { + break; + } + int idx = c % DECIMAL64_UNSCALED.length; + if (idx == DECIMAL64_NULL_INDEX) { + assertTrue("Expected null at pos " + c + " for " + label, vector.isNull[i]); + sawNull = true; + } else { + assertFalse("Unexpected null at pos " + c + " for " + label, vector.isNull[i]); + // A Decimal64 vector must hold the FULL unscaled long: 10.01 -> 1001. For scale 2 the + // expected unscaled long is exactly DECIMAL64_UNSCALED[idx]. 
+ assertEquals("Decimal64 must keep the full unscaled value at pos " + c + " for " + label, + DECIMAL64_UNSCALED[idx], vector.vector[i]); + } + c++; + } + } + assertEquals("Did not read all rows for " + label, DECIMAL64_ROWS, c); + assertTrue("Null row was never exercised for " + label, sawNull); + } finally { + reader.close(); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + } + } + + protected void decimal64ReadInt32() throws Exception { + decimal64ReadFromPrimitive(PrimitiveTypeName.INT32, true); + decimal64ReadFromPrimitive(PrimitiveTypeName.INT32, false); + } + + protected void decimal64ReadInt64() throws Exception { + decimal64ReadFromPrimitive(PrimitiveTypeName.INT64, true); + decimal64ReadFromPrimitive(PrimitiveTypeName.INT64, false); + } + + /** + * Gate test for the Decimal64 identity fast path. When the Parquet file scale differs from the Hive + * table scale (schema evolution), the fast path must NOT engage: each value must be rescaled + * (rounded, or NULLed when it no longer fits) exactly as HiveDecimal would -- never copied verbatim, + * which would be off by a power of ten. Writes DECIMAL(9,4), reads as DECIMAL(6,2), and asserts every + * result against a HiveDecimal oracle. (The {@code decimal64Read*} tests cover the file==table-scale + * identity fast path; this covers the mismatched-scale fallback and proves the gate.) + */ + protected void decimal64ReadScaleEvolution(PrimitiveTypeName physical, boolean dictionaryEncoding) + throws Exception { + final int fileScale = 4; + final int filePrecision = 9; + final int readScale = 2; + final int readPrecision = 6; + // Unscaled values at fileScale=4 (index DECIMAL64_NULL_INDEX is written as null): + // 1234567 -> 123.4567 (rounds to 123.46), 100 -> 0.0100, -98765 -> -9.8765 (-> -9.88), + // 99999999 -> 9999.9999 (rounds to 10000.00 -> exceeds DECIMAL(6,2) -> NULL). 
+ long[] unscaled = { 1234567L, 100L, -98765L, 0L, 99999999L }; + + FileSystem fs = DECIMAL64_FILE.getFileSystem(conf); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + MessageType writeSchema = Types.buildMessage() + .optional(physical).as(LogicalTypeAnnotation.decimalType(fileScale, filePrecision)).named("value") + .named("hive_schema"); + GroupWriteSupport.setSchema(writeSchema, conf); + try (ParquetWriter writer = new ParquetWriter<>( + DECIMAL64_FILE, new GroupWriteSupport(), GZIP, 1024 * 1024, 1024, 1024 * 1024, + dictionaryEncoding, false, PARQUET_1_0, conf)) { + SimpleGroupFactory f = new SimpleGroupFactory(writeSchema); + for (int i = 0; i < DECIMAL64_ROWS; i++) { + int idx = i % unscaled.length; + Group group = f.newGroup(); + if (idx != DECIMAL64_NULL_INDEX) { + if (physical == PrimitiveTypeName.INT32) { + group.append("value", (int) unscaled[idx]); + } else { + group.append("value", unscaled[idx]); + } + } + writer.write(group); + } + } + + Configuration readerConf = new Configuration(); + readerConf.set(IOConstants.COLUMNS, "value"); + readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(" + readPrecision + "," + readScale + ")"); + readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = createTestParquetReader( + writeSchema.toString(), readerConf, + new DataTypePhysicalVariation[] { DataTypePhysicalVariation.DECIMAL_64 }, DECIMAL64_FILE); + VectorizedRowBatch previous = reader.createValue(); + String label = physical + (dictionaryEncoding ? " (dict)" : " (plain)") + " scale-evolution"; + HiveDecimalWritable oracle = new HiveDecimalWritable(); + try { + int c = 0; + while (reader.next(NullWritable.get(), previous)) { + Decimal64ColumnVector vector = (Decimal64ColumnVector) previous.cols[0]; + // The long is stored at the Hive (table) scale, NOT the file scale -- proves the gate. 
+ assertEquals((short) readScale, vector.scale); + for (int i = 0; i < previous.size; i++) { + if (c == DECIMAL64_ROWS) { + break; + } + int idx = c % unscaled.length; + if (idx == DECIMAL64_NULL_INDEX) { + assertTrue("Expected null at pos " + c + " for " + label, vector.isNull[i]); + } else { + // Oracle: interpret the unscaled value at the file scale, enforce to the read type. + oracle.set(HiveDecimal.create(BigInteger.valueOf(unscaled[idx]), fileScale)); + oracle.mutateEnforcePrecisionScale(readPrecision, readScale); + if (!oracle.isSet()) { + assertTrue("Expected NULL (out of range) at pos " + c + " for " + label, vector.isNull[i]); + } else { + assertFalse("Unexpected null at pos " + c + " for " + label, vector.isNull[i]); + assertEquals("Scale-evolved Decimal64 must match HiveDecimal at pos " + c + " for " + label, + oracle.serialize64(readScale), vector.vector[i]); + } + } + c++; + } + } + assertEquals("Did not read all rows for " + label, DECIMAL64_ROWS, c); + } finally { + reader.close(); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + } + } + + protected void decimal64ReadScaleEvolution() throws Exception { + decimal64ReadScaleEvolution(PrimitiveTypeName.INT32, true); + decimal64ReadScaleEvolution(PrimitiveTypeName.INT32, false); + decimal64ReadScaleEvolution(PrimitiveTypeName.INT64, true); + decimal64ReadScaleEvolution(PrimitiveTypeName.INT64, false); + } + + /** + * Bounds test for the Decimal64 identity fast path: file scale == table scale (so the fast path + * engages) but the file precision is wider than the table precision. Values outside the table + * precision -- and the pathological {@link Long#MIN_VALUE} -- must be NULLed by the fast path's + * bounds check, exactly as the enforced path would. Writes DECIMAL(filePrecision,2), reads DECIMAL(5,2). 
+ */ + protected void decimal64ReadPrecisionNarrowing(PrimitiveTypeName physical, boolean dictionaryEncoding) + throws Exception { + final int scale = 2; + final int filePrecision = (physical == PrimitiveTypeName.INT32) ? 9 : 18; + final int readPrecision = 5; // max abs unscaled value = 99999 + final long absMax = 99999L; + // Unscaled @ scale 2: 1001 -> 10.01 (fits), 99999 -> 999.99 (boundary, fits), + // 100000 -> 1000.00 (precision 6 > 5 -> NULL), then an out-of-range negative -> NULL. + long[] unscaled = (physical == PrimitiveTypeName.INT32) + ? new long[] { 1001L, 99999L, 100000L, -100000L } + : new long[] { 1001L, 99999L, 100000L, Long.MIN_VALUE }; + + FileSystem fs = DECIMAL64_FILE.getFileSystem(conf); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + MessageType writeSchema = Types.buildMessage() + .optional(physical).as(LogicalTypeAnnotation.decimalType(scale, filePrecision)).named("value") + .named("hive_schema"); + GroupWriteSupport.setSchema(writeSchema, conf); + try (ParquetWriter writer = new ParquetWriter<>( + DECIMAL64_FILE, new GroupWriteSupport(), GZIP, 1024 * 1024, 1024, 1024 * 1024, + dictionaryEncoding, false, PARQUET_1_0, conf)) { + SimpleGroupFactory f = new SimpleGroupFactory(writeSchema); + for (int i = 0; i < DECIMAL64_ROWS; i++) { + long v = unscaled[i % unscaled.length]; + Group group = f.newGroup(); + if (physical == PrimitiveTypeName.INT32) { + group.append("value", (int) v); + } else { + group.append("value", v); + } + writer.write(group); + } + } + + Configuration readerConf = new Configuration(); + readerConf.set(IOConstants.COLUMNS, "value"); + readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(" + readPrecision + "," + scale + ")"); + readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = createTestParquetReader( + writeSchema.toString(), readerConf, + new 
DataTypePhysicalVariation[] { DataTypePhysicalVariation.DECIMAL_64 }, DECIMAL64_FILE); + VectorizedRowBatch previous = reader.createValue(); + String label = physical + (dictionaryEncoding ? " (dict)" : " (plain)") + " precision-narrowing"; + try { + int c = 0; + while (reader.next(NullWritable.get(), previous)) { + Decimal64ColumnVector vector = (Decimal64ColumnVector) previous.cols[0]; + assertEquals((short) scale, vector.scale); + for (int i = 0; i < previous.size; i++) { + if (c == DECIMAL64_ROWS) { + break; + } + long v = unscaled[c % unscaled.length]; + if (v < -absMax || v > absMax) { + assertTrue("Expected NULL (out of table precision) at pos " + c + " for " + label, vector.isNull[i]); + } else { + assertFalse("Unexpected null at pos " + c + " for " + label, vector.isNull[i]); + assertEquals("In-range Decimal64 must be stored verbatim at pos " + c + " for " + label, + v, vector.vector[i]); + } + c++; + } + } + assertEquals("Did not read all rows for " + label, DECIMAL64_ROWS, c); + } finally { + reader.close(); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + } + } + + protected void decimal64ReadPrecisionNarrowing() throws Exception { + decimal64ReadPrecisionNarrowing(PrimitiveTypeName.INT32, true); + decimal64ReadPrecisionNarrowing(PrimitiveTypeName.INT32, false); + decimal64ReadPrecisionNarrowing(PrimitiveTypeName.INT64, true); + decimal64ReadPrecisionNarrowing(PrimitiveTypeName.INT64, false); + } + + /** + * Coverage for the FIXED_LEN_BYTE_ARRAY-backed Decimal64 fast path: binaryToUnscaledLong decodes + * the big-endian two's-complement bytes straight into the long. Uses a 16-byte fixed array (wider + * than 8) so the multi-byte shift and leading sign-byte extension actually run, includes negative + * values (encoded with leading 0xFF), and a value beyond the read precision that the bounds check + * must NULL. 
Writes DECIMAL(18,2) so the fast path engages (file scale 2 == table scale, file + * precision <= 18), reads as DECIMAL(10,2); every result is checked against the unscaled literal. + */ + protected void decimal64ReadFixedLenByteArray(boolean dictionaryEncoding) throws Exception { + final int scale = 2; + final int filePrecision = 18; + final int byteLen = 16; + final int readPrecision = 10; // max abs unscaled value at scale 2 = 9_999_999_999 + final long absMax = 9_999_999_999L; + // 10.01, -10.01, the +/- precision-10 boundary, then 11 digits -> exceeds DECIMAL(10,2) -> NULL. + long[] unscaled = { 1001L, -1001L, absMax, -absMax, 10_000_000_000L }; + + FileSystem fs = DECIMAL64_FILE.getFileSystem(conf); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + MessageType writeSchema = Types.buildMessage() + .optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(byteLen) + .as(LogicalTypeAnnotation.decimalType(scale, filePrecision)).named("value") + .named("hive_schema"); + GroupWriteSupport.setSchema(writeSchema, conf); + try (ParquetWriter writer = new ParquetWriter<>( + DECIMAL64_FILE, new GroupWriteSupport(), GZIP, 1024 * 1024, 1024, 1024 * 1024, + dictionaryEncoding, false, PARQUET_1_0, conf)) { + SimpleGroupFactory f = new SimpleGroupFactory(writeSchema); + for (int i = 0; i < DECIMAL64_ROWS; i++) { + Group group = f.newGroup(); + group.append("value", Binary.fromConstantByteArray(toFixedLenBytes(unscaled[i % unscaled.length], byteLen))); + writer.write(group); + } + } + + Configuration readerConf = new Configuration(); + readerConf.set(IOConstants.COLUMNS, "value"); + readerConf.set(IOConstants.COLUMNS_TYPES, "decimal(" + readPrecision + "," + scale + ")"); + readerConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + readerConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = createTestParquetReader( + writeSchema.toString(), readerConf, + new DataTypePhysicalVariation[] { 
DataTypePhysicalVariation.DECIMAL_64 }, DECIMAL64_FILE); + VectorizedRowBatch previous = reader.createValue(); + String label = "FIXED_LEN_BYTE_ARRAY(" + byteLen + ")" + (dictionaryEncoding ? " (dict)" : " (plain)"); + try { + int c = 0; + while (reader.next(NullWritable.get(), previous)) { + Decimal64ColumnVector vector = (Decimal64ColumnVector) previous.cols[0]; + assertEquals((short) scale, vector.scale); + for (int i = 0; i < previous.size; i++) { + if (c == DECIMAL64_ROWS) { + break; + } + long v = unscaled[c % unscaled.length]; + if (v < -absMax || v > absMax) { + assertTrue("Expected NULL (out of read precision) at pos " + c + " for " + label, vector.isNull[i]); + } else { + assertFalse("Unexpected null at pos " + c + " for " + label, vector.isNull[i]); + assertEquals("Wide byte-array Decimal64 must decode the full unscaled value at pos " + c + + " for " + label, v, vector.vector[i]); + } + c++; + } + } + assertEquals("Did not read all rows for " + label, DECIMAL64_ROWS, c); + } finally { + reader.close(); + if (fs.exists(DECIMAL64_FILE)) { + fs.delete(DECIMAL64_FILE, true); + } + } + } + + protected void decimal64ReadFixedLenByteArray() throws Exception { + decimal64ReadFixedLenByteArray(true); + decimal64ReadFixedLenByteArray(false); + } + + // Big-endian two's-complement of value, left-padded to exactly len bytes (FIXED_LEN_BYTE_ARRAY + // storage). Negative values pad with 0xFF, exercising the leading sign-extension bytes that + // binaryToUnscaledLong must drop losslessly. 
+ private static byte[] toFixedLenBytes(long value, int len) { + byte[] bytes = new byte[len]; + if (value < 0) { + Arrays.fill(bytes, (byte) 0xFF); + } + byte[] minimal = BigInteger.valueOf(value).toByteArray(); + System.arraycopy(minimal, 0, bytes, len - minimal.length, minimal.length); + return bytes; + } + private static StructObjectInspector createStructObjectInspector(Configuration conf) { // Create row related objects String columnNames = conf.get(IOConstants.COLUMNS); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestSessionHiveMetastoreClientAlterPartitionsTempTable.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestSessionHiveMetastoreClientAlterPartitionsTempTable.java index 1cb3e1d2edd9..4f562d5aa29f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestSessionHiveMetastoreClientAlterPartitionsTempTable.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestSessionHiveMetastoreClientAlterPartitionsTempTable.java @@ -247,4 +247,15 @@ private void assertPartitionRollback(List oldParts, List a fail("Exception should have been thrown."); } + @Override + @Test(expected = MetaException.class) + public void testRenamePartitionChangeTblName() throws Exception { + super.testRenamePartitionChangeTblName(); + } + + @Override + @Test(expected = MetaException.class) + public void testRenamePartitionChangeDbName() throws Exception { + super.testRenamePartitionChangeDbName(); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java index 39c6ca8f80c4..56e294a3fd0a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java @@ -371,6 +371,17 @@ public void testBetweenSelectivityLeftEqualsRight_KO() { 
betweenSelectivity(KLL, 2, 2); } + @Test + public void testComputeNotEqualsPredicateSelectivity() { + RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.AND, + REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int3), + REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int7)); + filter = simplify(filter); + Assert.assertEquals(SqlKind.SEARCH, filter.getKind()); + FilterSelectivityEstimator estimator = new FilterSelectivityEstimator(scan, mq); + Assert.assertEquals(0.8095238095238095, estimator.estimateSelectivity(filter), DELTA); + } + @Test public void testComputeRangePredicateSelectivityWhenNoStats() { RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0, int3); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/TestViewPartitionPrivilegeObjects.java b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/TestViewPartitionPrivilegeObjects.java new file mode 100644 index 000000000000..964ba154a0e9 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/TestViewPartitionPrivilegeObjects.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.security.authorization.plugin; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.conf.HiveConfForTest; +import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; + +import java.util.List; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.verify; + +/** + * Tests the {@link HivePrivilegeObject} inputs passed to {@link HiveAuthorizer#checkPrivileges} + * for view queries over partitioned base tables. 
+ */ +public class TestViewPartitionPrivilegeObjects { + + protected static HiveConf conf; + protected static Driver driver; + static HiveAuthorizer mockedAuthorizer; + + static class MockedHiveAuthorizerFactory implements HiveAuthorizerFactory { + @Override + public HiveAuthorizer createHiveAuthorizer(HiveMetastoreClientFactory metastoreClientFactory, + HiveConf conf, HiveAuthenticationProvider authenticator, HiveAuthzSessionContext ctx) { + TestViewPartitionPrivilegeObjects.mockedAuthorizer = Mockito.mock(HiveAuthorizer.class); + return TestViewPartitionPrivilegeObjects.mockedAuthorizer; + } + } + + @BeforeClass + public static void beforeClass() throws Exception { + UserGroupInformation.setLoginUser(UserGroupInformation.createRemoteUser("hive")); + conf = new HiveConfForTest(TestViewPartitionPrivilegeObjects.class); + conf.setVar(ConfVars.HIVE_AUTHORIZATION_MANAGER, MockedHiveAuthorizerFactory.class.getName()); + conf.setBoolVar(ConfVars.HIVE_AUTHORIZATION_ENABLED, true); + conf.setBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS, false); + conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, true); + conf.setVar(ConfVars.HIVE_TXN_MANAGER, DbTxnManager.class.getName()); + conf.setVar(ConfVars.DYNAMIC_PARTITIONING_MODE, "nonstrict"); + conf.setVar(ConfVars.HIVE_FETCH_TASK_CONVERSION, "none"); + conf.setVar(ConfVars.HIVE_EXECUTION_ENGINE, "mr"); + + TestTxnDbUtil.prepDb(conf); + SessionState.start(conf); + driver = new Driver(conf); + + runCmd("CREATE DATABASE IF NOT EXISTS datadb"); + runCmd("CREATE TABLE IF NOT EXISTS datadb.t1 (i INT) PARTITIONED BY (dept STRING)"); + runCmd("ALTER TABLE datadb.t1 ADD IF NOT EXISTS PARTITION (dept='a')"); + runCmd("CREATE DATABASE IF NOT EXISTS viewdb"); + runCmd("CREATE VIEW IF NOT EXISTS viewdb.v1 AS SELECT * FROM datadb.t1"); + // Target table used by INSERT OVERWRITE tests — non-partitioned to keep DML simple + runCmd("CREATE TABLE IF NOT EXISTS datadb.insert_target (i INT, dept STRING)"); + } + + @Before + public void resetMock() { 
+ if (mockedAuthorizer != null) { + reset(mockedAuthorizer); + } + } + + @AfterClass + public static void afterClass() throws Exception { + runCmd("DROP VIEW IF EXISTS viewdb.v1"); + runCmd("DROP TABLE IF EXISTS datadb.t1"); + runCmd("DROP TABLE IF EXISTS datadb.insert_target"); + runCmd("DROP DATABASE IF EXISTS viewdb"); + runCmd("DROP DATABASE IF EXISTS datadb"); + driver.close(); + } + + @Test + public void testViewSelectNoPartitionPrivObj() throws Exception { + conf.setVar(ConfVars.HIVE_FETCH_TASK_CONVERSION, "none"); + SessionState.get().setConf(conf); + + HiveAuthenticationProvider user1Auth = Mockito.mock(HiveAuthenticationProvider.class); + Mockito.when(user1Auth.getUserName()).thenReturn("user1"); + SessionState.get().setAuthenticator(user1Auth); + + driver.compile("SELECT * FROM viewdb.v1", true); + + List inputs = getInputPrivObjects(); + + Assert.assertTrue("Expected a TABLE_OR_VIEW object for the view", + inputs.stream().anyMatch(h -> + h.getType() == HivePrivilegeObject.HivePrivilegeObjectType.TABLE_OR_VIEW + && "v1".equalsIgnoreCase(h.getObjectName()) + && "viewdb".equalsIgnoreCase(h.getDbname()))); + + Assert.assertFalse("HIVE-29628: view query must not send a PARTITION object on the base table", + inputs.stream().anyMatch(h -> + h.getType() == HivePrivilegeObject.HivePrivilegeObjectType.PARTITION + && "t1".equalsIgnoreCase(h.getObjectName()) + && "datadb".equalsIgnoreCase(h.getDbname()))); + + Assert.assertFalse("HIVE-29628: view query must not send a base-table TABLE_OR_VIEW object", + inputs.stream().anyMatch(h -> + h.getType() == HivePrivilegeObject.HivePrivilegeObjectType.TABLE_OR_VIEW + && "t1".equalsIgnoreCase(h.getObjectName()) + && "datadb".equalsIgnoreCase(h.getDbname()))); + } + + @Test + public void testDirectTableSelectNoPartitionPrivObj() throws Exception { + conf.setVar(ConfVars.HIVE_FETCH_TASK_CONVERSION, "none"); + SessionState.get().setConf(conf); + + driver.compile("SELECT * FROM datadb.t1", true); + + List inputs = 
getInputPrivObjects(); + + Assert.assertTrue("Direct table access must still emit a TABLE_OR_VIEW object on the base table", + inputs.stream().anyMatch(h -> + h.getType() == HivePrivilegeObject.HivePrivilegeObjectType.TABLE_OR_VIEW + && "t1".equalsIgnoreCase(h.getObjectName()) + && "datadb".equalsIgnoreCase(h.getDbname()))); + + Assert.assertFalse("ADH-6957: read PARTITION objects are stripped for SELECT, even direct access", + inputs.stream().anyMatch(h -> + h.getType() == HivePrivilegeObject.HivePrivilegeObjectType.PARTITION + && "t1".equalsIgnoreCase(h.getObjectName()) + && "datadb".equalsIgnoreCase(h.getDbname()))); + } + + /** + * When a view over a partitioned table is the READ SOURCE of an INSERT OVERWRITE statement, + * {@code checkPrivileges} inputs must contain only {@code TABLE_OR_VIEW viewdb/v1}. + */ + @Test + public void testInsertOverwriteFromView() throws Exception { + conf.setVar(ConfVars.HIVE_FETCH_TASK_CONVERSION, "none"); + SessionState.get().setConf(conf); + + driver.compile("INSERT OVERWRITE TABLE datadb.insert_target SELECT * FROM viewdb.v1", true); + + List inputs = getInputPrivObjects(); + + Assert.assertTrue("Expected TABLE_OR_VIEW privilege object for view source in INSERT", + inputs.stream().anyMatch(h -> + h.getType() == HivePrivilegeObject.HivePrivilegeObjectType.TABLE_OR_VIEW + && "v1".equalsIgnoreCase(h.getObjectName()) + && "viewdb".equalsIgnoreCase(h.getDbname()))); + + Assert.assertFalse( + "INSERT from view must not send a PARTITION privilege object on the base table", + inputs.stream().anyMatch(h -> + h.getType() == HivePrivilegeObject.HivePrivilegeObjectType.PARTITION + && "t1".equalsIgnoreCase(h.getObjectName()) + && "datadb".equalsIgnoreCase(h.getDbname()))); + } + + @SuppressWarnings("unchecked") + private List getInputPrivObjects() + throws HiveAuthzPluginException, HiveAccessControlException { + Class> cls = (Class) List.class; + ArgumentCaptor> inputsCapturer = ArgumentCaptor.forClass(cls); + ArgumentCaptor> 
outputsCapturer = ArgumentCaptor.forClass(cls); + + verify(mockedAuthorizer, atLeastOnce()).checkPrivileges( + any(HiveOperationType.class), + inputsCapturer.capture(), + outputsCapturer.capture(), + any(HiveAuthzContext.class)); + + List> all = inputsCapturer.getAllValues(); + return all.get(all.size() - 1); + } + + private static void runCmd(String cmd) throws Exception { + driver.run(cmd); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java index c009472fed0a..2faefafdee35 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.stats; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -41,10 +43,12 @@ import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.Timestamp; import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -565,4 +569,67 @@ void testGetColStatisticsTimestampType() { assertEquals(1700000000L, range.maxValue.longValue(), "maxValue mismatch for TIMESTAMP"); } + @Test + void testEstimateStatsForMissingColsHandlesEmptyList() { + HiveConf conf = new HiveConf(); + + 
ColumnInfo columnInfoA = new ColumnInfo("a", TypeInfoFactory.intTypeInfo, "t", false); + + List allColumnStats = StatsUtils.estimateStatsForMissingCols( + List.of("a"), Collections.emptyList(), conf, 0, List.of(columnInfoA)); + + assertEquals(1, allColumnStats.size()); + } + + @Test + void testEstimateStatsForMissingColsCombinesExistingStatsAndEstimations() { + HiveConf conf = new HiveConf(); + + ColumnInfo colNeededButNotExists = new ColumnInfo("neededButNotExists", TypeInfoFactory.intTypeInfo, "t", false); + ColumnInfo colNeededAndExists = new ColumnInfo("neededAndExists", TypeInfoFactory.intTypeInfo, "t", false); + ColumnInfo colNotNeededButExists = new ColumnInfo("notNeededButExists", TypeInfoFactory.intTypeInfo, "t", false); + ColumnInfo colNotNeededNotExists = new ColumnInfo("notNeededNotExists", TypeInfoFactory.intTypeInfo, "t", false); + + ColStatistics colStatNeededAndExists = new ColStatistics(); + colStatNeededAndExists.setColumnName(colNeededAndExists.getInternalName()); + ColStatistics colStatNotNeededButExists = new ColStatistics(); + colStatNotNeededButExists.setColumnName(colNotNeededButExists.getInternalName()); + + List allColumnStats = StatsUtils.estimateStatsForMissingCols( + List.of(colNeededAndExists.getInternalName(), colNeededButNotExists.getInternalName()), + List.of(colStatNeededAndExists, colStatNotNeededButExists), + conf, + 0, + List.of(colNeededButNotExists, colNeededAndExists, colNotNeededButExists, colNotNeededNotExists)); + + assertEquals(3, allColumnStats.size()); + assertEquals(colStatNeededAndExists, allColumnStats.get(0)); + assertFalse(allColumnStats.get(0).isEstimated()); + assertEquals(colStatNotNeededButExists, allColumnStats.get(1)); + assertFalse(allColumnStats.get(1).isEstimated()); + assertEquals(colNeededButNotExists.getInternalName(), allColumnStats.get(2).getColumnName()); + assertTrue(allColumnStats.get(2).isEstimated()); + } + + @Test + void 
testEstimateStatsForMissingColsReturnOnlyColumnsWithExistingStatsWhenNoNeededColumn() { + HiveConf conf = new HiveConf(); + + ColumnInfo colNotNeededButExists = new ColumnInfo("notNeededButExists", TypeInfoFactory.intTypeInfo, "t", false); + ColumnInfo colNotNeededNotExists = new ColumnInfo("notNeededNotExists", TypeInfoFactory.intTypeInfo, "t", false); + + ColStatistics colStatNotNeededButExists = new ColStatistics(); + colStatNotNeededButExists.setColumnName(colNotNeededButExists.getInternalName()); + + List allColumnStats = StatsUtils.estimateStatsForMissingCols( + Collections.emptyList(), + List.of(colStatNotNeededButExists), + conf, + 0, + List.of(colNotNeededButExists, colNotNeededNotExists)); + + assertEquals(1, allColumnStats.size()); + assertEquals(allColumnStats.getFirst(), colStatNotNeededButExists); + } + } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java index bf01034711e3..f97237353f7c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.TransactionalValidationListener; +import org.apache.hadoop.hive.metastore.api.AbortTxnRequest; import org.apache.hadoop.hive.metastore.api.CompactionRequest; import org.apache.hadoop.hive.metastore.api.CompactionType; import org.apache.hadoop.hive.metastore.api.FindNextCompactRequest; @@ -41,7 +42,10 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.utils.StringableMap; +import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.thrift.TException; +import 
org.apache.thrift.transport.TTransportException; import org.junit.After; import org.junit.Assert; import org.junit.Test; @@ -70,8 +74,13 @@ import java.util.concurrent.atomic.AtomicBoolean; import static org.apache.hadoop.hive.common.AcidConstants.VISIBILITY_PATTERN; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -100,9 +109,9 @@ public void stringableMap() throws Exception { // Empty map case StringableMap m = new StringableMap(new HashMap()); String s = m.toString(); - Assert.assertEquals("0:", s); + assertEquals("0:", s); m = new StringableMap(s); - Assert.assertEquals(0, m.size()); + assertEquals(0, m.size()); Map base = new HashMap(); base.put("mary", "poppins"); @@ -111,22 +120,22 @@ public void stringableMap() throws Exception { m = new StringableMap(base); s = m.toString(); m = new StringableMap(s); - Assert.assertEquals(3, m.size()); + assertEquals(3, m.size()); Map saw = new HashMap(3); saw.put("mary", false); saw.put("bert", false); saw.put(null, false); for (Map.Entry e : m.entrySet()) { saw.put(e.getKey(), true); - if ("mary".equals(e.getKey())) Assert.assertEquals("poppins", e.getValue()); + if ("mary".equals(e.getKey())) assertEquals("poppins", e.getValue()); else if ("bert".equals(e.getKey())) Assert.assertNull(e.getValue()); - else if (null == e.getKey()) Assert.assertEquals("banks", e.getValue()); + else if (null == e.getKey()) assertEquals("banks", e.getValue()); else Assert.fail("Unexpected value " + e.getKey()); } - Assert.assertEquals(3, saw.size()); - Assert.assertTrue(saw.get("mary")); - Assert.assertTrue(saw.get("bert")); - 
Assert.assertTrue(saw.get(null)); + assertEquals(3, saw.size()); + assertTrue(saw.get("mary")); + assertTrue(saw.get("bert")); + assertTrue(saw.get(null)); } @Test @@ -134,26 +143,26 @@ public void stringableList() throws Exception { // Empty list case MRCompactor.StringableList ls = new MRCompactor.StringableList(); String s = ls.toString(); - Assert.assertEquals("0:", s); + assertEquals("0:", s); ls = new MRCompactor.StringableList(s); - Assert.assertEquals(0, ls.size()); + assertEquals(0, ls.size()); ls = new MRCompactor.StringableList(); ls.add(new Path("/tmp")); ls.add(new Path("/usr")); s = ls.toString(); - Assert.assertTrue("Expected 2:4:/tmp4:/usr or 2:4:/usr4:/tmp, got " + s, + assertTrue("Expected 2:4:/tmp4:/usr or 2:4:/usr4:/tmp, got " + s, "2:4:/tmp4:/usr".equals(s) || "2:4:/usr4:/tmp".equals(s)); ls = new MRCompactor.StringableList(s); - Assert.assertEquals(2, ls.size()); + assertEquals(2, ls.size()); boolean sawTmp = false, sawUsr = false; for (Path p : ls) { if ("/tmp".equals(p.toString())) sawTmp = true; else if ("/usr".equals(p.toString())) sawUsr = true; else Assert.fail("Unexpected path " + p.toString()); } - Assert.assertTrue(sawTmp); - Assert.assertTrue(sawUsr); + assertTrue(sawTmp); + assertTrue(sawUsr); } @Test @@ -181,10 +190,10 @@ public void inputSplit() throws Exception { MRCompactor.CompactorInputSplit split = new MRCompactor.CompactorInputSplit(conf, 3, files, new Path(basename), deltas, new HashMap()); - Assert.assertEquals(520L, split.getLength()); + assertEquals(520L, split.getLength()); String[] locations = split.getLocations(); - Assert.assertEquals(1, locations.length); - Assert.assertEquals("localhost", locations[0]); + assertEquals(1, locations.length); + assertEquals("localhost", locations[0]); ByteArrayOutputStream buf = new ByteArrayOutputStream(); DataOutput out = new DataOutputStream(buf); @@ -194,12 +203,12 @@ public void inputSplit() throws Exception { DataInput in = new DataInputStream(new 
ByteArrayInputStream(buf.toByteArray())); split.readFields(in); - Assert.assertEquals(3, split.getBucket()); - Assert.assertEquals(basename, split.getBaseDir().toString()); + assertEquals(3, split.getBucket()); + assertEquals(basename, split.getBaseDir().toString()); deltas = split.getDeltaDirs(); - Assert.assertEquals(2, deltas.length); - Assert.assertEquals(delta1, deltas[0].toString()); - Assert.assertEquals(delta2, deltas[1].toString()); + assertEquals(2, deltas.length); + assertEquals(delta1, deltas[0].toString()); + assertEquals(delta2, deltas[1].toString()); } @Test @@ -234,12 +243,12 @@ public void inputSplitNullBase() throws Exception { DataInput in = new DataInputStream(new ByteArrayInputStream(buf.toByteArray())); split.readFields(in); - Assert.assertEquals(3, split.getBucket()); + assertEquals(3, split.getBucket()); Assert.assertNull(split.getBaseDir()); deltas = split.getDeltaDirs(); - Assert.assertEquals(2, deltas.length); - Assert.assertEquals(delta1, deltas[0].toString()); - Assert.assertEquals(delta2, deltas[1].toString()); + assertEquals(2, deltas.length); + assertEquals(delta1, deltas[0].toString()); + assertEquals(delta2, deltas[1].toString()); } @Test @@ -264,7 +273,7 @@ public void sortedTable() throws Exception { // There should still be four directories in the location. FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + assertEquals(4, stat.length); } @Test @@ -291,7 +300,7 @@ public void sortedPartition() throws Exception { // There should still be four directories in the location. 
FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + assertEquals(4, stat.length); } @Test @@ -312,13 +321,13 @@ public void minorTableWithBase() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(5, stat.length); + assertEquals(5, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewDelta = false; @@ -326,26 +335,26 @@ public void minorTableWithBase() throws Exception { if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(21, 24) + "_v0000026")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(104L, buckets[0].getLen()); - Assert.assertEquals(104L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(104L, buckets[0].getLen()); + assertEquals(104L, buckets[1].getLen()); } if (stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(21, 24) + "_v0000026")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - 
Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(104L, buckets[0].getLen()); - Assert.assertEquals(104L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(104L, buckets[0].getLen()); + assertEquals(104L, buckets[1].getLen()); } else { LOG.debug("This is not the delta file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewDelta); + assertTrue(toString(stat), sawNewDelta); } /** @@ -372,20 +381,20 @@ public void minorWithOpenInMiddle() throws Exception { // since compaction was not run, state should not be "ready for cleaning" but "refused" ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals(TxnStore.REFUSED_RESPONSE, compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals(TxnStore.REFUSED_RESPONSE, compacts.get(0).getState()); // There should still be 4 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(toString(stat), 4, stat.length); + assertEquals(toString(stat), 4, stat.length); // Find the new delta file and make sure it has the right contents Arrays.sort(stat); - Assert.assertEquals("base_20", stat[0].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(21, 22), stat[1].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(23, 25), stat[2].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(26, 27), stat[3].getPath().getName()); + assertEquals("base_20", stat[0].getPath().getName()); + 
assertEquals(makeDeltaDirName(21, 22), stat[1].getPath().getName()); + assertEquals(makeDeltaDirName(23, 25), stat[2].getPath().getName()); + assertEquals(makeDeltaDirName(26, 27), stat[3].getPath().getName()); } @Test @@ -407,22 +416,22 @@ public void minorWithAborted() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 6 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(6, stat.length); + assertEquals(6, stat.length); // Find the new delta file and make sure it has the right contents Arrays.sort(stat); - Assert.assertEquals("base_20", stat[0].getPath().getName()); - Assert.assertEquals(makeDeleteDeltaDirNameCompacted(21, 27) + "_v0000028", stat[1].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(21, 22), stat[2].getPath().getName()); - Assert.assertEquals(makeDeltaDirNameCompacted(21, 27) + "_v0000028", stat[3].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(23, 25), stat[4].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(26, 27), stat[5].getPath().getName()); + assertEquals("base_20", stat[0].getPath().getName()); + assertEquals(makeDeleteDeltaDirNameCompacted(21, 27) + "_v0000028", stat[1].getPath().getName()); + assertEquals(makeDeltaDirName(21, 22), stat[2].getPath().getName()); + assertEquals(makeDeltaDirNameCompacted(21, 27) + "_v0000028", stat[3].getPath().getName()); + assertEquals(makeDeltaDirName(23, 25), stat[4].getPath().getName()); + assertEquals(makeDeltaDirName(26, 27), stat[5].getPath().getName()); } @Test @@ -444,13 +453,13 @@ public void minorPartitionWithBase() throws Exception { 
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still be four directories in the location. FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation())); - Assert.assertEquals(5, stat.length); + assertEquals(5, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewDelta = false; @@ -458,25 +467,25 @@ public void minorPartitionWithBase() throws Exception { if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(21, 24) + "_v0000026")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(104L, buckets[0].getLen()); - Assert.assertEquals(104L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(104L, buckets[0].getLen()); + assertEquals(104L, buckets[1].getLen()); } if (stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(21, 24))) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(104L, buckets[0].getLen()); - Assert.assertEquals(104L, 
buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(104L, buckets[0].getLen()); + assertEquals(104L, buckets[1].getLen()); } else { LOG.debug("This is not the delta file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewDelta); + assertTrue(toString(stat), sawNewDelta); } @Test @@ -496,13 +505,13 @@ public void minorTableNoBase() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + assertEquals(4, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewDelta = false; @@ -510,25 +519,25 @@ public void minorTableNoBase() throws Exception { if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(1, 4) + "_v0000006")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(104L, buckets[0].getLen()); - Assert.assertEquals(104L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + 
assertEquals(104L, buckets[0].getLen()); + assertEquals(104L, buckets[1].getLen()); } if (stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(1, 4) + "_v0000006")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(104L, buckets[0].getLen()); - Assert.assertEquals(104L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(104L, buckets[0].getLen()); + assertEquals(104L, buckets[1].getLen()); } else { LOG.debug("This is not the delta file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewDelta); + assertTrue(toString(stat), sawNewDelta); } @Test @@ -549,13 +558,13 @@ public void majorTableWithBase() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + assertEquals(4, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewBase = false; @@ -563,16 +572,16 @@ public void majorTableWithBase() throws Exception { if (stat[i].getPath().getName().equals("base_0000024_v0000026")) { sawNewBase = true; FileStatus[] buckets = 
fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(624L, buckets[0].getLen()); - Assert.assertEquals(624L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(624L, buckets[0].getLen()); + assertEquals(624L, buckets[1].getLen()); } else { LOG.debug("This is not the file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewBase); + assertTrue(toString(stat), sawNewBase); } @Test @@ -625,14 +634,14 @@ private void compactNoBaseLotsOfDeltas(CompactionType type) throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation())); /* delete_delta_21_23 and delete_delta_25_33 which are created as a result of compacting*/ int numFilesExpected = 11 + (type == CompactionType.MINOR ? 
1 : 0); - Assert.assertEquals(numFilesExpected, stat.length); + assertEquals(numFilesExpected, stat.length); // Find the new delta file and make sure it has the right contents List matchesNotFound = new ArrayList<>(numFilesExpected); @@ -665,7 +674,7 @@ private void compactNoBaseLotsOfDeltas(CompactionType type) throws Exception { if(matchesNotFound.size() == 0) { return; } - Assert.assertTrue("Files remaining: " + matchesNotFound + "; " + toString(stat), false); + assertTrue("Files remaining: " + matchesNotFound + "; " + toString(stat), false); } @Test public void majorPartitionWithBase() throws Exception { @@ -687,13 +696,13 @@ public void majorPartitionWithBase() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still be four directories in the location. 
FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + assertEquals(4, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewBase = false; @@ -701,16 +710,16 @@ public void majorPartitionWithBase() throws Exception { if (stat[i].getPath().getName().equals("base_0000024_v0000026")) { sawNewBase = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(624L, buckets[0].getLen()); - Assert.assertEquals(624L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(624L, buckets[0].getLen()); + assertEquals(624L, buckets[1].getLen()); } else { LOG.debug("This is not the file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewBase); + assertTrue(toString(stat), sawNewBase); } @Test @@ -730,13 +739,13 @@ public void majorTableNoBase() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should now be 3 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(3, stat.length); + assertEquals(3, stat.length); // Find the new delta file and make sure it has the right contents 
boolean sawNewBase = false; @@ -744,16 +753,16 @@ public void majorTableNoBase() throws Exception { if (stat[i].getPath().getName().equals("base_0000004_v0000005")) { sawNewBase = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(104L, buckets[0].getLen()); - Assert.assertEquals(104L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(104L, buckets[0].getLen()); + assertEquals(104L, buckets[1].getLen()); } else { LOG.debug("This is not the file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewBase); + assertTrue(toString(stat), sawNewBase); } private static String toString(FileStatus[] stat) { @@ -785,8 +794,8 @@ public void majorTableLegacy() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); @@ -799,16 +808,16 @@ public void majorTableLegacy() throws Exception { if (stat[i].getPath().getName().equals("base_0000024_v0000026")) { sawNewBase = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - 
Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(624L, buckets[0].getLen()); - Assert.assertEquals(624L, buckets[1].getLen()); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(624L, buckets[0].getLen()); + assertEquals(624L, buckets[1].getLen()); } else { LOG.debug("This is not the file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewBase); + assertTrue(toString(stat), sawNewBase); } @Test @@ -829,8 +838,8 @@ public void minorTableLegacy() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); @@ -842,14 +851,14 @@ public void minorTableLegacy() throws Exception { if (stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(21, 24) + "_v0000026")) { sawNewDelta = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); } else { LOG.debug("This is not the file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewDelta); + assertTrue(toString(stat), sawNewDelta); } @Test 
@@ -873,13 +882,13 @@ public void majorPartitionWithBaseMissingBuckets() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still be four directories in the location. FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + assertEquals(4, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewBase = false; @@ -887,11 +896,11 @@ public void majorPartitionWithBaseMissingBuckets() throws Exception { if (stat[i].getPath().getName().equals("base_0000026_v0000028")) { sawNewBase = true; FileStatus[] buckets = fs.listStatus(stat[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER); - Assert.assertEquals(2, buckets.length); - Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); - Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + assertEquals(2, buckets.length); + assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); // Bucket 0 should be small and bucket 1 should be large, make sure that's the case - Assert.assertTrue( + assertTrue( ("bucket_00000".equals(buckets[0].getPath().getName()) && 104L == buckets[0].getLen() && "bucket_00001".equals(buckets[1].getPath().getName()) && 676L == buckets[1] .getLen()) @@ -904,7 +913,7 @@ public void majorPartitionWithBaseMissingBuckets() throws Exception { LOG.debug("This is not the file you are looking for " + stat[i].getPath().getName()); } } - Assert.assertTrue(toString(stat), sawNewBase); + assertTrue(toString(stat), sawNewBase); } @Test @@ -926,21 
+935,21 @@ public void majorWithOpenInMiddle() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(5, stat.length); + assertEquals(5, stat.length); // Find the new delta file and make sure it has the right contents Arrays.sort(stat); - Assert.assertEquals("base_0000022_v0000028", stat[0].getPath().getName()); - Assert.assertEquals("base_20", stat[1].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(21, 22), stat[2].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(23, 25), stat[3].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(26, 27), stat[4].getPath().getName()); + assertEquals("base_0000022_v0000028", stat[0].getPath().getName()); + assertEquals("base_20", stat[1].getPath().getName()); + assertEquals(makeDeltaDirName(21, 22), stat[2].getPath().getName()); + assertEquals(makeDeltaDirName(23, 25), stat[3].getPath().getName()); + assertEquals(makeDeltaDirName(26, 27), stat[4].getPath().getName()); } @Test @@ -962,21 +971,21 @@ public void majorWithAborted() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new 
Path(t.getSd().getLocation())); - Assert.assertEquals(5, stat.length); + assertEquals(5, stat.length); // Find the new delta file and make sure it has the right contents Arrays.sort(stat); - Assert.assertEquals("base_0000027_v0000028", stat[0].getPath().getName()); - Assert.assertEquals("base_20", stat[1].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(21, 22), stat[2].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(23, 25), stat[3].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(26, 27), stat[4].getPath().getName()); + assertEquals("base_0000027_v0000028", stat[0].getPath().getName()); + assertEquals("base_20", stat[1].getPath().getName()); + assertEquals(makeDeltaDirName(21, 22), stat[2].getPath().getName()); + assertEquals(makeDeltaDirName(23, 25), stat[3].getPath().getName()); + assertEquals(makeDeltaDirName(26, 27), stat[4].getPath().getName()); } @Override boolean useHive130DeltaDirName() { @@ -1008,10 +1017,10 @@ public void testWorkerAndInitiatorVersion() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); - Assert.assertEquals(initiatorVersion, compacts.get(0).getInitiatorVersion()); - Assert.assertEquals(workerVersion, compacts.get(0).getWorkerVersion()); + assertEquals(1, compacts.size()); + assertEquals("ready for cleaning", compacts.get(0).getState()); + assertEquals(initiatorVersion, compacts.get(0).getInitiatorVersion()); + assertEquals(workerVersion, compacts.get(0).getWorkerVersion()); } @@ -1072,7 +1081,7 @@ public void droppedTable() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(0, compacts.size()); + assertEquals(0, compacts.size()); } @Test @@ -1097,7 +1106,7 @@ public void droppedPartition() throws Exception { 
ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(0, compacts.size()); + assertEquals(0, compacts.size()); } @Test @@ -1148,8 +1157,8 @@ public void insertOnlyDisabled() throws Exception { ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); - Assert.assertEquals(1, compacts.size()); - Assert.assertEquals("failed", compacts.get(0).getState()); + assertEquals(1, compacts.size()); + assertEquals("failed", compacts.get(0).getState()); } @@ -1162,21 +1171,21 @@ private void verifyTxn1IsAborted(int compactionNum, Table t, CompactionType type // Compaction should not have run on a single delta file FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(1, stat.length); - Assert.assertEquals(makeDeltaDirName(0, 2), stat[0].getPath().getName()); + assertEquals(1, stat.length); + assertEquals(makeDeltaDirName(0, 2), stat[0].getPath().getName()); // State should not be "ready for cleaning" because we skip cleaning List compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); - Assert.assertEquals(compactionNum + 1, compacts.size()); - Assert.assertEquals(TxnStore.REFUSED_RESPONSE, compacts.get(compactionNum).getState()); + assertEquals(compactionNum + 1, compacts.size()); + assertEquals(TxnStore.REFUSED_RESPONSE, compacts.get(compactionNum).getState()); // assert transaction with txnId=1 is still aborted after cleaner is run startCleaner(); List openTxns = HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); - Assert.assertEquals(1, openTxns.get(0).getId()); - Assert.assertEquals(TxnState.ABORTED, openTxns.get(0).getState()); + assertEquals(1, openTxns.get(0).getId()); + assertEquals(TxnState.ABORTED, openTxns.get(0).getState()); } // With high timeout, but fast run we should finish without a problem @@ -1197,6 +1206,186 @@ 
public void testTimeoutWithoutInterrupt() throws Exception { runTimeoutTest(1, true, true); } + @Test + public void testExceptionWhenTxnCommitAndMarkFailed() throws Exception { + prepareTableAndCompaction("default", "tbforcomperror"); + runWorkerWithException(MethodToFail.COMMIT_TXN, MethodToFail.MARK_FAILED); + + List compacts = + txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); + assertEquals(TxnStore.WORKING_RESPONSE, compacts.get(0).getState()); + List openTxns = HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); + assertEquals(1, openTxns.size()); + TxnInfo txn = openTxns.get(0); + assertEquals(compacts.get(0).getTxnId(), txn.getId()); + assertEquals(TxnState.OPEN, txn.getState()); + txnHandler.abortTxn(new AbortTxnRequest(txn.getId())); + } + + @Test + public void testExceptionWhenTxnCommit() throws Exception { + prepareTableAndCompaction("default", "tbforcomperror"); + runWorkerWithException(MethodToFail.COMMIT_TXN); + + List compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); + ShowCompactResponseElement compaction = compacts.get(0); + assertEquals(TxnStore.FAILED_RESPONSE, compaction.getState()); + assertEquals("Simulated failure in commitTxn", compaction.getErrorMessage()); + List openTxns = HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); + assertEquals(1, openTxns.size()); + TxnInfo txn = openTxns.get(0); + assertEquals(compaction.getTxnId(), txn.getId()); + assertEquals(TxnState.OPEN, txn.getState()); + txnHandler.abortTxn(new AbortTxnRequest(txn.getId())); + } + + @Test + public void testExceptionWhenMarkCompacted() throws Exception { + prepareTableAndCompaction("default", "tbforcomperror"); + runWorkerWithException(MethodToFail.MARK_COMPACTED); + + List compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); + ShowCompactResponseElement compaction = compacts.get(0); + assertEquals(TxnStore.FAILED_RESPONSE, compaction.getState()); + 
assertEquals("Simulated failure in markCompacted", compaction.getErrorMessage()); + List openTxns = HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); + assertEquals(0, openTxns.size()); + } + + @Test + public void testExceptionDuringCompact() throws Exception { + prepareTableAndCompaction("default", "tbforcomperror"); + HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST, true); + HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TEST_MODE_FAIL_COMPACTION, true); + startWorker(); + + List compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); + ShowCompactResponseElement compaction = compacts.get(0); + assertEquals(TxnStore.FAILED_RESPONSE, compaction.getState()); + assertEquals("HIVE_TEST_MODE_FAIL_COMPACTION=true", compaction.getErrorMessage()); + List openTxns = HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); + assertEquals(1, openTxns.size()); + TxnInfo txn = openTxns.get(0); + assertEquals(compaction.getTxnId(), txn.getId()); + assertEquals(TxnState.ABORTED, txn.getState()); + } + + @Test + public void testWorkerIfIsDynPartAbort() throws Exception { + String dbName = "default"; + String tableName = "tbforcomperror"; + Table t = newTable(dbName, tableName, true); + addBaseFile(t, null, 1L, 3, 1); + addDeltaFile(t, null, 2L, 2L, 1); + addDeltaFile(t, null, 3L, 3L, 1); + addDeltaFile(t, null, 4L, 4L, 1); + burnThroughTransactions(dbName, tableName, 4, null, null); + // trigger compaction + CompactionRequest rqst = new CompactionRequest(dbName, tableName, CompactionType.MAJOR); + rqst.setPartitionname(null); + txnHandler.compact(rqst); + startWorker(); + + List compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); + ShowCompactResponseElement compaction = compacts.get(0); + assertEquals(TxnStore.CLEANING_RESPONSE, compaction.getState()); + assertTrue(compaction.getNextTxnId() > 0L); + List openTxns = 
HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); + assertEquals(0, openTxns.size()); + } + + @Test + public void testWorkerNotEnoughToCompact() throws Exception { + String dbName = "default"; + String tableName = "tbforcomperror"; + Table t = newTable(dbName, tableName, false); + addBaseFile(t, null, 1L, 3, 1); + burnThroughTransactions(dbName, tableName, 1, null, null); + // trigger compaction + CompactionRequest rqst = new CompactionRequest(dbName, tableName, CompactionType.MAJOR); + txnHandler.compact(rqst); + startWorker(); + + List compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); + ShowCompactResponseElement compaction = compacts.get(0); + assertEquals(TxnStore.REFUSED_RESPONSE, compaction.getState()); + assertTrue(compaction.getErrorMessage().contains("None of the compaction thresholds met, compaction request is refused!")); + List openTxns = HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); + assertEquals(0, openTxns.size()); + } + + @Test + public void testWorkerNotEnoughToCompactNeedsCleaning() throws Exception { + String dbName = "default"; + String tableName = "tbforcomperror"; + Table t = newTable(dbName, tableName, false); + addDeltaFile(t, null, 20L, 20L, 10); + addDeltaFile(t, null, 21L, 21L, 10); + addDeltaFile(t, null, 22L, 22L, 10); + addDeltaFile(t, null, 23L, 23L, 10); + addDeltaFile(t, null, 24L, 24L, 10); + burnThroughTransactions(dbName, tableName, 25, null, new HashSet(Arrays.asList(20L, 21L, 22L, 23L, 24L))); + // trigger compaction + CompactionRequest rqst = new CompactionRequest(dbName, tableName, CompactionType.MAJOR); + txnHandler.compact(rqst); + startWorker(); + + List compacts = txnHandler.showCompact(new ShowCompactRequest()).getCompacts(); + ShowCompactResponseElement compaction = compacts.get(0); + assertEquals(TxnStore.CLEANING_RESPONSE, compaction.getState()); + assertTrue(compaction.getNextTxnId() > 0L); + List openTxns = 
HiveMetaStoreUtils.getHiveMetastoreClient(conf).showTxns().getOpen_txns(); + assertEquals(5, openTxns.size()); + assertEquals(20L, openTxns.get(0).getId()); + assertEquals(21L, openTxns.get(1).getId()); + assertEquals(22L, openTxns.get(2).getId()); + assertEquals(23L, openTxns.get(3).getId()); + assertEquals(24L, openTxns.get(4).getId()); + + } + + private void runWorkerWithException(MethodToFail... methodToFail) throws Exception { + IMetaStoreClient spyMsc = Mockito.spy(ms); + for (MethodToFail method: methodToFail) { + switch (method) { + case MARK_FAILED -> doThrow(new TTransportException("Simulated failure in markFailed")).when(spyMsc).markFailed(any()); + case COMMIT_TXN -> doThrow(new TException("Simulated failure in commitTxn")).when(spyMsc).commitTxn(anyLong()); + case MARK_COMPACTED -> doThrow(new TTransportException("Simulated failure in markCompacted")).when(spyMsc).markCompacted(any()); + } + } + + TestTxnDbUtil.setConfValues(conf); + Worker worker = Mockito.spy(new Worker()); + worker.setConf(conf); + AtomicBoolean stop = new AtomicBoolean(); + stop.set(true); + worker.init(stop); + worker.msc = spyMsc; + worker.setName("testworker"); + CompactorThread ct = worker; + ct.run(); + } + + private void prepareTableAndCompaction(String dbName, String tableName) throws Exception { + Table t = newTable(dbName, tableName, false); + addBaseFile(t, null, 20L, 20); + addDeltaFile(t, null, 21L, 23L, 2); + addDeltaFile(t, null, 23L, 23L, 3); + addDeltaFile(t, null, 24L, 24L, 2); + addDeltaFile(t, null, 25L, 25L, 3); + addDeltaFile(t, null, 26L, 26L, 3); + burnThroughTransactions(dbName, tableName, 27, null, null); + // trigger compaction + CompactionRequest rqst = new CompactionRequest(dbName, tableName, CompactionType.MAJOR); + txnHandler.compact(rqst); + } + + enum MethodToFail { + MARK_COMPACTED, + MARK_FAILED, + COMMIT_TXN; + } + private void runTimeoutTest(long timeout, boolean runForever, boolean swallowInterrupt) throws Exception { ExecutorService executor = 
Executors.newSingleThreadExecutor(); HiveConf timeoutConf = new HiveConf(conf); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java index 3cf665472c2e..c72962625618 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java @@ -21,7 +21,9 @@ import org.apache.hadoop.io.Text; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import org.junit.Test; /** @@ -40,8 +42,101 @@ public void testUnhexConversion(){ UDFUnhex udf = new UDFUnhex(); byte[] output = udf.evaluate(hex); assertEquals(expected.length,output.length); - for (int i = 0; i < expected.length; i++){ - assertEquals(expected[i], output[i]); - } + assertArrayEquals(expected, output); + } + + @Test + public void testUnhexOddLength() { + UDFUnhex udf = new UDFUnhex(); + + Text hex1 = new Text("A"); + byte[] expected1 = new byte[] {(byte) 0x0A}; + assertArrayEquals(expected1, udf.evaluate(hex1)); + + Text hex2 = new Text("123"); + byte[] expected2 = new byte[] {(byte) 0x01, (byte) 0x23}; + assertArrayEquals(expected2, udf.evaluate(hex2)); + } + + @Test + public void testUnhexInvalidCharacters() { + UDFUnhex udf = new UDFUnhex(); + + Text hex = new Text("7374G9"); + assertNull("Should return null for invalid hex characters", udf.evaluate(hex)); + + Text hexOddInvalid = new Text("12G"); + assertNull("Should return null for invalid hex characters in odd length string", udf.evaluate(hexOddInvalid)); + + Text hexOddInvalidSingleChar = new Text("G"); + assertNull("Should return null for invalid hex character in odd-length input", + udf.evaluate(hexOddInvalidSingleChar)); + + Text hexInvalidLow = new Text("0G"); + assertNull("Should return null when low nibble is invalid", udf.evaluate(hexInvalidLow)); + + Text hexInvalidHigh = new Text("G0"); + assertNull("Should return 
null when high nibble is invalid", udf.evaluate(hexInvalidHigh)); + } + + @Test + public void testUnhexNullEmptyCases() { + UDFUnhex udf = new UDFUnhex(); + + assertNull(udf.evaluate(null)); + + Text hexEmpty = new Text(""); + byte[] expectedEmpty = new byte[0]; + assertArrayEquals(expectedEmpty, udf.evaluate(hexEmpty)); + } + + @Test + public void testUnhexMixedCase() { + UDFUnhex udf = new UDFUnhex(); + + Text hex = new Text("aABb9"); + byte[] expected = new byte[] {(byte) 0x0A, (byte) 0xAB, (byte) 0xB9}; + assertArrayEquals(expected, udf.evaluate(hex)); + } + + @Test + public void testUnhexLowerCase() { + UDFUnhex udf = new UDFUnhex(); + + Text hexLowerPair = new Text("ab"); + assertArrayEquals(new byte[] {(byte) 0xAB}, udf.evaluate(hexLowerPair)); + + Text hexLowerOddLength = new Text("abc"); + assertArrayEquals(new byte[] {(byte) 0x0A, (byte) 0xBC}, udf.evaluate(hexLowerOddLength)); + + Text hexLowerDigits = new Text("0123456789abcdef"); + byte[] expectedLowerDigits = new byte[] { + (byte) 0x01, (byte) 0x23, (byte) 0x45, (byte) 0x67, + (byte) 0x89, (byte) 0xAB, (byte) 0xCD, (byte) 0xEF + }; + assertArrayEquals(expectedLowerDigits, udf.evaluate(hexLowerDigits)); + } + + @Test + public void testUnhexBoundaryValues() { + UDFUnhex udf = new UDFUnhex(); + + Text hexMinByte = new Text("00"); + assertArrayEquals(new byte[] {(byte) 0x00}, udf.evaluate(hexMinByte)); + + Text hexMaxByteUpper = new Text("FF"); + assertArrayEquals(new byte[] {(byte) 0xFF}, udf.evaluate(hexMaxByteUpper)); + + Text hexMaxByteLower = new Text("ff"); + assertArrayEquals(new byte[] {(byte) 0xFF}, udf.evaluate(hexMaxByteLower)); + + Text hexOddMinDigit = new Text("0"); + assertArrayEquals(new byte[] {(byte) 0x00}, udf.evaluate(hexOddMinDigit)); + + Text hexOddMaxUpper = new Text("F"); + assertArrayEquals(new byte[] {(byte) 0x0F}, udf.evaluate(hexOddMaxUpper)); + + Text hexOddMaxLower = new Text("f"); + assertArrayEquals(new byte[] {(byte) 0x0F}, udf.evaluate(hexOddMaxLower)); } } diff --git 
a/ql/src/test/queries/clientpositive/db_notification_batch_insert.q b/ql/src/test/queries/clientpositive/db_notification_batch_insert.q new file mode 100644 index 000000000000..ad537615d1e2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/db_notification_batch_insert.q @@ -0,0 +1,25 @@ +set hive.metastore.transactional.event.listeners=org.apache.hive.hcatalog.listener.DbNotificationListener; +set metastore.jdbc.max.batch.size=10; + +set hive.stats.autogather=true; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.stats.reliable=true; + +DROP TABLE IF EXISTS repro_batch_test; + +CREATE TABLE repro_batch_test ( + id INT, + name STRING +) +PARTITIONED BY (part_key INT); + +INSERT INTO TABLE repro_batch_test PARTITION (part_key) +SELECT + val as id, + 'dummy_data' as name, + val as part_key +FROM ( + SELECT (a.pos + 1) as val + FROM (SELECT posexplode(split(repeat(',', 10), ','))) a +) dummy; diff --git a/ql/src/test/queries/clientpositive/join_common_rhs_alias.q b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q new file mode 100644 index 000000000000..b691f6c88039 --- /dev/null +++ b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q @@ -0,0 +1,8 @@ +CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING); + +INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a"); + +SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2 +ON t1.c1 == t2.c1 +AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 ) +WHERE t2.c1 IS NULL; diff --git a/ql/src/test/queries/clientpositive/lateral_view_outer.q b/ql/src/test/queries/clientpositive/lateral_view_outer.q index ddb41a8fd099..80f92cebee58 100644 --- a/ql/src/test/queries/clientpositive/lateral_view_outer.q +++ b/ql/src/test/queries/clientpositive/lateral_view_outer.q @@ -13,4 +13,23 @@ create table array_valued as select key, if (key > 300, array(value, value), nul explain select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; 
+explain ast select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; +explain cbo +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10; + +-- array_valued already has a nullable array column, which can be used for the view-based test +CREATE VIEW array_valued_view AS +SELECT array_valued.key AS key, a +FROM array_valued +LATERAL VIEW OUTER explode(value) lv AS a; + +-- CBO plan should contain `outer=[true]` in HiveTableFunctionScan node. +EXPLAIN CBO +SELECT key, a FROM array_valued_view limit 10; +-- Explain plan should contain `outer lateral view: true` in the UDTF Operator +EXPLAIN +SELECT key, a FROM array_valued_view limit 10; +-- Rows with null array value should still appear with a=NULL +SELECT key, a FROM array_valued_view limit 10; diff --git a/ql/src/test/queries/clientpositive/lineage8.q b/ql/src/test/queries/clientpositive/lineage8.q new file mode 100644 index 000000000000..959376eaf417 --- /dev/null +++ b/ql/src/test/queries/clientpositive/lineage8.q @@ -0,0 +1,19 @@ +set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger; + +create table table_1 (id1 int, id2 int); +create table table_2 (id1 int, id2 int); + +create table table_3 as +select id1 from table_1 t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2; + +create table table_4 as +select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2; + +create table table_5 as +select t.id1 from +(select id1 from table_1 t1 where t1.id2 = 1) t +join table_2 t1 on t.id1 = t1.id2; diff --git a/ql/src/test/queries/clientpositive/parquet_decimal64.q b/ql/src/test/queries/clientpositive/parquet_decimal64.q new file mode 100644 index 000000000000..ce06dee45c54 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_decimal64.q @@ -0,0 +1,15 @@ +--! 
qt:replace:/(\s+Statistics: Num rows: \d+)/#Masked#/ +set hive.vectorized.execution.enabled=true; +set hive.explain.user=false; + +drop table if exists dec64_parquet; + +create table dec64_parquet (k int, d decimal(7,2)) stored as parquet; +insert into dec64_parquet values + (1, 1.10), (1, 2.20), (2, 3.30), (2, 4.40), (3, 5.50), (3, cast(null as decimal(7,2))); + +-- Verifies that the Parquet vectorized reader engages the DECIMAL_64 path: +explain vectorization detail +select k, sum(d) from dec64_parquet group by k; + +select k, sum(d) from dec64_parquet group by k order by k; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/partitions_filter_default.q b/ql/src/test/queries/clientpositive/partitions_filter_default.q index f265133a7029..f1b1747bfb7c 100644 --- a/ql/src/test/queries/clientpositive/partitions_filter_default.q +++ b/ql/src/test/queries/clientpositive/partitions_filter_default.q @@ -1,3 +1,4 @@ +--! qt:disabled:HIVE-25965 create table ptestfilter (a string) partitioned by (c int); INSERT OVERWRITE TABLE ptestfilter PARTITION (c) select 'Col1', null; diff --git a/ql/src/test/queries/clientpositive/pcr_null_partition.q b/ql/src/test/queries/clientpositive/pcr_null_partition.q new file mode 100644 index 000000000000..e8edafe9a47d --- /dev/null +++ b/ql/src/test/queries/clientpositive/pcr_null_partition.q @@ -0,0 +1,17 @@ +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.fetch.task.conversion=none; + +drop table if exists pcr_t1; +create table pcr_t1 (key string, value string) partitioned by (ds string); + +insert into pcr_t1 partition (ds) select 'A', 'V1', '2000-04-08'; +insert into pcr_t1 partition (ds) select 'B', 'V2', 'null'; +insert into pcr_t1 partition (ds) select 'C', 'V3', null; + +explain select key, value, ds from pcr_t1 where ds is null; +select key, value, ds from pcr_t1 where ds is null; + +explain select key, value, ds from pcr_t1 where ds is not null; +select key, value, ds from pcr_t1 where ds is not 
null order by key; + +select key, value, ds from pcr_t1 where ds = 'null'; diff --git a/ql/src/test/queries/clientpositive/show_columns_like.q b/ql/src/test/queries/clientpositive/show_columns_like.q new file mode 100644 index 000000000000..3cee9698f3a4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/show_columns_like.q @@ -0,0 +1,29 @@ +CREATE DATABASE IF NOT EXISTS col_test_db; +USE col_test_db; + +CREATE TABLE wildcard_table ( + id_primary INT, + id_secondary INT, + idxprimary INT, + name_first STRING, + name_last STRING, + MixedCaseColumn INT, + another_Mixed_Col STRING +); + +SHOW COLUMNS FROM wildcard_table LIKE 'id%'; +SHOW COLUMNS FROM wildcard_table LIKE 'name_%'; +-- Case Insensitivity test +SHOW COLUMNS FROM wildcard_table LIKE 'mixedcase%'; +SHOW COLUMNS FROM wildcard_table LIKE 'another_mixed_col'; +SHOW COLUMNS FROM wildcard_table LIKE 'id*'; +SHOW COLUMNS FROM wildcard_table LIKE 'id_primary|name_first'; + +-- Additional tests for '_' and empty results +SHOW COLUMNS FROM wildcard_table LIKE 'id_secondar_'; +SHOW COLUMNS FROM wildcard_table LIKE 'id__rimary'; +SHOW COLUMNS FROM wildcard_table LIKE 'abc%'; +SHOW COLUMNS FROM wildcard_table LIKE 'id__'; +SHOW COLUMNS FROM wildcard_table LIKE 'id\_primary'; + +DROP DATABASE col_test_db CASCADE; diff --git a/ql/src/test/queries/clientpositive/show_tables.q b/ql/src/test/queries/clientpositive/show_tables.q index 97e48921a69f..bc48f60c0c2a 100644 --- a/ql/src/test/queries/clientpositive/show_tables.q +++ b/ql/src/test/queries/clientpositive/show_tables.q @@ -53,3 +53,7 @@ USE `database`; CREATE TABLE foo_n4(a INT); USE default; SHOW TABLES FROM `database` LIKE "foo_n4"; + +-- SHOW TABLES with legacy glob and regex +SHOW TABLES LIKE 'shtb_*'; +SHOW TABLES LIKE 'shtb_test1_n0|shtb_test2_n0'; diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out index 85c049fe39d9..ea4e1ccd766b 100644 --- 
a/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out +++ b/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out @@ -137,10 +137,12 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink + Filter Operator + predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink PREHOOK: query: explain select * from loc_orc_n4 PREHOOK: type: QUERY @@ -228,10 +230,12 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink + Filter Operator + predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink PREHOOK: query: explain select * from loc_orc_n4 PREHOOK: type: QUERY @@ -283,10 +287,12 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - ListSink + Filter Operator + predicate: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), 
year (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + ListSink PREHOOK: query: explain select * from loc_orc_n4 where year='2001' and year='__HIVE_DEFAULT_PARTITION__' PREHOOK: type: QUERY @@ -475,10 +481,12 @@ STAGE PLANS: TableScan alias: loc_orc_n4 filterExpr: (year <> '2001') (type: boolean) - Select Operator - expressions: state (type: string), locid (type: int) - outputColumnNames: _col0, _col1 - ListSink + Filter Operator + predicate: (year <> '2001') (type: boolean) + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: _col0, _col1 + ListSink PREHOOK: query: explain select * from loc_orc_n4 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/db_ddl_explain.q.out b/ql/src/test/results/clientpositive/llap/db_ddl_explain.q.out index 257d69751fde..3dc0c6bbc39a 100644 --- a/ql/src/test/results/clientpositive/llap/db_ddl_explain.q.out +++ b/ql/src/test/results/clientpositive/llap/db_ddl_explain.q.out @@ -159,6 +159,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-0 Drop Database + catalog: hive database: d if exists: false diff --git a/ql/src/test/results/clientpositive/llap/db_notification_batch_insert.q.out b/ql/src/test/results/clientpositive/llap/db_notification_batch_insert.q.out new file mode 100644 index 000000000000..7ab6820cf149 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/db_notification_batch_insert.q.out @@ -0,0 +1,79 @@ +PREHOOK: query: DROP TABLE IF EXISTS repro_batch_test +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: DROP TABLE IF EXISTS repro_batch_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: CREATE TABLE repro_batch_test ( + id INT, + name STRING +) +PARTITIONED BY (part_key INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repro_batch_test +POSTHOOK: query: CREATE TABLE repro_batch_test ( + id INT, + name STRING +) +PARTITIONED BY 
(part_key INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repro_batch_test +PREHOOK: query: INSERT INTO TABLE repro_batch_test PARTITION (part_key) +SELECT + val as id, + 'dummy_data' as name, + val as part_key +FROM ( + SELECT (a.pos + 1) as val + FROM (SELECT posexplode(split(repeat(',', 10), ','))) a +) dummy +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@repro_batch_test +POSTHOOK: query: INSERT INTO TABLE repro_batch_test PARTITION (part_key) +SELECT + val as id, + 'dummy_data' as name, + val as part_key +FROM ( + SELECT (a.pos + 1) as val + FROM (SELECT posexplode(split(repeat(',', 10), ','))) a +) dummy +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@repro_batch_test +POSTHOOK: Output: default@repro_batch_test@part_key=1 +POSTHOOK: Output: default@repro_batch_test@part_key=10 +POSTHOOK: Output: default@repro_batch_test@part_key=11 +POSTHOOK: Output: default@repro_batch_test@part_key=2 +POSTHOOK: Output: default@repro_batch_test@part_key=3 +POSTHOOK: Output: default@repro_batch_test@part_key=4 +POSTHOOK: Output: default@repro_batch_test@part_key=5 +POSTHOOK: Output: default@repro_batch_test@part_key=6 +POSTHOOK: Output: default@repro_batch_test@part_key=7 +POSTHOOK: Output: default@repro_batch_test@part_key=8 +POSTHOOK: Output: default@repro_batch_test@part_key=9 +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=10).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=10).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=11).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=11).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=1).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=1).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=2).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test 
PARTITION(part_key=2).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=3).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=3).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=4).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=4).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=5).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=5).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=6).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=6).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=7).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=7).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=8).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=8).name SIMPLE [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=9).id SCRIPT [] +POSTHOOK: Lineage: repro_batch_test PARTITION(part_key=9).name SIMPLE [] diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out index deefd0129395..ce11a0b74da3 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out @@ -276,8 +276,11 @@ STAGE PLANS: alias: dynamic_part_table filterExpr: ((partcol1 = '1') and (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__')) (type: boolean) GatherStats: false - Select Operator - expressions: intcol (type: string) - outputColumnNames: _col0 - ListSink + Filter Operator + isSamplingPred: false + predicate: (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') (type: boolean) + Select Operator + expressions: intcol (type: string) + outputColumnNames: _col0 + ListSink diff --git a/ql/src/test/results/clientpositive/llap/folder_predicate.q.out 
b/ql/src/test/results/clientpositive/llap/folder_predicate.q.out index f8b2ef3663ef..1e67ce4271a4 100644 --- a/ql/src/test/results/clientpositive/llap/folder_predicate.q.out +++ b/ql/src/test/results/clientpositive/llap/folder_predicate.q.out @@ -41,9 +41,9 @@ STAGE PLANS: Processor Tree: TableScan alias: predicate_fold_tb - filterExpr: (value is null or (value < 3) or (value > 3)) (type: boolean) + filterExpr: ((value <> 3) or value is null) (type: boolean) Filter Operator - predicate: (value is null or (value < 3) or (value > 3)) (type: boolean) + predicate: ((value <> 3) or value is null) (type: boolean) Select Operator expressions: value (type: int) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out new file mode 100644 index 000000000000..1ce5e8286210 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out @@ -0,0 +1,35 @@ +PREHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab +POSTHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab +POSTHOOK: Lineage: tab.c1 SCRIPT [] +POSTHOOK: Lineage: tab.c2 SCRIPT [] +POSTHOOK: Lineage: tab.c3 SCRIPT [] +PREHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2 +ON t1.c1 == t2.c1 +AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 ) +WHERE t2.c1 IS NULL +PREHOOK: type: QUERY 
+PREHOOK: Input: default@tab +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2 +ON t1.c1 == t2.c1 +AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 ) +WHERE t2.c1 IS NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +#### A masked pattern was here #### +a a aa +b b ba diff --git a/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out b/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out index c5959dce36b5..a44dddfc09e5 100644 --- a/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out +++ b/ql/src/test/results/clientpositive/llap/lateral_view_outer.q.out @@ -187,6 +187,59 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 ListSink +PREHOOK: query: explain ast +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +#### A masked pattern was here #### +POSTHOOK: query: explain ast +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +#### A masked pattern was here #### +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_LATERAL_VIEW_OUTER + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + explode + TOK_TABLE_OR_COL + value + a + TOK_TABALIAS + C + TOK_TABREF + TOK_TABNAME + array_valued + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_LIMIT + 10 + +PREHOOK: query: explain cbo +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(fetch=[10]) + HiveProject(array_valued.key=[$0], array_valued.value=[$1], 
c.a=[$6]) + HiveTableFunctionScan(invocation=[LATERAL(explode($1), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) ARRAY value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, VARCHAR(2147483647) c.a)], outer=[true]) + HiveTableScan(table=[[default, array_valued]], table:alias=[array_valued]) + PREHOOK: query: select * from array_valued LATERAL VIEW OUTER explode(value) C AS a limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@array_valued @@ -205,3 +258,110 @@ POSTHOOK: Input: default@array_valued 409 ["val_409","val_409"] val_409 255 NULL NULL 278 NULL NULL +PREHOOK: query: CREATE VIEW array_valued_view AS +SELECT array_valued.key AS key, a +FROM array_valued +LATERAL VIEW OUTER explode(value) lv AS a +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@array_valued +PREHOOK: Output: database:default +PREHOOK: Output: default@array_valued_view +POSTHOOK: query: CREATE VIEW array_valued_view AS +SELECT array_valued.key AS key, a +FROM array_valued +LATERAL VIEW OUTER explode(value) lv AS a +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@array_valued +POSTHOOK: Output: database:default +POSTHOOK: Output: default@array_valued_view +POSTHOOK: Lineage: array_valued_view.a SCRIPT [(array_valued)array_valued.FieldSchema(name:value, type:array, comment:null), ] +POSTHOOK: Lineage: array_valued_view.key SIMPLE [(array_valued)array_valued.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: EXPLAIN CBO +SELECT key, a FROM array_valued_view limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +PREHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT key, a FROM array_valued_view limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +POSTHOOK: Input: default@array_valued_view +#### A masked pattern 
was here #### +CBO PLAN: +HiveSortLimit(fetch=[10]) + HiveProject(key=[$0], a=[$6]) + HiveTableFunctionScan(invocation=[LATERAL(explode($1), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) ARRAY value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, VARCHAR(2147483647) lv.a)], outer=[true]) + HiveTableScan(table=[[default, array_valued]], table:alias=[array_valued]) + +PREHOOK: query: EXPLAIN +SELECT key, a FROM array_valued_view limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +PREHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT key, a FROM array_valued_view limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +POSTHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: array_valued + properties: + insideView TRUE + Lateral View Forward + Select Operator + expressions: key (type: string) + outputColumnNames: key + Lateral View Join Operator + outputColumnNames: _col0, _col6 + Limit + Number of rows: 10 + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + ListSink + Select Operator + expressions: value (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + outer lateral view: true + Lateral View Join Operator + outputColumnNames: _col0, _col6 + Limit + Number of rows: 10 + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT key, a FROM array_valued_view limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@array_valued +PREHOOK: Input: default@array_valued_view 
+#### A masked pattern was here #### +POSTHOOK: query: SELECT key, a FROM array_valued_view limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@array_valued +POSTHOOK: Input: default@array_valued_view +#### A masked pattern was here #### +238 NULL +86 NULL +311 val_311 +311 val_311 +27 NULL +165 NULL +409 val_409 +409 val_409 +255 NULL +278 NULL diff --git a/ql/src/test/results/clientpositive/llap/lineage8.q.out b/ql/src/test/results/clientpositive/llap/lineage8.q.out new file mode 100644 index 000000000000..6f8334018da7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/lineage8.q.out @@ -0,0 +1,41 @@ +PREHOOK: query: create table table_1 (id1 int, id2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_1 +PREHOOK: query: create table table_2 (id1 int, id2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_2 +PREHOOK: query: create table table_3 as +select id1 from table_1 t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@table_1 +PREHOOK: Input: default@table_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@table_3 +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"24a0f860f60a1b7d5f350fd8eb164a37","queryText":"create table table_3 as\nselect id1 from table_1 t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 = 2","edges":[{"sources":[1,2],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"(t1.id2 = 1)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2 = 
2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_3.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_2.id1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]} +PREHOOK: query: create table table_4 as +select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@table_1 +PREHOOK: Input: default@table_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@table_4 +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"761d0cf34076cec77766bf7af8f1cbe9","queryText":"create table table_4 as\nselect id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 = 2","edges":[{"sources":[1],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0],"expression":"((t1.id1 = 3) and (t1.id2 = 1))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_4.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_2.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]} +PREHOOK: query: create table table_5 as +select t.id1 from +(select id1 from table_1 t1 where t1.id2 = 1) t +join table_2 t1 on t.id1 = t1.id2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@table_1 +PREHOOK: Input: default@table_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@table_5 +Result schema 
has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"615bb67f6ff2dd50695bffd14c296677","queryText":"create table table_5 as\nselect t.id1 from\n(select id1 from table_1 t1 where t1.id2 = 1) t\njoin table_2 t1 on t.id1 = t1.id2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[2,1],"targets":[0],"expression":"((t1.id2 = 1) and t1.id1 is not null)","edgeType":"PREDICATE"},{"sources":[1,3],"targets":[0],"expression":"(t1.id1 = t1.id2)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"t1.id2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_5.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]} diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index dcc7c103b771..cb2d50d73666 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -627,7 +627,7 @@ STAGE PLANS: alias: orc_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) @@ -695,10 +695,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - filterExpr: 
(UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean) + filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index 907c374f1841..289cc55a5834 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -146,8 +146,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -267,8 +267,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] 
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -454,8 +454,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -575,8 +575,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -762,8 +762,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -883,8 +883,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_decimal64.q.out b/ql/src/test/results/clientpositive/llap/parquet_decimal64.q.out new file mode 100644 index 000000000000..cb52b3c0393d --- /dev/null +++ 
b/ql/src/test/results/clientpositive/llap/parquet_decimal64.q.out @@ -0,0 +1,171 @@ +PREHOOK: query: drop table if exists dec64_parquet +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: drop table if exists dec64_parquet +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: create table dec64_parquet (k int, d decimal(7,2)) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dec64_parquet +POSTHOOK: query: create table dec64_parquet (k int, d decimal(7,2)) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dec64_parquet +PREHOOK: query: insert into dec64_parquet values + (1, 1.10), (1, 2.20), (2, 3.30), (2, 4.40), (3, 5.50), (3, cast(null as decimal(7,2))) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dec64_parquet +POSTHOOK: query: insert into dec64_parquet values + (1, 1.10), (1, 2.20), (2, 3.30), (2, 4.40), (3, 5.50), (3, cast(null as decimal(7,2))) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dec64_parquet +POSTHOOK: Lineage: dec64_parquet.d SCRIPT [] +POSTHOOK: Lineage: dec64_parquet.k SCRIPT [] +PREHOOK: query: explain vectorization detail +select k, sum(d) from dec64_parquet group by k +PREHOOK: type: QUERY +PREHOOK: Input: default@dec64_parquet +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select k, sum(d) from dec64_parquet group by k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dec64_parquet +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked 
pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: dec64_parquet +#Masked# Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:k:int, 1:d:decimal(7,2)/DECIMAL_64, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + Select Operator + expressions: k (type: int), d (type: decimal(7,2)) + outputColumnNames: k, d + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] +#Masked# Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(d) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal64(col 1:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: k (type: int) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 +#Masked# Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(17,2) +#Masked# Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: 
[DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: k:int, d:decimal(7,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, VALUE._col0:decimal(17,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal64(col 1:decimal(17,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 +#Masked# Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false +#Masked# Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select k, sum(d) from dec64_parquet group by k order by k +PREHOOK: type: QUERY +PREHOOK: Input: default@dec64_parquet +#### A masked pattern was here 
#### +POSTHOOK: query: select k, sum(d) from dec64_parquet group by k order by k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dec64_parquet +#### A masked pattern was here #### +1 3.30 +2 7.70 +3 5.50 diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index 8de33e7c480b..458ddf3c5297 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -158,8 +158,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -283,8 +283,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index d7a825b592a6..4858f10aa63a 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -561,7 +561,7 @@ STAGE PLANS: alias: tbl_pred Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > 
-2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) @@ -629,10 +629,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean) + filterExpr: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean) Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and ((t < -3Y) or ((t > -3Y) and (t < -2Y)) or ((t > -2Y) and (t < -1Y)) or (t > -1Y)) and (s like 'bob%') and s is not null) (type: boolean) + predicate: (UDFToInteger(t) BETWEEN 25 AND 30 and (s like 'bob%') and (t <> -3Y) and (t <> -2Y) and (t <> -1Y) and s is not null) (type: boolean) Statistics: Num rows: 262 Data size: 26462 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) diff --git a/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index 20c6f428a8c2..faca95103c2f 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -134,8 +134,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + 
inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -291,8 +291,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_0.q.out index 3d01120514a3..41f017ab2f0b 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_0.q.out @@ -123,8 +123,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -251,8 +251,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -443,8 +443,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + 
featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -571,8 +571,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -763,8 +763,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -891,8 +891,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -1084,8 +1084,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_1.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_1.q.out index a00c56715b32..85b2feec2ee4 100644 --- 
a/ql/src/test/results/clientpositive/llap/parquet_vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_1.q.out @@ -111,8 +111,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_10.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_10.q.out index 87b54a394c53..28a89994b74f 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_10.q.out @@ -102,8 +102,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out index 96e5976e66de..f7fb95307d02 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out @@ -84,8 +84,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out index 02fde23144f2..1a797059afd5 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out @@ -138,8 +138,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out index 7125704c33d2..cdeb5e543056 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out @@ -150,8 +150,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -507,8 +507,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false 
usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out index 5acc12c3b71d..f5780b5bbe0f 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out @@ -140,8 +140,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out index 5979bc2dbb9a..41cfc82d2aab 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out @@ -136,8 +136,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out index b3c24ec4c133..969bf4fb798d 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out @@ -113,8 +113,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_17.q.out index 1300d7beb851..93d249b7ef87 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_17.q.out @@ -105,8 +105,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_2.q.out index 4e8b06ab91e6..3f32b3cbd15b 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_2.q.out @@ -115,8 +115,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out index 25e7197cedb8..529d61eb1db3 
100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out @@ -120,8 +120,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_4.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_4.q.out index aecd787c793c..7bcf796c86ea 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_4.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_4.q.out @@ -115,8 +115,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_5.q.out index 342674ecd771..f905323dba6f 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_5.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_5.q.out @@ -108,8 +108,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_6.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_6.q.out index 48d7026eb630..37a7e111dc73 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_6.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_6.q.out @@ -96,8 +96,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_7.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_7.q.out index 02d8275b94e8..cfde16992112 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_7.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_7.q.out @@ -120,8 +120,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -369,8 +369,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false 
diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_8.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_8.q.out index 22a2e1922e37..8f6bb22b2814 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_8.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_8.q.out @@ -116,8 +116,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -352,8 +352,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out index b3c24ec4c133..969bf4fb798d 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out @@ -113,8 +113,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_decimal_date.q.out 
b/ql/src/test/results/clientpositive/llap/parquet_vectorization_decimal_date.q.out index 05e4d277465d..0bab42017f29 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_decimal_date.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_decimal_date.q.out @@ -77,8 +77,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_div0.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_div0.q.out index 9f9a40bac222..5cc571489008 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_div0.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_div0.q.out @@ -58,8 +58,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -256,8 +256,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -489,8 +489,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - 
inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_limit.q.out index 1167c5d43e92..09d31bdd4c41 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_limit.q.out @@ -52,8 +52,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -177,8 +177,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -334,8 +334,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -502,8 +502,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - 
featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -666,8 +666,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -870,8 +870,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_offset_limit.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_offset_limit.q.out index a65dddfbdcd6..e14c57ffbed4 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_offset_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_offset_limit.q.out @@ -50,8 +50,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -172,8 +172,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - 
featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_project.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_project.q.out index 4f13a49cc63a..bb56a81f6e61 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_project.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_part_project.q.out @@ -99,8 +99,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_pushdown.q.out index d90110e7b662..170da8b19b52 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_pushdown.q.out @@ -52,8 +52,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/pcr_null_partition.q.out b/ql/src/test/results/clientpositive/llap/pcr_null_partition.q.out new file mode 100644 index 000000000000..ca1d2ea23f32 --- 
/dev/null +++ b/ql/src/test/results/clientpositive/llap/pcr_null_partition.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: drop table if exists pcr_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: drop table if exists pcr_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: create table pcr_t1 (key string, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: create table pcr_t1 (key string, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcr_t1 +PREHOOK: query: insert into pcr_t1 partition (ds) select 'A', 'V1', '2000-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: insert into pcr_t1 partition (ds) select 'A', 'V1', '2000-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@pcr_t1 +POSTHOOK: Output: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key SIMPLE [] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [] +PREHOOK: query: insert into pcr_t1 partition (ds) select 'B', 'V2', 'null' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: insert into pcr_t1 partition (ds) select 'B', 'V2', 'null' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@pcr_t1 +POSTHOOK: Output: default@pcr_t1@ds=null +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=null).key SIMPLE [] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=null).value SIMPLE [] +PREHOOK: query: insert into pcr_t1 partition (ds) select 'C', 'V3', null +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: insert into pcr_t1 partition (ds) select 
'C', 'V3', null +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@pcr_t1 +POSTHOOK: Output: default@pcr_t1@ds=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=__HIVE_DEFAULT_PARTITION__).key SIMPLE [] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=__HIVE_DEFAULT_PARTITION__).value SIMPLE [] +PREHOOK: query: explain select key, value, ds from pcr_t1 where ds is null +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: explain select key, value, ds from pcr_t1 where ds is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + filterExpr: ds is null (type: boolean) + Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), null (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcr_t1 where ds is null +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: 
Input: default@pcr_t1@ds=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcr_t1 where ds is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +C V3 NULL +PREHOOK: query: explain select key, value, ds from pcr_t1 where ds is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=null +#### A masked pattern was here #### +POSTHOOK: query: explain select key, value, ds from pcr_t1 where ds is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=null +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcr_t1 where ds is not null order by key +PREHOOK: type: 
QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=null +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcr_t1 where ds is not null order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=null +#### A masked pattern was here #### +A V1 2000-04-08 +B V2 null +PREHOOK: query: select key, value, ds from pcr_t1 where ds = 'null' +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=null +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcr_t1 where ds = 'null' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=null +#### A masked pattern was here #### +B V2 null diff --git a/ql/src/test/results/clientpositive/llap/semijoin6.q.out b/ql/src/test/results/clientpositive/llap/semijoin6.q.out index 0c7e9d4f441b..327b12ec0509 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin6.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin6.q.out @@ -777,20 +777,20 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) - outputColumnNames: _col0, _col2 + outputColumnNames: _col0, _col3 Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col2 (type: int), _col2 (type: int) + keys: _col0 (type: int), _col3 (type: int), _col3 (type: int), _col3 (type: int) minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 
(type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/llap/show_columns_like.q.out b/ql/src/test/results/clientpositive/llap/show_columns_like.q.out new file mode 100644 index 000000000000..43fab7544b24 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/show_columns_like.q.out @@ -0,0 +1,129 @@ +PREHOOK: query: CREATE DATABASE IF NOT EXISTS col_test_db +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:col_test_db +POSTHOOK: query: CREATE DATABASE IF NOT EXISTS col_test_db +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:col_test_db +PREHOOK: query: USE col_test_db +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:col_test_db +POSTHOOK: query: USE col_test_db +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:col_test_db +PREHOOK: query: CREATE TABLE wildcard_table ( + id_primary INT, + id_secondary INT, + idxprimary INT, + name_first STRING, + name_last STRING, + MixedCaseColumn INT, + another_Mixed_Col STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: col_test_db@wildcard_table +PREHOOK: Output: database:col_test_db +POSTHOOK: query: CREATE TABLE wildcard_table ( + id_primary INT, + id_secondary INT, + idxprimary INT, + name_first STRING, + name_last STRING, + MixedCaseColumn INT, + another_Mixed_Col STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: col_test_db@wildcard_table +POSTHOOK: Output: database:col_test_db +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id%' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id%' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: 
col_test_db@wildcard_table +id_primary +id_secondary +idxprimary +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'name_%' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'name_%' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +name_first +name_last +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'mixedcase%' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'mixedcase%' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +mixedcasecolumn +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'another_mixed_col' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'another_mixed_col' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +another_mixed_col +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id*' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id*' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +id_primary +id_secondary +idxprimary +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id_primary|name_first' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id_primary|name_first' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +id_primary +idxprimary +name_first +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id_secondar_' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id_secondar_' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +id_secondary +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id__rimary' 
+PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id__rimary' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +id_primary +idxprimary +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'abc%' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'abc%' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id__' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id__' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +PREHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id\_primary' +PREHOOK: type: SHOWCOLUMNS +PREHOOK: Input: col_test_db@wildcard_table +POSTHOOK: query: SHOW COLUMNS FROM wildcard_table LIKE 'id\_primary' +POSTHOOK: type: SHOWCOLUMNS +POSTHOOK: Input: col_test_db@wildcard_table +id_primary +PREHOOK: query: DROP DATABASE col_test_db CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:col_test_db +PREHOOK: Output: col_test_db@wildcard_table +PREHOOK: Output: database:col_test_db +POSTHOOK: query: DROP DATABASE col_test_db CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:col_test_db +POSTHOOK: Output: col_test_db@wildcard_table +POSTHOOK: Output: database:col_test_db diff --git a/ql/src/test/results/clientpositive/llap/show_tables.q.out b/ql/src/test/results/clientpositive/llap/show_tables.q.out index dbdc52195bca..bb15d36391e9 100644 --- a/ql/src/test/results/clientpositive/llap/show_tables.q.out +++ b/ql/src/test/results/clientpositive/llap/show_tables.q.out @@ -588,3 +588,19 @@ POSTHOOK: query: SHOW TABLES FROM `database` LIKE "foo_n4" POSTHOOK: type: SHOWTABLES POSTHOOK: Input: database:database foo_n4 +PREHOOK: query: SHOW TABLES LIKE 'shtb_*' +PREHOOK: type: 
SHOWTABLES +PREHOOK: Input: database:default +POSTHOOK: query: SHOW TABLES LIKE 'shtb_*' +POSTHOOK: type: SHOWTABLES +POSTHOOK: Input: database:default +shtb_test1_n0 +shtb_test2_n0 +PREHOOK: query: SHOW TABLES LIKE 'shtb_test1_n0|shtb_test2_n0' +PREHOOK: type: SHOWTABLES +PREHOOK: Input: database:default +POSTHOOK: query: SHOW TABLES LIKE 'shtb_test1_n0|shtb_test2_n0' +POSTHOOK: type: SHOWTABLES +POSTHOOK: Input: database:default +shtb_test1_n0 +shtb_test2_n0 diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 23e8a82b7a2e..1edc82eeeded 100644 --- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -153,7 +153,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: ((cdate < DATE'1969-07-14') or (cdate > DATE'1970-01-21') or ((cdate > DATE'1969-07-14') and (cdate < DATE'1969-10-26')) or ((cdate > DATE'1969-10-26') and (cdate < DATE'1970-01-21'))) (type: boolean) + filterExpr: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean) Statistics: Num rows: 12289 Data size: 339304 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -161,8 +161,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterDateColLessDateScalar(col 3:date, val -171), FilterDateColGreaterDateScalar(col 3:date, val 20), FilterExprAndExpr(children: FilterDateColGreaterDateScalar(col 3:date, val -171), FilterDateColLessDateScalar(col 3:date, val -67)), FilterExprAndExpr(children: FilterDateColGreaterDateScalar(col 3:date, val -67), FilterDateColLessDateScalar(col 3:date, val 20))) - predicate: ((cdate < DATE'1969-07-14') or (cdate > DATE'1970-01-21') or ((cdate > DATE'1969-07-14') and (cdate < DATE'1969-10-26')) or 
((cdate > DATE'1969-10-26') and (cdate < DATE'1970-01-21'))) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterDateColNotEqualDateScalar(col 3:date, val -171), FilterDateColNotEqualDateScalar(col 3:date, val -67), FilterDateColNotEqualDateScalar(col 3:date, val 20)) + predicate: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean) Statistics: Num rows: 12289 Data size: 339304 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Select Vectorization: @@ -370,7 +370,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: ((cdecimal1 < -3367.6517567568) or (cdecimal1 > 2365.8945945946) or ((cdecimal1 > -3367.6517567568) and (cdecimal1 < 881.0135135135)) or ((cdecimal1 > 881.0135135135) and (cdecimal1 < 2365.8945945946))) (type: boolean) + filterExpr: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> 2365.8945945946)) (type: boolean) Statistics: Num rows: 12289 Data size: 1027600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -378,8 +378,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterDecimalColLessDecimalScalar(col 1:decimal(20,10), val -3367.6517567568), FilterDecimalColGreaterDecimalScalar(col 1:decimal(20,10), val 2365.8945945946), FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1:decimal(20,10), val -3367.6517567568), FilterDecimalColLessDecimalScalar(col 1:decimal(20,10), val 881.0135135135)), FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColLessDecimalScalar(col 1:decimal(20,10), val 2365.8945945946))) - predicate: ((cdecimal1 < -3367.6517567568) or (cdecimal1 > 2365.8945945946) or ((cdecimal1 > -3367.6517567568) and (cdecimal1 < 881.0135135135)) or ((cdecimal1 > 881.0135135135) and 
(cdecimal1 < 2365.8945945946))) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946)) + predicate: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> 2365.8945945946)) (type: boolean) Statistics: Num rows: 12289 Data size: 1027600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Select Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 80c87d730585..630a39fab04d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -2980,8 +2980,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -3174,8 +3174,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -3697,8 +3697,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + 
inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -3970,8 +3970,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -4212,8 +4212,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -4759,8 +4759,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -5032,8 +5032,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -5274,8 +5274,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - 
featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out b/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out index ae3717090477..9948978d5a05 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out @@ -101,8 +101,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -204,8 +204,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -785,8 +785,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -888,8 +888,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - 
featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/vectorization_numeric_overflows.q.out b/ql/src/test/results/clientpositive/llap/vectorization_numeric_overflows.q.out index acc0073a7a0c..374b0b572553 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_numeric_overflows.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_numeric_overflows.q.out @@ -168,8 +168,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -295,8 +295,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -420,8 +420,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -545,8 +545,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - 
featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -672,8 +672,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -797,8 +797,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -1090,8 +1090,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false @@ -1313,8 +1313,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: true usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out index e3bc4547b9dd..e0d69c927f32 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_parquet_projection.q.out @@ -247,8 +247,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -344,8 +344,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -563,8 +563,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 78eb986611ab..d0b011f3ef24 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -174,8 +174,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - 
featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 51a5fc25c098..f14be4078046 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -311,7 +311,7 @@ STAGE PLANS: Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col7), sum(_col5), count(_col5), max(_col6) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCount(col 5:string) -> bigint, VectorUDAFSumDouble(col 3:float) -> double, VectorUDAFCount(col 3:float) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:double) -> bigint, VectorUDAFMaxDecimal(col 10:decimal(4,2)) -> decimal(4,2) + aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCount(col 5:string) -> bigint, VectorUDAFSumDouble(col 3:float) -> double, VectorUDAFCount(col 3:float) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:double) -> bigint, VectorUDAFMaxDecimal64(col 10:decimal(4,2)/DECIMAL_64) -> decimal(4,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:tinyint @@ -339,8 +339,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] 
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -652,8 +652,8 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false diff --git a/service/src/java/org/apache/hive/service/auth/saml/HiveSamlAuthTokenGenerator.java b/service/src/java/org/apache/hive/service/auth/saml/HiveSamlAuthTokenGenerator.java index 51cf646b01ee..d9060f44dfcc 100644 --- a/service/src/java/org/apache/hive/service/auth/saml/HiveSamlAuthTokenGenerator.java +++ b/service/src/java/org/apache/hive/service/auth/saml/HiveSamlAuthTokenGenerator.java @@ -19,6 +19,7 @@ package org.apache.hive.service.auth.saml; import com.google.common.annotations.VisibleForTesting; +import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; @@ -78,11 +79,11 @@ public String get(String username, String relayStateKey) { } private String encode(String token) { - return Base64.getEncoder().encodeToString(token.getBytes()); + return Base64.getEncoder().encodeToString(token.getBytes(StandardCharsets.UTF_8)); } private String decode(String encodedToken) { - return new String(Base64.getDecoder().decode(encodedToken)); + return new String(Base64.getDecoder().decode(encodedToken), StandardCharsets.UTF_8); } private String getTokenStr(String username, String id, String timestamp, @@ -100,7 +101,7 @@ private String getTokenStr(String username, String id, String timestamp, private String getSign(String input) { try { MessageDigest md = MessageDigest.getInstance("SHA-256"); - md.update(input.getBytes()); + 
md.update(input.getBytes(StandardCharsets.UTF_8)); md.update(signatureSecret); byte[] digest = md.digest(); return Base64.getEncoder().encodeToString(digest); @@ -144,7 +145,8 @@ private boolean isExpired(long currentTime, long tokenTime) { } private boolean signatureMatches(String origSign, String derivedSign) { - return !MessageDigest.isEqual(origSign.getBytes(), derivedSign.getBytes()); + return MessageDigest.isEqual(origSign.getBytes(StandardCharsets.UTF_8), + derivedSign.getBytes(StandardCharsets.UTF_8)); } public static boolean parse(String token, Map kv) { @@ -153,7 +155,7 @@ public static boolean parse(String token, Map kv) { return false; } for (String split : splits) { - String[] pair = split.split(SEPARATOR); + String[] pair = split.split(SEPARATOR, 2); if (pair.length != 2) { return false; } diff --git a/service/src/java/org/apache/hive/service/cli/session/LlapClusterRouter.java b/service/src/java/org/apache/hive/service/cli/session/LlapClusterRouter.java new file mode 100644 index 000000000000..20362be53230 --- /dev/null +++ b/service/src/java/org/apache/hive/service/cli/session/LlapClusterRouter.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hive.service.cli.session; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Resolves server-side LLAP cluster routing rules and applies the target cluster's + * namespace configs to the session. No-op when routing rules are not configured. + */ +public final class LlapClusterRouter { + + private static final Logger LOG = LoggerFactory.getLogger(LlapClusterRouter.class); + private static final String CLUSTER_PREFIX = "hive.llap.cluster."; + private static final String SESSIONS_NS_SUFFIX = ".sessions.namespace"; + private static final String REGISTRY_NS_SUFFIX = ".registry.namespace"; + private static final String SERVICE_HOSTS_SUFFIX = ".service.hosts"; + + private LlapClusterRouter() { + } + + /** + * Resolves routing rules and applies LLAP cluster configs to the session. + * No-op if routing rules are not configured or empty. 
+ */ + public static void applyRouting(HiveConf sessionConf, String username) { + String rules = HiveConf.getVar(sessionConf, HiveConf.ConfVars.LLAP_CLUSTER_ROUTING_RULES); + if (rules == null || rules.isEmpty()) { + return; + } + + String cluster = resolveCluster(rules, username); + if (cluster == null) { + return; + } + + // Cast to Configuration to avoid HiveConf warnings for non-ConfVars keys + Configuration conf = sessionConf; + String sessionsNs = conf.get(CLUSTER_PREFIX + cluster + SESSIONS_NS_SUFFIX); + String registryNs = conf.get(CLUSTER_PREFIX + cluster + REGISTRY_NS_SUFFIX); + String serviceHosts = conf.get(CLUSTER_PREFIX + cluster + SERVICE_HOSTS_SUFFIX); + if (serviceHosts == null) { + serviceHosts = "@" + cluster; + } + + if (sessionsNs != null) { + sessionConf.setVar(HiveConf.ConfVars.HIVE_SERVER2_TEZ_EXTERNAL_SESSIONS_NAMESPACE, sessionsNs); + } + if (registryNs != null) { + sessionConf.set("tez.am.registry.namespace", registryNs); + } + sessionConf.setVar(HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS, serviceHosts); + LOG.info("Routed user {} to LLAP cluster '{}' (sessions.namespace={}, registry.namespace={}, " + + "service.hosts={})", username, cluster, sessionsNs, registryNs, serviceHosts); + } + + static String resolveCluster(String rules, String username) { + String userMatch = null; + String groupMatch = null; + String defaultCluster = null; + + for (String rule : rules.split(",")) { + rule = rule.trim(); + if (rule.startsWith("user:")) { + String[] kv = rule.substring(5).split("=", 2); + if (kv.length == 2 && kv[0].equals(username)) { + userMatch = kv[1]; + } + } else if (rule.startsWith("group:")) { + String[] kv = rule.substring(6).split("=", 2); + if (kv.length == 2 && groupMatch == null) { + try { + String[] groups = UserGroupInformation.createRemoteUser(username).getGroupNames(); + for (String g : groups) { + if (g.equals(kv[0])) { + groupMatch = kv[1]; + break; + } + } + } catch (Exception e) { + LOG.debug("Failed to resolve groups for 
user {}", username, e); + } + } + } else if (rule.startsWith("default=")) { + defaultCluster = rule.substring(8); + } + } + + if (userMatch != null) { + return userMatch; + } + if (groupMatch != null) { + return groupMatch; + } + return defaultCluster; + } +} diff --git a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java index 274df88390ef..c792eb6bbd92 100644 --- a/service/src/java/org/apache/hive/service/cli/session/SessionManager.java +++ b/service/src/java/org/apache/hive/service/cli/session/SessionManager.java @@ -109,6 +109,8 @@ public class SessionManager extends CompositeService { private String sessionImplWithUGIclassName; private String sessionImplclassName; private CleanupService cleanupService; + // Tracks which LLAP target gauges have been lazily registered. + private final java.util.Set registeredLlapTargetGauges = ConcurrentHashMap.newKeySet(); public SessionManager(HiveServer2 hiveServer2, boolean allowSessions) { super(SessionManager.class.getSimpleName()); @@ -209,6 +211,52 @@ public Integer getValue() { metrics.addRatio(MetricsConstant.HS2_AVG_ACTIVE_SESSION_TIME, activeSessionTime, activeSessionCnt); } + // Registers a per-LLAP-target session gauge on first use. 
+ private void registerLlapTargetGaugeIfNeeded(HiveSession session) { + try { + String sanitized = extractLlapTarget(session.getSessionConf()); + if (sanitized == null) { + return; + } + if (registeredLlapTargetGauges.contains(sanitized)) { + return; + } + Metrics metrics = MetricsFactory.getInstance(); + if (metrics == null) { + return; + } + if (!registeredLlapTargetGauges.add(sanitized)) { + return; + } + String metricName = MetricsConstant.HS2_LLAP_TARGET_SESSIONS + "_" + sanitized; + metrics.addGauge(metricName, () -> { + int count = 0; + for (HiveSession s : getSessions()) { + try { + String t = extractLlapTarget(s.getSessionConf()); + if (sanitized.equals(t)) { + count++; + } + } catch (Exception e) { + // Session may be closing concurrently + } + } + return count; + }); + LOG.info("Registered LLAP target session gauge: {}", metricName); + } catch (Exception e) { + LOG.debug("Could not register LLAP target gauge: {}", e.getMessage()); + } + } + + private static String extractLlapTarget(HiveConf conf) { + String target = conf.getVar(ConfVars.LLAP_DAEMON_SERVICE_HOSTS); + if (target != null && !target.isEmpty()) { + return target.startsWith("@") ? 
target.substring(1) : target; + } + return null; + } + private void initSessionImplClassName() { this.sessionImplclassName = hiveConf.getVar(ConfVars.HIVE_SESSION_IMPL_CLASSNAME); this.sessionImplWithUGIclassName = hiveConf.getVar(ConfVars.HIVE_SESSION_IMPL_WITH_UGI_CLASSNAME); @@ -485,6 +533,7 @@ public HiveSession createSession(SessionHandle sessionHandle, TProtocolVersion p session.setOperationManager(operationManager); try { session.open(sessionConf); + LlapClusterRouter.applyRouting(session.getHiveConf(), session.getUserName()); } catch (Exception e) { LOG.warn("Failed to open session", e); try { @@ -527,6 +576,7 @@ public HiveSession createSession(SessionHandle sessionHandle, TProtocolVersion p } throw new HiveSQLException(FAIL_CLOSE_ERROR_MESSAGE); } + registerLlapTargetGaugeIfNeeded(session); LOG.info("Session opened, " + session.getSessionHandle() + ", current sessions:" + getOpenSessionCount()); return session; diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java index 67ebb605d901..43f963107fae 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java @@ -382,7 +382,8 @@ private String doSamlAuth(HttpServletRequest request, HttpServletResponse respon LOG.info("Successfully validated the token for user {}", user); // token is valid; now confirm if the client identifier matches with the relay state. 
Map keyValues = new HashMap<>(); - if (HiveSamlAuthTokenGenerator.parse(token, keyValues)) { + String decodedToken = new String(Base64.getDecoder().decode(token), java.nio.charset.StandardCharsets.UTF_8); + if (HiveSamlAuthTokenGenerator.parse(decodedToken, keyValues)) { String relayStateKey = keyValues.get(HiveSamlAuthTokenGenerator.RELAY_STATE); if (!HiveSamlRelayStateStore.get() .validateClientIdentifier(relayStateKey, clientIdentifier)) { diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 95c25008ca9b..582b24f7281e 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -467,9 +467,11 @@ private void addHAContextAttributes(HttpServer.Builder builder, HiveConf hiveCon builder.setContextAttribute("hs2.failover.callback", new FailoverHandlerCallback(hs2HARegistry)); } - private static HttpServer.Builder createHttpServerBuilder(String webHost, int port, String name, String contextPath, + @VisibleForTesting + static HttpServer.Builder createHttpServerBuilder(String webHost, int port, String name, String contextPath, HiveConf hiveConf, CLIService cliService, PamAuthenticator pamAuthenticator) throws IOException { HttpServer.Builder builder = new HttpServer.Builder(name); + hiveConf.set("startcode", String.valueOf(System.currentTimeMillis())); builder.setConf(hiveConf); builder.setHost(webHost); builder.setPort(port); @@ -478,7 +480,6 @@ private static HttpServer.Builder createHttpServerBuilder(String webHost, int po builder.setAdmins(hiveConf.getVar(ConfVars.USERS_IN_ADMIN_ROLE)); // SessionManager is initialized builder.setContextAttribute("hive.sm", cliService.getSessionManager()); - hiveConf.set("startcode", String.valueOf(System.currentTimeMillis())); if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_WEBUI_USE_SSL)) { String keyStorePath = 
hiveConf.getVar(ConfVars.HIVE_SERVER2_WEBUI_SSL_KEYSTORE_PATH); if (StringUtils.isBlank(keyStorePath)) { diff --git a/service/src/test/org/apache/hive/service/server/TestHiveServer2.java b/service/src/test/org/apache/hive/service/server/TestHiveServer2.java index 42dbdb887957..b7d1a332a106 100644 --- a/service/src/test/org/apache/hive/service/server/TestHiveServer2.java +++ b/service/src/test/org/apache/hive/service/server/TestHiveServer2.java @@ -21,9 +21,19 @@ import org.apache.hadoop.hive.conf.Constants; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hive.http.HttpServer; +import org.apache.hive.service.cli.CLIService; +import org.apache.hive.service.cli.session.SessionManager; import org.junit.Test; + +import java.lang.reflect.Field; import java.util.Map; + import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class TestHiveServer2 { @@ -98,4 +108,24 @@ public void testMaybeStartCompactorThreadsMultipleCustomPoolsAndDefaultPool() { assertEquals(Integer.valueOf(5), startedWorkers.get("pool3")); assertEquals(Integer.valueOf(3), startedWorkers.get(Constants.COMPACTION_DEFAULT_POOL)); } + + @Test + public void testCreateHttpServerBuilderStampsStartcodeBeforeConfIsCopied() throws Exception { + HiveConf conf = new HiveConf(); + + CLIService cli = mock(CLIService.class); + SessionManager sessionManager = mock(SessionManager.class); + when(cli.getSessionManager()).thenReturn(sessionManager); + + HttpServer.Builder builder = HiveServer2.createHttpServerBuilder( + "localhost", 0, "test", "/", conf, cli, null); + + // setConf stores a *copy* of the conf on the Builder. Read that copy back via + // reflection — that's the same instance the servlet context exposes to the JSP. 
+ Field confField = HttpServer.Builder.class.getDeclaredField("conf"); + confField.setAccessible(true); + HiveConf builderConf = (HiveConf) confField.get(builder); + assertNotNull("Builder.conf must be set after createHttpServerBuilder", builderConf); + assertNotNull("startcode must be exists", builderConf.get("startcode")); + } } diff --git a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 122c0c3c491d..75defe50ebbe 100644 --- a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -61,7 +61,7 @@ public HiveMetaStoreClient(Configuration conf, HiveMetaHookLoader hookLoader) th public HiveMetaStoreClient(Configuration conf, HiveMetaHookLoader hookLoader, Boolean allowEmbedded) throws MetaException { - this(conf, hookLoader, new HiveMetaStoreClientBuilder(conf).newClient(allowEmbedded).build()); + this(conf, hookLoader, new HiveMetaStoreClientBuilder(conf, allowEmbedded).build()); } private HiveMetaStoreClient(Configuration conf, HiveMetaHookLoader hookLoader, @@ -75,8 +75,7 @@ private HiveMetaStoreClient(Configuration conf, HiveMetaHookLoader hookLoader, private static IMetaStoreClient createUnderlyingClient(Configuration conf, HiveMetaHookLoader hookLoader, IMetaStoreClient baseMetaStoreClient) { - return new HiveMetaStoreClientBuilder(conf) - .client(baseMetaStoreClient) + return new HiveMetaStoreClientBuilder(conf, baseMetaStoreClient) .withHooks(hookLoader) .threadSafe() .build(); diff --git a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 
e351e6432e59..810e435f11f9 100644 --- a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -76,11 +76,12 @@ default void setHiveAddedJars(String addedJars) { /** * Returns true if the current client is using an in process metastore (local metastore). + * Default false, as in real production the client should always connect to a remote meta service * * @return */ - default boolean isLocalMetaStore(){ - throw new UnsupportedOperationException("MetaStore client does not support checking if metastore is local"); + default boolean isLocalMetaStore() { + return false; } /** diff --git a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java index 0cf9901fd2ad..14f6ba3c3427 100644 --- a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java +++ b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java @@ -33,9 +33,11 @@ import java.util.concurrent.TimeUnit; import java.util.function.Supplier; +import org.apache.commons.lang3.ClassUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.classification.RetrySemantics; +import org.apache.hadoop.hive.metastore.client.builder.HiveMetaStoreClientBuilder; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.utils.JavaUtils; @@ -69,15 +71,7 @@ public class RetryingMetaStoreClient implements InvocationHandler { private final Map metaCallTimeMap; private final long 
connectionLifeTimeInMillis; private long lastConnectionTime; - private boolean localMetaStore; - - - protected RetryingMetaStoreClient(Configuration conf, Class[] constructorArgTypes, - Object[] constructorArgs, Map metaCallTimeMap, - Class msClientClass) throws MetaException { - this(conf, metaCallTimeMap, () -> - JavaUtils.newInstance(msClientClass, constructorArgTypes, constructorArgs)); - } + private final boolean localMetaStore; protected RetryingMetaStoreClient(Configuration conf, Map metaCallTimeMap, Supplier msClient) throws MetaException { @@ -95,12 +89,11 @@ protected RetryingMetaStoreClient(Configuration conf, Map metaCall this.connectionLifeTimeInMillis = MetastoreConf.getTimeVar(conf, ConfVars.CLIENT_SOCKET_LIFETIME, TimeUnit.MILLISECONDS); this.lastConnectionTime = System.currentTimeMillis(); - String msUri = MetastoreConf.getVar(conf, ConfVars.THRIFT_URIS); - localMetaStore = (msUri == null) || msUri.trim().isEmpty(); SecurityUtils.reloginExpiringKeytabUser(); this.base = msClient.get(); + this.localMetaStore = base.isLocalMetaStore(); LOG.info("RetryingMetaStoreClient proxy=" + base.getClass() + " ugi=" + this.ugi + " retries=" + this.retryLimit + " delay=" + this.retryDelaySeconds @@ -109,9 +102,7 @@ protected RetryingMetaStoreClient(Configuration conf, Map metaCall public static IMetaStoreClient getProxy( Configuration hiveConf, boolean allowEmbedded) throws MetaException { - return getProxy(hiveConf, new Class[]{Configuration.class, HiveMetaHookLoader.class, Boolean.class}, - new Object[]{hiveConf, null, allowEmbedded}, null, HiveMetaStoreClient.class.getName() - ); + return new HiveMetaStoreClientBuilder(hiveConf, allowEmbedded).withRetry(null).build(); } @VisibleForTesting @@ -123,13 +114,10 @@ public static IMetaStoreClient getProxy(Configuration hiveConf, HiveMetaHookLoad public static IMetaStoreClient getProxy(Configuration hiveConf, HiveMetaHookLoader hookLoader, Map metaCallTimeMap, String mscClassName, boolean allowEmbedded) throws 
MetaException { - - return getProxy(hiveConf, - new Class[] {Configuration.class, HiveMetaHookLoader.class, Boolean.class}, - new Object[] {hiveConf, hookLoader, allowEmbedded}, - metaCallTimeMap, - mscClassName - ); + return + new HiveMetaStoreClientBuilder(hiveConf, mscClassName, allowEmbedded) + .withHooks(hookLoader) + .withRetry(metaCallTimeMap).build(); } /** @@ -148,26 +136,18 @@ public static IMetaStoreClient getProxy(Configuration hiveConf, Class[] const public static IMetaStoreClient getProxy(Configuration hiveConf, Class[] constructorArgTypes, Object[] constructorArgs, Map metaCallTimeMap, String mscClassName) throws MetaException { - @SuppressWarnings("unchecked") Class baseClass = JavaUtils.getClass(mscClassName, IMetaStoreClient.class); - - RetryingMetaStoreClient handler = - new RetryingMetaStoreClient(hiveConf, constructorArgTypes, constructorArgs, - metaCallTimeMap, baseClass); - return getProxy(baseClass.getInterfaces(), handler); + IMetaStoreClient baseClient = JavaUtils.newInstance(baseClass, constructorArgTypes, constructorArgs); + return new HiveMetaStoreClientBuilder(hiveConf, baseClient).withRetry(metaCallTimeMap).build(); } public static IMetaStoreClient getProxy(Configuration hiveConf, Map metaCallTimeMap, IMetaStoreClient msClient) throws MetaException { RetryingMetaStoreClient handler = new RetryingMetaStoreClient(hiveConf, metaCallTimeMap, () -> msClient); - return getProxy(msClient.getClass().getInterfaces(), handler); - } - - private static IMetaStoreClient getProxy(Class[] interfaces, - RetryingMetaStoreClient handler) { + Class[] interfaces = ClassUtils.getAllInterfaces(msClient.getClass()).toArray(new Class[0]); return (IMetaStoreClient) Proxy.newProxyInstance( RetryingMetaStoreClient.class.getClassLoader(), interfaces, handler); } diff --git a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/builder/HiveMetaStoreClientBuilder.java 
b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/builder/HiveMetaStoreClientBuilder.java index 903a3543ee29..3f858d4f09b9 100644 --- a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/builder/HiveMetaStoreClientBuilder.java +++ b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/builder/HiveMetaStoreClientBuilder.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.metastore.client.builder; import org.apache.commons.lang3.exception.ExceptionUtils; +import org.apache.commons.lang3.reflect.ConstructorUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; import org.apache.hadoop.hive.metastore.IMetaStoreClient; @@ -32,28 +34,43 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.lang.reflect.Constructor; import java.util.Map; import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; public class HiveMetaStoreClientBuilder { private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreClientBuilder.class); + private static final Map, MetaStoreClientFactory> + CLIENT_FACTORIES = new ConcurrentHashMap<>(); private final Configuration conf; private IMetaStoreClient client; - public HiveMetaStoreClientBuilder(Configuration conf) { - this.conf = Objects.requireNonNull(conf); + public HiveMetaStoreClientBuilder(Configuration configuration) throws MetaException { + this(configuration, true); } - public HiveMetaStoreClientBuilder newClient(boolean allowEmbedded) throws MetaException { - this.client = createClient(conf, allowEmbedded); - return this; + public HiveMetaStoreClientBuilder(Configuration configuration, boolean allowEmbedded) throws MetaException { + this.conf = Objects.requireNonNull(configuration); + Class mscClass = MetastoreConf.getClass( + conf, 
MetastoreConf.ConfVars.METASTORE_CLIENT_IMPL, + ThriftHiveMetaStoreClient.class, IMetaStoreClient.class); + this.client = createClient(conf, mscClass, allowEmbedded); } - public HiveMetaStoreClientBuilder client(IMetaStoreClient client) { - this.client = client; - return this; + public HiveMetaStoreClientBuilder(Configuration configuration, String clientImpl, + boolean allowEmbedded) throws MetaException { + this.conf = Objects.requireNonNull(configuration); + Class baseClass = + JavaUtils.getClass(clientImpl, IMetaStoreClient.class); + this.client = createClient(configuration, baseClass, allowEmbedded); + } + + public HiveMetaStoreClientBuilder(Configuration configuration, IMetaStoreClient client) { + this.conf = Objects.requireNonNull(configuration); + this.client = Objects.requireNonNull(client); } public HiveMetaStoreClientBuilder enhanceWith(Function wrapperFunction) { @@ -80,17 +97,15 @@ public IMetaStoreClient build() { return Objects.requireNonNull(client); } - private static IMetaStoreClient createClient(Configuration conf, boolean allowEmbedded) throws MetaException { - Class mscClass = MetastoreConf.getClass( - conf, MetastoreConf.ConfVars.METASTORE_CLIENT_IMPL, - ThriftHiveMetaStoreClient.class, IMetaStoreClient.class); - LOG.info("Using {} as a base MetaStoreClient", mscClass.getName()); - - IMetaStoreClient baseMetaStoreClient = null; + private static IMetaStoreClient createClient(Configuration conf, + Class mscClass, boolean allowEmbedded) throws MetaException { try { - baseMetaStoreClient = JavaUtils.newInstance(mscClass, - new Class[]{Configuration.class, boolean.class}, - new Object[]{conf, allowEmbedded}); + LOG.info("Using {} as a base MetaStoreClient", mscClass.getName()); + MetaStoreClientFactory factory = CLIENT_FACTORIES.get(mscClass); + if (factory == null) { + CLIENT_FACTORIES.put(mscClass, factory = new MetaStoreClientFactory(mscClass)); + } + return factory.createClient(conf, allowEmbedded); } catch (Throwable t) { // Reflection by 
JavaUtils will throw RuntimeException, try to get real MetaException here. Throwable rootCause = ExceptionUtils.getRootCause(t); @@ -100,7 +115,34 @@ private static IMetaStoreClient createClient(Configuration conf, boolean allowEm throw new MetaException(rootCause.getMessage()); } } + } - return baseMetaStoreClient; + private static class MetaStoreClientFactory { + private Constructor bestMatchingCtr; + private Function, Object[]> argsTransformer; + + MetaStoreClientFactory(Class mscClass) { + Constructor candidate = + ConstructorUtils.getMatchingAccessibleConstructor(mscClass, Configuration.class, boolean.class); + if (candidate != null) { + this.bestMatchingCtr = candidate; + this.argsTransformer = args -> new Object[] {args.getLeft(), (boolean) args.getRight()}; + } else if ((candidate = ConstructorUtils.getMatchingAccessibleConstructor(mscClass, Configuration.class, + HiveMetaHookLoader.class, Boolean.class)) != null) { + this.bestMatchingCtr = candidate; + this.argsTransformer = args -> + new Object[] {args.getLeft(), null, Boolean.valueOf(args.getRight())}; + } else if ((candidate = ConstructorUtils.getMatchingAccessibleConstructor(mscClass, Configuration.class)) != null) { + this.bestMatchingCtr = candidate; + this.argsTransformer = args -> new Object[] {args.getLeft()}; + } + if (bestMatchingCtr == null) { + throw new RuntimeException("No matching constructor found for this IMetaStoreClient " + mscClass); + } + } + + IMetaStoreClient createClient(Configuration conf, boolean allowEmbedded) throws Exception { + return bestMatchingCtr.newInstance(argsTransformer.apply(Pair.of(conf, allowEmbedded))); + } } } diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java index cd75934b006a..4aff66de7883 100644 --- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java @@ -57,6 +57,8 @@ public class MetricsConstant { public static final String HS2_OPEN_SESSIONS = "hs2_open_sessions"; public static final String HS2_ACTIVE_SESSIONS = "hs2_active_sessions"; public static final String HS2_ABANDONED_SESSIONS = "hs2_abandoned_sessions"; + // Per-LLAP-cluster session count: metric name is HS2_LLAP_TARGET_SESSIONS + "_" + sanitizedTarget + public static final String HS2_LLAP_TARGET_SESSIONS = "hs2_llap_target_sessions"; public static final String HS2_AVG_OPEN_SESSION_TIME = "hs2_avg_open_session_time"; public static final String HS2_AVG_ACTIVE_SESSION_TIME = "hs2_avg_active_session_time"; diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index e81a9f456d53..e955d1e07d67 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -113,6 +113,10 @@ public class MetastoreConf { "metastore.authentication.ldap.userMembershipKey"; public static final String METASTORE_RETRYING_HANDLER_CLASS = "org.apache.hadoop.hive.metastore.RetryingHMSHandler"; + public static final String ACID_TABLE_OPTIMIZER_CLASS = + "org.apache.hadoop.hive.ql.txn.compactor.AcidTableOptimizer"; + public static final String ICEBERG_TABLE_OPTIMIZER_CLASS = + "org.apache.iceberg.mr.hive.compaction.IcebergTableOptimizer"; private static final Map metaConfs = new HashMap<>(); private static volatile URL hiveSiteURL = null; @@ -663,8 +667,7 @@ public enum ConfVars { "Enable table 
caching in the initiator. Currently the cache is cleaned after each cycle."), COMPACTOR_INITIATOR_TABLE_OPTIMIZERS("compactor.table.optimizers", "hive.compactor.table.optimizers", - "org.apache.hadoop.hive.ql.txn.compactor.AcidTableOptimizer," + - "org.apache.iceberg.mr.hive.compaction.IcebergTableOptimizer", + ACID_TABLE_OPTIMIZER_CLASS + "," + ICEBERG_TABLE_OPTIMIZER_CLASS, "Comma separated list of table optimizers executed by compaction Initiator."), COMPACTOR_WORKER_THREADS("metastore.compactor.worker.threads", "hive.compactor.worker.threads", 0, @@ -2012,6 +2015,8 @@ public enum ConfVars { "The maximum non-native tables allowed per table type during collecting the summary."), METADATA_SUMMARY_NONNATIVE_THREADS("hive.metatool.summary.nonnative.threads", "hive.metatool.summary.nonnative.threads", 20, "Number of threads to be allocated for MetaToolTaskMetadataSummary for collecting the non-native table's summary."), + METASTORE_SUPPORT_ACID("metastore.support.acid", "hive.metastore.support.acid", true, + "Whether to support acid functionality in Hive metastore server."), // These are all values that we put here just for testing STR_TEST_ENTRY("test.str", "hive.test.str", "defaultval", "comment"), diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile new file mode 100644 index 000000000000..25aa55c61504 --- /dev/null +++ b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +FROM postgres:12.3 + +ADD https://github.com/zabetak/hive-test-datasets/releases/download/1.0/metastore_tpcds30tb_3_1_3000.dump.gz /tmp/metastore.dump.gz +RUN chown postgres:postgres /tmp/metastore.dump.gz +COPY init_user_db.sql /docker-entrypoint-initdb.d/init_user_db.sql +COPY restore_metastore.sh /docker-entrypoint-initdb.d/restore_metastore.sh diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/README.md b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/README.md new file mode 100644 index 000000000000..e21d791f4248 --- /dev/null +++ b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/README.md @@ -0,0 +1,72 @@ + +# Postgres TPC-DS metastore + +A dockerized Postgres database with a Hive metastore dump from a +[TPC-DS 30TB dataset](https://github.com/zabetak/hive-test-datasets/releases/download/1.0/metastore_tpcds30tb_3_1_3000.dump.gz). + +## Build and deploy + +### Docker Hub + +Use the GitHub CI workflow `hive-postgres-tpcds-metastore.yml` for building and deploying the image to +the official ASF Docker Hub registry. 
+ +### Manual + +Build and tag the docker image: `docker build --tag apache/hive-postgres-tpcds-metastore:1.4 .` + +## Usage + +- Create and start Postgres container: + `docker run --name postgres_metastore -p 5432:5432 -e POSTGRES_PASSWORD=postgres -d apache/hive-postgres-tpcds-metastore:1.4` +- Verify that the container is running: `docker ps` +- Stop Postgres container: `docker stop postgres_metastore` +- Remove Postgres container: `docker rm postgres_metastore` + +If you want to check the contents of the metastore the easiest way would be to +open a shell in the container and connect to the database via psql. + + docker exec -it postgres_metastore bash + su postgres + psql -U hive -d metastore + +The default configuration binds the host port 5432 to the database running in +the container. You can access the database via JDBC using the following +information: + +- URL: `jdbc:postgresql://localhost:5432/metastore` +- DRIVER: `org.postgresql.Driver` +- USER: `hive` +- PASSWORD: `hive` + +If you want to start Hive and instruct it to use this database as the metastore +you have to set the following properties in `hive-site.xml`: + +- `javax.jdo.option.ConnectionURL` +- `javax.jdo.option.ConnectionDriverName` +- `javax.jdo.option.ConnectionUserName` +- `javax.jdo.option.ConnectionPassword` + +If you need to use the current dumps with a more recent version of Hive then +after creating and starting the Postgres container you can use the +[schematool](https://hive.apache.org/docs/latest/admin/hive-schema-tool/) +to upgrade the metastore: + + schematool -dbType postgres -upgradeSchemaFrom 3.1.3000 -driver org.postgresql.Driver -url jdbc:postgresql://localhost:5432/metastore -userName hive -passWord hive diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/init_user_db.sql b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/init_user_db.sql new file mode 100644 index 000000000000..a91fe8f446e2 --- /dev/null +++ 
b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/init_user_db.sql @@ -0,0 +1,17 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +CREATE ROLE hive WITH LOGIN PASSWORD 'hive'; +CREATE DATABASE metastore WITH OWNER = hive TEMPLATE template0; diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/restore_metastore.sh b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/restore_metastore.sh new file mode 100644 index 000000000000..4590ddf28a31 --- /dev/null +++ b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/restore_metastore.sh @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash + +# Restore may exit with non-blocking errors so we shouldn't stop the script +# since in many cases the dump will be restored correctly +pg_restore -d metastore /tmp/metastore.dump.gz || true +# Remove the temporary file +rm /tmp/metastore.dump.gz diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java index 5a5d7ac95dfc..76eedeadb617 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java @@ -3253,6 +3253,11 @@ public Function get_function(String dbName, String funcName) throws TException { @Override public void update_table_params(List updates) throws TException { + for (TableParamsUpdate update : updates) { + if (!update.isSetCat_name()) { + update.setCat_name(getDefaultCatalog(conf)); + } + } getMS().updateTableParams(updates); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index 88ee4a4b8c58..d3ff9e96377d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ 
-639,6 +639,18 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str FileSystem destFs = null; boolean dataWasMoved = false; Database db; + Partition check_part; + try { + check_part = msdb.getPartition(catName, dbname, name, new_part.getValues()); + } catch(NoSuchObjectException e) { + // this means there is no existing partition + check_part = null; + } + + if (check_part != null) { + throw new AlreadyExistsException("Partition already exists:" + dbname + "." + name + "." + + new_part.getValues()); + } try { msdb.openTransaction(); Table tbl = msdb.getTable(catName, dbname, name, null); @@ -655,19 +667,6 @@ public Partition alterPartition(RawStore msdb, Warehouse wh, String catName, Str "Unable to rename partition because old partition does not exist"); } - Partition check_part; - try { - check_part = msdb.getPartition(catName, dbname, name, new_part.getValues()); - } catch(NoSuchObjectException e) { - // this means there is no existing partition - check_part = null; - } - - if (check_part != null) { - throw new AlreadyExistsException("Partition already exists:" + dbname + "." + name + "." 
+ - new_part.getValues()); - } - // when renaming a partition, we should update // 1) partition SD Location // 2) partition column stats if there are any because of part_name field in HMS table PART_COL_STATS diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MaterializationsRebuildLockCleanerTask.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MaterializationsRebuildLockCleanerTask.java index 10f9721be21a..5d1f5dbfe549 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MaterializationsRebuildLockCleanerTask.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MaterializationsRebuildLockCleanerTask.java @@ -42,7 +42,8 @@ public class MaterializationsRebuildLockCleanerTask implements MetastoreTaskThre @Override public long runFrequency(TimeUnit unit) { - return MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.TXN_TIMEOUT, unit) / 2; + return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID) ? 
+ MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.TXN_TIMEOUT, unit) / 2 : 0; } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java index 468f3b0db5f4..e0f66ae7fa19 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetastoreDefaultTransformer.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; +import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.slf4j.Logger; @@ -614,6 +615,14 @@ private static final Path getDefaultPath(IHMSHandler hmsHandler, Database db, St } + private void validateIfAcidTablePermitted(Table table) throws MetaException { + if (!MetastoreConf.getBoolVar(hmsHandler.getConf(), ConfVars.METASTORE_SUPPORT_ACID) && + TxnUtils.isTransactionalTable(table)) { + throw new MetaException("ACID tables are not permitted when the " + + ConfVars.METASTORE_SUPPORT_ACID.getHiveName() + " property is set to false"); + } + } + @Override public Table transformCreateTable(Table table, List processorCapabilities, String processorId) throws MetaException { if (!defaultCatalog.equalsIgnoreCase(table.getCatName())) { @@ -624,9 +633,10 @@ public Table transformCreateTable(Table table, List processorCapabilitie Table newTable = new Table(table); LOG.info("Starting translation for CreateTable for processor " + processorId + " with " + processorCapabilities + " on table " + newTable.getTableName()); - Map params = 
table.getParameters(); + Map params = newTable.getParameters(); if (params == null) { params = new HashMap<>(); + newTable.setParameters(params); } String tableType = newTable.getTableType(); String dbName = table.getDbName(); @@ -647,7 +657,6 @@ public Table transformCreateTable(Table table, List processorCapabilitie params.put(HiveMetaHook.EXTERNAL, "TRUE"); params.put(EXTERNAL_TABLE_PURGE, "TRUE"); params.put(HiveMetaHook.TRANSLATED_TO_EXTERNAL, "TRUE"); - newTable.setParameters(params); LOG.info("Modified table params are:" + params.toString()); if (getLocation(table) == null) { @@ -663,6 +672,7 @@ public Table transformCreateTable(Table table, List processorCapabilitie // should we check tbl directory existence? } } else { // ACID table + validateIfAcidTablePermitted(newTable); // if the property 'EXTERNAL_TABLES_ONLY'='true' is set on the database, then creating managed/ACID tables are prohibited. See HIVE-25724 for more details. if (db.getParameters().containsKey(EXTERNALTABLESONLY) && db.getParameters().get(EXTERNALTABLESONLY).equalsIgnoreCase("true")) { @@ -673,8 +683,8 @@ public Table transformCreateTable(Table table, List processorCapabilitie throw new MetaException("Processor has no capabilities, cannot create an ACID table."); } - newTable = validateTablePaths(table); - if (MetaStoreUtils.isInsertOnlyTableParam(table.getParameters())) { // MICRO_MANAGED Tables + validateTablePaths(newTable); + if (MetaStoreUtils.isInsertOnlyTableParam(newTable.getParameters())) { // MICRO_MANAGED Tables if (processorCapabilities.contains(HIVEMANAGEDINSERTWRITE)) { LOG.debug("Processor has required capabilities to be able to create INSERT-only tables"); return newTable; @@ -694,7 +704,8 @@ public Table transformCreateTable(Table table, List processorCapabilitie } } else if (TableType.EXTERNAL_TABLE.name().equals(tableType)) { LOG.debug("Table to be created is of type " + tableType); - newTable = validateTablePaths(table); + params.put(HiveMetaHook.EXTERNAL, "TRUE"); + 
validateTablePaths(newTable); } LOG.info("Transformer returning table:" + newTable.toString()); return newTable; @@ -734,7 +745,7 @@ public Table transformAlterTable(Table oldTable, Table newTable, List pr LOG.info("Starting translation for Alter table for processor " + processorId + " with " + processorCapabilities + " on table " + newTable.getTableName()); - + validateIfAcidTablePermitted(newTable); if (tableLocationChanged(oldTable, newTable)) { validateTablePaths(newTable); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 88dbd1ff4727..17ad48698ffb 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -18,11 +18,9 @@ package org.apache.hadoop.hive.metastore; -import static org.apache.commons.lang3.StringUtils.join; import static org.apache.hadoop.hive.metastore.Batchable.NO_BATCHING; import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.COMPACTOR_USE_CUSTOM_POOL; import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog; -import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.newMetaException; import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifiers; @@ -39,10 +37,8 @@ import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -50,18 +46,15 @@ import java.util.List; import java.util.Map; import 
java.util.Map.Entry; -import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.TreeMap; -import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; -import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -77,7 +70,6 @@ import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; @@ -101,7 +93,6 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.CreationMetadata; -import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.AddPackageRequest; import org.apache.hadoop.hive.metastore.api.DefaultConstraintsRequest; @@ -115,42 +106,26 @@ import org.apache.hadoop.hive.metastore.api.GetPackageRequest; import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; import org.apache.hadoop.hive.metastore.api.GetProjectionsSpec; -import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; -import org.apache.hadoop.hive.metastore.api.HiveObjectRef; -import org.apache.hadoop.hive.metastore.api.HiveObjectType; import org.apache.hadoop.hive.metastore.api.ISchema; import org.apache.hadoop.hive.metastore.api.ISchemaName; import org.apache.hadoop.hive.metastore.api.InvalidInputException; import 
org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.InvalidPartitionException; import org.apache.hadoop.hive.metastore.api.ListPackageRequest; import org.apache.hadoop.hive.metastore.api.ListStoredProcedureRequest; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.NotNullConstraintsRequest; -import org.apache.hadoop.hive.metastore.api.NotificationEvent; -import org.apache.hadoop.hive.metastore.api.NotificationEventRequest; -import org.apache.hadoop.hive.metastore.api.NotificationEventResponse; -import org.apache.hadoop.hive.metastore.api.NotificationEventsCountRequest; -import org.apache.hadoop.hive.metastore.api.NotificationEventsCountResponse; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Package; import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.PartitionEventType; import org.apache.hadoop.hive.metastore.api.PartitionFilterMode; -import org.apache.hadoop.hive.metastore.api.PartitionValuesResponse; -import org.apache.hadoop.hive.metastore.api.PartitionValuesRow; import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; -import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; import org.apache.hadoop.hive.metastore.api.PrincipalType; -import org.apache.hadoop.hive.metastore.api.PrivilegeBag; import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; import org.apache.hadoop.hive.metastore.api.QueryState; import org.apache.hadoop.hive.metastore.api.ResourceType; import org.apache.hadoop.hive.metastore.api.ResourceUri; -import org.apache.hadoop.hive.metastore.api.Role; -import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant; import org.apache.hadoop.hive.metastore.api.RuntimeStat; import 
org.apache.hadoop.hive.metastore.api.ReplicationMetricList; import org.apache.hadoop.hive.metastore.api.GetReplicationMetricsRequest; @@ -181,13 +156,9 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.StoredProcedure; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.TableMeta; import org.apache.hadoop.hive.metastore.api.TableParamsUpdate; import org.apache.hadoop.hive.metastore.api.Type; import org.apache.hadoop.hive.metastore.api.UniqueConstraintsRequest; -import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.hadoop.hive.metastore.api.UnknownPartitionException; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan; import org.apache.hadoop.hive.metastore.api.WMMapping; import org.apache.hadoop.hive.metastore.api.WMNullablePool; @@ -198,14 +169,13 @@ import org.apache.hadoop.hive.metastore.api.WMResourcePlanStatus; import org.apache.hadoop.hive.metastore.api.WMTrigger; import org.apache.hadoop.hive.metastore.api.WMValidateResourcePlanResponse; -import org.apache.hadoop.hive.metastore.api.WriteEventInfo; import org.apache.hadoop.hive.metastore.client.builder.GetPartitionsArgs; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.directsql.DirectSqlAggrStats; +import org.apache.hadoop.hive.metastore.metastore.iface.PrivilegeStore; import org.apache.hadoop.hive.metastore.metrics.Metrics; import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; -import org.apache.hadoop.hive.metastore.model.FetchGroups; import org.apache.hadoop.hive.metastore.model.MCatalog; import org.apache.hadoop.hive.metastore.model.MColumnDescriptor; import org.apache.hadoop.hive.metastore.model.MConstraint; @@ -217,23 +187,16 @@ import 
org.apache.hadoop.hive.metastore.model.MDelegationToken; import org.apache.hadoop.hive.metastore.model.MFieldSchema; import org.apache.hadoop.hive.metastore.model.MFunction; -import org.apache.hadoop.hive.metastore.model.MGlobalPrivilege; import org.apache.hadoop.hive.metastore.model.MISchema; import org.apache.hadoop.hive.metastore.model.MMVSource; import org.apache.hadoop.hive.metastore.model.MMasterKey; import org.apache.hadoop.hive.metastore.model.MMetastoreDBProperties; -import org.apache.hadoop.hive.metastore.model.MNotificationLog; -import org.apache.hadoop.hive.metastore.model.MNotificationNextId; import org.apache.hadoop.hive.metastore.model.MOrder; import org.apache.hadoop.hive.metastore.model.MPackage; import org.apache.hadoop.hive.metastore.model.MPartition; -import org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege; import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics; import org.apache.hadoop.hive.metastore.model.MPartitionEvent; -import org.apache.hadoop.hive.metastore.model.MPartitionPrivilege; import org.apache.hadoop.hive.metastore.model.MResourceUri; -import org.apache.hadoop.hive.metastore.model.MRole; -import org.apache.hadoop.hive.metastore.model.MRoleMap; import org.apache.hadoop.hive.metastore.model.MRuntimeStat; import org.apache.hadoop.hive.metastore.model.MScheduledExecution; import org.apache.hadoop.hive.metastore.model.MScheduledQuery; @@ -243,10 +206,8 @@ import org.apache.hadoop.hive.metastore.model.MStoredProc; import org.apache.hadoop.hive.metastore.model.MStringList; import org.apache.hadoop.hive.metastore.model.MTable; -import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege; import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics; import org.apache.hadoop.hive.metastore.model.MTablePrivilege; -import org.apache.hadoop.hive.metastore.model.MTxnWriteNotificationLog; import org.apache.hadoop.hive.metastore.model.MType; import 
org.apache.hadoop.hive.metastore.model.MVersionTable; import org.apache.hadoop.hive.metastore.model.MWMMapping; @@ -256,17 +217,18 @@ import org.apache.hadoop.hive.metastore.model.MWMResourcePlan.Status; import org.apache.hadoop.hive.metastore.model.MWMTrigger; import org.apache.hadoop.hive.metastore.model.MReplicationMetrics; -import org.apache.hadoop.hive.metastore.parser.ExpressionTree; -import org.apache.hadoop.hive.metastore.parser.ExpressionTree.FilterBuilder; import org.apache.hadoop.hive.metastore.properties.CachingPropertyStore; import org.apache.hadoop.hive.metastore.properties.PropertyStore; +import org.apache.hadoop.hive.metastore.metastore.PersistenceManagerProxy; +import org.apache.hadoop.hive.metastore.metastore.RawStoreAware; +import org.apache.hadoop.hive.metastore.metastore.MetaDescriptor; +import org.apache.hadoop.hive.metastore.metastore.TransactionHandler; import org.apache.hadoop.hive.metastore.tools.SQLGenerator; import org.apache.hadoop.hive.metastore.txn.TxnUtils; -import org.apache.hadoop.hive.metastore.utils.FileUtils; +import org.apache.hadoop.hive.metastore.utils.JavaUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.RetryingExecutor; -import org.apache.thrift.TException; import org.datanucleus.ExecutionContext; import org.datanucleus.api.jdo.JDOPersistenceManager; import org.datanucleus.api.jdo.JDOTransaction; @@ -282,7 +244,6 @@ import com.cronutils.model.time.ExecutionTime; import com.cronutils.parser.CronParser; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -335,7 +296,6 @@ private enum TXN_STATUS { private MetaStoreDirectSql directSql = null; private DirectSqlAggrStats directSqlAggrStats; protected DatabaseProduct dbType = null; 
- private PartitionExpressionProxy expressionProxy = null; protected Configuration conf; private volatile int openTrasactionCalls = 0; private Transaction currentTransaction = null; @@ -343,6 +303,7 @@ private enum TXN_STATUS { private Counter directSqlErrors; private boolean areTxnStatsSupported = false; private PropertyStore propertyStore; + private Map, Object> cachedImpls = new HashMap<>(); public ObjectStore() { } @@ -373,7 +334,6 @@ public void setConf(Configuration conf) { pm = null; directSql = null; directSqlAggrStats = null; - expressionProxy = null; openTrasactionCalls = 0; currentTransaction = null; transactionStatus = TXN_STATUS.NO_STATE; @@ -410,7 +370,6 @@ private void initialize() { if (isInitialized) { dbType = PersistenceManagerProvider.getDatabaseProduct(); sqlGenerator = new SQLGenerator(dbType, conf); - expressionProxy = PartFilterExprUtil.createExpressionProxy(conf); if (MetastoreConf.getBoolVar(getConf(), ConfVars.TRY_DIRECT_SQL)) { String schema = PersistenceManagerProvider.getProperty("javax.jdo.mapping.Schema"); schema = org.apache.commons.lang3.StringUtils.defaultIfBlank(schema, null); @@ -529,6 +488,7 @@ public void shutdown() { pm.close(); pm = null; } + cachedImpls.clear(); } /** @@ -558,27 +518,33 @@ public boolean openTransaction() { return result; } + @SuppressWarnings("unchecked") @Override - public long updateParameterWithExpectedValue(Table table, String key, String expectedValue, String newValue) - throws MetaException, NoSuchObjectException { - return new GetHelper(table.getCatName(), table.getDbName(), table.getTableName(), true, false) { - - @Override - protected String describeResult() { - return "Affected rows"; - } - - @Override - protected Long getSqlResult(GetHelper ctx) throws MetaException { - return directSql.updateTableParam(table, key, expectedValue, newValue); - } - - @Override - protected Long getJdoResult(GetHelper ctx) throws MetaException, NoSuchObjectException, InvalidObjectException { - throw new 
UnsupportedOperationException( - "Cannot update parameter with JDO, make sure direct SQL is enabled"); + public T unwrap(Class iface) { + MetaDescriptor descriptor = iface.getAnnotation(MetaDescriptor.class); + if (descriptor == null) { + throw new IllegalArgumentException("Unable to unwrap the store as " + iface); + } + String implClassName = conf.get("metastore." + descriptor.alias() + ".store.impl", ""); + T simpl; + T impl = (T) cachedImpls.get(iface); + if (impl != null && + (StringUtils.isEmpty(implClassName) || impl.getClass().getName().equals(implClassName))) { + simpl = impl; + } else { + Class ifaceImpl = descriptor.defaultImpl(); + if (StringUtils.isNotEmpty(implClassName)) { + ifaceImpl = conf.getClass(implClassName, ifaceImpl); } - }.run(false); + simpl = (T) JavaUtils.newInstance(ifaceImpl); + cachedImpls.put(iface, simpl); + } + List openQueries = new LinkedList<>(); + if (simpl instanceof RawStoreAware rsa) { + rsa.setBaseStore(this); + rsa.setPersistentManager(PersistenceManagerProxy.getProxy(pm, openQueries)); + } + return TransactionHandler.getProxy(iface, new TransactionHandler<>(this, simpl, openQueries)); } @Override @@ -854,7 +820,8 @@ public void createDatabase(Database db) throws InvalidObjectException, MetaExcep } @SuppressWarnings("nls") - private MDatabase getMDatabase(String catName, String name) throws NoSuchObjectException { + @Override + public MDatabase ensureGetMDatabase(String catName, String name) throws NoSuchObjectException { MDatabase mdb = null; boolean commited = false; Query query = null; @@ -919,7 +886,7 @@ public Database getJDODatabase(String catName, String name) throws NoSuchObjectE boolean commited = false; try { openTransaction(); - mdb = getMDatabase(catName, name); + mdb = ensureGetMDatabase(catName, name); commited = commitTransaction(); } finally { rollbackAndCleanup(commited, null); @@ -927,7 +894,7 @@ public Database getJDODatabase(String catName, String name) throws NoSuchObjectE Database db = new Database(); 
db.setName(mdb.getName()); db.setDescription(mdb.getDescription()); - db.setParameters(convertMap(mdb.getParameters())); + db.setParameters(convertMap(mdb.getParameters(), conf)); db.setOwnerName(mdb.getOwnerName()); String type = org.apache.commons.lang3.StringUtils.defaultIfBlank(mdb.getOwnerType(), null); PrincipalType principalType = (type == null) ? null : PrincipalType.valueOf(type); @@ -959,7 +926,7 @@ public boolean alterDatabase(String catName, String dbName, Database db) MDatabase mdb = null; boolean committed = false; try { - mdb = getMDatabase(catName, dbName); + mdb = ensureGetMDatabase(catName, dbName); mdb.setParameters(db.getParameters()); mdb.setOwnerName(db.getOwnerName()); if (db.getOwnerType() != null) { @@ -994,9 +961,9 @@ public boolean dropDatabase(String catName, String dbname) openTransaction(); // then drop the database - MDatabase db = getMDatabase(catName, dbname); + MDatabase db = ensureGetMDatabase(catName, dbname); pm.retrieve(db); - List dbGrants = this.listDatabaseGrants(catName, dbname, null); + List dbGrants = unwrap(PrivilegeStore.class).listDatabaseGrants(catName, dbname, null); if (CollectionUtils.isNotEmpty(dbGrants)) { pm.deletePersistentAll(dbGrants); } @@ -1093,7 +1060,7 @@ private Database convertToDatabase(MDatabase mdb) { Database db = new Database(); db.setName(mdb.getName()); db.setDescription(mdb.getDescription()); - db.setParameters(convertMap(mdb.getParameters())); + db.setParameters(convertMap(mdb.getParameters(), conf)); db.setOwnerName(mdb.getOwnerName()); String type = org.apache.commons.lang3.StringUtils.defaultIfBlank(mdb.getOwnerType(), null); PrincipalType principalType = (type == null) ? 
null : PrincipalType.valueOf(type); @@ -1115,6 +1082,16 @@ private Database convertToDatabase(MDatabase mdb) { @Override public void createDataConnector(DataConnector connector) throws InvalidObjectException, MetaException { boolean commited = false; + try { + openTransaction(); + pm.makePersistent(convert(connector)); + commited = commitTransaction(); + } finally { + rollbackAndCleanup(commited, null); + } + } + + public static MDataConnector convert(DataConnector connector) { MDataConnector mDataConnector = new MDataConnector(); mDataConnector.setName(connector.getName().toLowerCase()); mDataConnector.setType(connector.getType()); @@ -1125,13 +1102,7 @@ public void createDataConnector(DataConnector connector) throws InvalidObjectExc PrincipalType ownerType = connector.getOwnerType(); mDataConnector.setOwnerType((null == ownerType ? PrincipalType.USER.name() : ownerType.name())); mDataConnector.setCreateTime(connector.getCreateTime()); - try { - openTransaction(); - pm.makePersistent(mDataConnector); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } + return mDataConnector; } @SuppressWarnings("nls") @@ -1175,7 +1146,7 @@ public DataConnector getDataConnector(String name) throws NoSuchObjectException connector.setType(mdc.getType()); connector.setUrl(mdc.getUrl()); connector.setDescription(mdc.getDescription()); - connector.setParameters(convertMap(mdc.getParameters())); + connector.setParameters(convertMap(mdc.getParameters(), conf)); connector.setOwnerName(mdc.getOwnerName()); String type = org.apache.commons.lang3.StringUtils.defaultIfBlank(mdc.getOwnerType(), null); PrincipalType principalType = (type == null) ? 
null : PrincipalType.valueOf(type); @@ -1247,7 +1218,7 @@ public boolean dropDataConnector(String dcname) // then drop the dataconnector MDataConnector mdb = getMDataConnector(dcname); pm.retrieve(mdb); - List dcGrants = this.listDataConnectorGrants(dcname, null); + List dcGrants = unwrap(PrivilegeStore.class).listDataConnectorGrants(dcname, null); if (CollectionUtils.isNotEmpty(dcGrants)) { pm.deletePersistentAll(dcGrants); } @@ -1386,53 +1357,12 @@ public SQLAllTableConstraints createTableWithConstraints(Table tbl, SQLAllTableC } } - @Override - public void createTable(Table tbl) throws InvalidObjectException, MetaException { - boolean commited = false; - MTable mtbl = null; - - try { - openTransaction(); - - mtbl = convertToMTable(tbl); - if (TxnUtils.isTransactionalTable(tbl)) { - mtbl.setWriteId(tbl.getWriteId()); - } - pm.makePersistent(mtbl); - - if (tbl.getCreationMetadata() != null) { - MCreationMetadata mcm = convertToMCreationMetadata(tbl.getCreationMetadata()); - pm.makePersistent(mcm); - } - tbl.setId(mtbl.getId()); - - PrincipalPrivilegeSet principalPrivs = tbl.getPrivileges(); - List toPersistPrivObjs = new ArrayList<>(); - if (principalPrivs != null) { - int now = (int) (System.currentTimeMillis() / 1000); - - Map> userPrivs = principalPrivs.getUserPrivileges(); - putPersistentPrivObjects(mtbl, toPersistPrivObjs, now, userPrivs, PrincipalType.USER, "SQL"); - - Map> groupPrivs = principalPrivs.getGroupPrivileges(); - putPersistentPrivObjects(mtbl, toPersistPrivObjs, now, groupPrivs, PrincipalType.GROUP, "SQL"); - - Map> rolePrivs = principalPrivs.getRolePrivileges(); - putPersistentPrivObjects(mtbl, toPersistPrivObjs, now, rolePrivs, PrincipalType.ROLE, "SQL"); - } - pm.makePersistentAll(toPersistPrivObjs); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - } - /** * Convert PrivilegeGrantInfo from privMap to MTablePrivilege, and add all of * them to the toPersistPrivObjs. 
These privilege objects will be persisted as * part of createTable. */ - private void putPersistentPrivObjects(MTable mtbl, List toPersistPrivObjs, + public static void putPersistentPrivObjects(MTable mtbl, List toPersistPrivObjs, int now, Map> privMap, PrincipalType type, String authorizer) { if (privMap != null) { for (Map.Entry> entry : privMap @@ -1454,148 +1384,6 @@ private void putPersistentPrivObjects(MTable mtbl, List toPersistPrivObj } } - @Override - public boolean dropTable(String catName, String dbName, String tableName) - throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { - boolean materializedView = false; - boolean success = false; - try { - openTransaction(); - MTable tbl = getMTable(catName, dbName, tableName); - pm.retrieve(tbl); - if (tbl != null) { - materializedView = TableType.MATERIALIZED_VIEW.toString().equals(tbl.getTableType()); - // first remove all the grants - List tabGrants = listAllTableGrants(catName, dbName, tableName); - if (CollectionUtils.isNotEmpty(tabGrants)) { - pm.deletePersistentAll(tabGrants); - } - List tblColGrants = listTableAllColumnGrants(catName, dbName, - tableName); - if (CollectionUtils.isNotEmpty(tblColGrants)) { - pm.deletePersistentAll(tblColGrants); - } - - List partGrants = this.listTableAllPartitionGrants(catName, dbName, tableName); - if (CollectionUtils.isNotEmpty(partGrants)) { - pm.deletePersistentAll(partGrants); - } - - List partColGrants = listTableAllPartitionColumnGrants(catName, dbName, - tableName); - if (CollectionUtils.isNotEmpty(partColGrants)) { - pm.deletePersistentAll(partColGrants); - } - // delete column statistics if present - try { - deleteTableColumnStatistics(catName, dbName, tableName, null, null); - } catch (NoSuchObjectException e) { - LOG.info("Found no table level column statistics associated with {} to delete", - TableName.getQualified(catName, dbName, tableName)); - } - - List tabConstraints = 
listAllTableConstraintsWithOptionalConstraintName( - catName, dbName, tableName, null); - if (CollectionUtils.isNotEmpty(tabConstraints)) { - pm.deletePersistentAll(tabConstraints); - } - - preDropStorageDescriptor(tbl.getSd()); - - if (materializedView) { - dropCreationMetadata(tbl.getDatabase().getCatalogName(), - tbl.getDatabase().getName(), tbl.getTableName()); - } - - // then remove the table - pm.deletePersistentAll(tbl); - } - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); - } - return success; - } - - private boolean dropCreationMetadata(String catName, String dbName, String tableName) { - boolean success = false; - try { - openTransaction(); - MCreationMetadata mcm = getCreationMetadata(catName, dbName, tableName); - pm.retrieve(mcm); - if (mcm != null) { - pm.deletePersistentAll(mcm); - } - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); - } - return success; - } - - @Override - public List isPartOfMaterializedView(String catName, String dbName, String tblName) { - - boolean committed = false; - Query query = null; - List mViewList = new ArrayList<>(); - - try { - openTransaction(); - - query = pm.newQuery("select from org.apache.hadoop.hive.metastore.model.MCreationMetadata"); - - List creationMetadata = (List)query.execute(); - Iterator iter = creationMetadata.iterator(); - - while (iter.hasNext()) - { - MCreationMetadata p = iter.next(); - Set tables = p.getTables(); - for (MMVSource sourceTable : tables) { - MTable table = sourceTable.getTable(); - if (dbName.equals(table.getDatabase().getName()) && tblName.equals(table.getTableName())) { - LOG.info("Cannot drop table " + table.getTableName() + - " as it is being used by MView " + p.getTblName()); - mViewList.add(p.getDbName() + "." 
+ p.getTblName()); - } - } - } - - committed = commitTransaction(); - } finally { - rollbackAndCleanup(committed, query); - } - return mViewList; - } - - @Override - public List dropAllPartitionsAndGetLocations(TableName table, - String baseLocationToNotShow, AtomicReference message) - throws MetaException, InvalidInputException, NoSuchObjectException, InvalidObjectException { - String catName = table.getCat(); - String dbName = table.getDb(); - String tableName = table.getTable(); - return new GetHelper>(catName, dbName, tableName, true, true) { - @Override - protected String describeResult() { - return "delete all partitions from " + table; - } - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - return directSql.dropAllPartitionsAndGetLocations(getTable().getId(), baseLocationToNotShow, message); - } - @Override - protected List getJdoResult(GetHelper> ctx) - throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { - Map partitionLocations = - getPartitionLocations(catName, dbName, tableName, baseLocationToNotShow, -1); - dropPartitionsViaJdo(catName, dbName, tableName, new ArrayList<>(partitionLocations.keySet()), message); - return partitionLocations.values().stream().filter(Objects::nonNull).toList(); - } - }.run(true); - } - private List listAllTableConstraintsWithOptionalConstraintName( String catName, String dbName, String tableName, String constraintname) { catName = normalizeIdentifier(catName); @@ -1640,84 +1428,6 @@ private List listAllTableConstraintsWithOptionalConstraintName( return mConstraints; } - @Override - public Table getTable(String catName, String dbName, String tableName) - throws MetaException { - return getTable( - ObjectUtils.defaultIfNull(catName, getDefaultCatalog(conf)), - dbName, tableName, - null - ); - } - - @Override - public Table getTable(String catName, String dbName, String tableName, String writeIdList) - throws MetaException { - boolean commited = false; - 
Table tbl = null; - try { - openTransaction(); - MTable mtable = getMTable(catName, dbName, tableName); - tbl = convertToTable(mtable); - // Retrieve creation metadata if needed - if (tbl != null && TableType.MATERIALIZED_VIEW.toString().equals(tbl.getTableType())) { - tbl.setCreationMetadata( - convertToCreationMetadata(getCreationMetadata(catName, dbName, tableName))); - } - - // If transactional non partitioned table, - // check whether the current version table statistics - // in the metastore comply with the client query's snapshot isolation. - // Note: a partitioned table has table stats and table snapshot in MPartiiton. - if (writeIdList != null) { - boolean isTxn = TxnUtils.isTransactionalTable(tbl); - if (isTxn && !areTxnStatsSupported) { - StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from Table's parameters."); - } else if (isTxn && tbl.getPartitionKeysSize() == 0) { - if (isCurrentStatsValidForTheQuery(mtable, writeIdList, false)) { - tbl.setIsStatsCompliant(true); - } else { - tbl.setIsStatsCompliant(false); - // Do not make persistent the following state since it is the query specific (not global). 
- StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from Table's parameters."); - } - } - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - - return tbl; - } - - @Override - public Table getTable(String catalogName, String dbName, String tableName, String writeIdList, long tableId) - throws MetaException { - return getTable( catalogName, dbName, tableName, writeIdList); - } - - @Override - public List getTables(String catName, String dbName, String pattern) - throws MetaException { - return getTables(catName, dbName, pattern, null, -1); - } - - @Override - public List getTables(String catName, String dbName, String pattern, TableType tableType, int limit) - throws MetaException { - try { - // We only support pattern matching via jdo since pattern matching in Java - // might be different than the one used by the metastore backends - return getTablesInternal(catName, dbName, pattern, tableType, - (pattern == null || pattern.equals(".*")), true, limit); - } catch (NoSuchObjectException e) { - throw new MetaException(ExceptionUtils.getStackTrace(e)); - } - } - @Override public List getTableNamesWithStats() throws MetaException, NoSuchObjectException { return new GetListHelper(null, null, null, true, false) { @@ -1807,318 +1517,89 @@ protected List getJdoResult( }.run(false); } - protected List getTablesInternal(String catName, String dbName, String pattern, - TableType tableType, boolean allowSql, boolean allowJdo, int limit) - throws MetaException, NoSuchObjectException { - final String db_name = normalizeIdentifier(dbName); - final String cat_name = normalizeIdentifier(catName); - return new GetListHelper(cat_name, dbName, null, allowSql, allowJdo) { - @Override - protected List getSqlResult(GetHelper> ctx) - throws MetaException { - return directSql.getTables(cat_name, db_name, tableType, limit); - } + public static StringBuilder 
appendPatternCondition(StringBuilder builder, + String fieldName, String elements, List parameters) { + elements = normalizeIdentifier(elements); + return appendCondition(builder, fieldName, elements.split("\\|"), true, parameters); + } - @Override - protected List getJdoResult(GetHelper> ctx) - throws MetaException, NoSuchObjectException { - return getTablesInternalViaJdo(cat_name, db_name, pattern, tableType, limit); - } - }.run(false); + public static StringBuilder appendSimpleCondition(StringBuilder builder, + String fieldName, String[] elements, List parameters) { + return appendCondition(builder, fieldName, elements, false, parameters); } - private List getTablesInternalViaJdo(String catName, String dbName, String pattern, - TableType tableType, int limit) { - boolean commited = false; - Query query = null; - List tbls = null; - try { - openTransaction(); - dbName = normalizeIdentifier(dbName); - // Take the pattern and split it on the | to get all the composing - // patterns - List parameterVals = new ArrayList<>(); - StringBuilder filterBuilder = new StringBuilder(); - //adds database.name == dbName to the filter - appendSimpleCondition(filterBuilder, "database.name", new String[] {dbName}, parameterVals); - appendSimpleCondition(filterBuilder, "database.catalogName", new String[] {catName}, parameterVals); - if(pattern != null) { - appendPatternCondition(filterBuilder, "tableName", pattern, parameterVals); + private static StringBuilder appendCondition(StringBuilder builder, + String fieldName, String[] elements, boolean pattern, List parameters) { + if (builder.length() > 0) { + builder.append(" && "); + } + builder.append(" ("); + int length = builder.length(); + for (String element : elements) { + if (pattern) { + element = element.replaceAll("\\*", ".*"); } - if(tableType != null) { - appendSimpleCondition(filterBuilder, "tableType", new String[] {tableType.toString()}, parameterVals); + parameters.add(element); + if (builder.length() > length) { + 
builder.append(" || "); } - - query = pm.newQuery(MTable.class, filterBuilder.toString()); - query.setResult("tableName"); - query.setOrdering("tableName ascending"); - if (limit >= 0) { - query.setRange(0, limit); + builder.append(fieldName); + if (pattern) { + builder.append(".matches(").append(JDO_PARAM).append(parameters.size()).append(")"); + } else { + builder.append(" == ").append(JDO_PARAM).append(parameters.size()); } - Collection names = (Collection) query.executeWithArray(parameterVals.toArray(new String[0])); - tbls = new ArrayList<>(names); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, query); } - return tbls; + builder.append(" )"); + return builder; } - @Override - public List getAllMaterializedViewObjectsForRewriting(String catName) throws MetaException { - List
allMaterializedViews = new ArrayList<>(); - boolean commited = false; - Query query = null; - try { - openTransaction(); - catName = normalizeIdentifier(catName); - query = pm.newQuery(MTable.class); - query.setFilter("database.catalogName == catName && tableType == tt && rewriteEnabled == re"); - query.declareParameters("java.lang.String catName, java.lang.String tt, boolean re"); - Collection mTbls = (Collection) query.executeWithArray( - catName, TableType.MATERIALIZED_VIEW.toString(), true); - for (MTable mTbl : mTbls) { - Table tbl = convertToTable(mTbl); - tbl.setCreationMetadata( - convertToCreationMetadata( - getCreationMetadata(tbl.getCatName(), tbl.getDbName(), tbl.getTableName()))); - allMaterializedViews.add(tbl); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, query); + class AttachedMTableInfo { + MTable mtbl; + MColumnDescriptor mcd; + + public AttachedMTableInfo() {} + + public AttachedMTableInfo(MTable mtbl, MColumnDescriptor mcd) { + this.mtbl = mtbl; + this.mcd = mcd; } - return allMaterializedViews; } - @Override - public List getMaterializedViewsForRewriting(String catName, String dbName) - throws MetaException, NoSuchObjectException { - final String db_name = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); + private AttachedMTableInfo getMTable(String catName, String db, String table, + boolean retrieveCD) { + AttachedMTableInfo nmtbl = new AttachedMTableInfo(); + MTable mtbl = null; boolean commited = false; - Query query = null; - List tbls = null; + Query query = null; try { openTransaction(); - dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(Optional.ofNullable(catName).orElse(getDefaultCatalog(conf))); + db = normalizeIdentifier(db); + table = normalizeIdentifier(table); query = pm.newQuery(MTable.class, - "database.name == db && database.catalogName == cat && tableType == tt && rewriteEnabled == re"); + "tableName == table && database.name == db && 
database.catalogName == catname"); query.declareParameters( - "java.lang.String db, java.lang.String cat, java.lang.String tt, boolean re"); - query.setResult("tableName"); - Collection names = (Collection) query.executeWithArray( - db_name, catName, TableType.MATERIALIZED_VIEW.toString(), true); - tbls = new ArrayList<>(names); + "java.lang.String table, java.lang.String db, java.lang.String catname"); + query.setUnique(true); + if (LOG.isDebugEnabled()) { + LOG.debug("Executing getMTable for {}", + TableName.getQualified(catName, db, table)); + } + mtbl = (MTable) query.execute(table, db, catName); + pm.retrieve(mtbl); + // Retrieving CD can be expensive and unnecessary, so do it only when required. + if (mtbl != null && retrieveCD) { + pm.retrieve(mtbl.getSd()); + pm.retrieveAll(mtbl.getSd().getCD()); + nmtbl.mcd = mtbl.getSd().getCD(); + } commited = commitTransaction(); } finally { rollbackAndCleanup(commited, query); } - return tbls; - } - - @Override - public int getDatabaseCount() throws MetaException { - return getObjectCount("name", MDatabase.class.getName()); - } - - @Override - public int getPartitionCount() throws MetaException { - return getObjectCount("partitionName", MPartition.class.getName()); - } - - @Override - public int getTableCount() throws MetaException { - return getObjectCount("tableName", MTable.class.getName()); - } - - private int getObjectCount(String fieldName, String objName) { - Long result = 0L; - boolean commited = false; - Query query = null; - try { - openTransaction(); - String queryStr = - "select count(" + fieldName + ") from " + objName; - query = pm.newQuery(queryStr); - result = (Long) query.execute(); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, query); - } - return result.intValue(); - } - - @Override - public List getTableMeta(String catName, String dbNames, String tableNames, - List tableTypes) throws MetaException { - - boolean commited = false; - Query query = null; - List metas = 
new ArrayList<>(); - try { - openTransaction(); - // Take the pattern and split it on the | to get all the composing - // patterns - StringBuilder filterBuilder = new StringBuilder(); - List parameterVals = new ArrayList<>(); - appendSimpleCondition(filterBuilder, "database.catalogName", new String[] {catName}, parameterVals); - if (dbNames != null && !dbNames.equals("*")) { - appendPatternCondition(filterBuilder, "database.name", dbNames, parameterVals); - } - if (tableNames != null && !tableNames.equals("*")) { - appendPatternCondition(filterBuilder, "tableName", tableNames, parameterVals); - } - if (tableTypes != null && !tableTypes.isEmpty()) { - appendSimpleCondition(filterBuilder, "tableType", tableTypes.toArray(new String[0]), parameterVals); - } - - if (LOG.isDebugEnabled()) { - LOG.debug("getTableMeta with filter " + filterBuilder + " params: " + - StringUtils.join(parameterVals, ", ")); - } - // Add the fetch group here which retrieves the database object along with the MTable - // objects. If we don't prefetch the database object, we could end up in a situation where - // the database gets dropped while we are looping through the tables throwing a - // JDOObjectNotFoundException. 
This causes HMS to go into a retry loop which greatly degrades - // performance of this function when called with dbNames="*" and tableNames="*" (fetch all - // tables in all databases, essentially a full dump) - pm.getFetchPlan().addGroup(FetchGroups.FETCH_DATABASE_ON_MTABLE); - query = pm.newQuery(MTable.class, filterBuilder.toString()) ; - query.setResult("database.name, tableName, tableType, parameters.get(\"comment\"), owner, ownerType"); - List tables = (List) query.executeWithArray(parameterVals.toArray(new String[0])); - for (Object[] table : tables) { - TableMeta metaData = new TableMeta(table[0].toString(), table[1].toString(), table[2].toString()); - metaData.setCatName(catName); - if (table[3] != null) { - metaData.setComments(table[3].toString()); - } - if (table[4] != null) { - metaData.setOwnerName(table[4].toString()); - } - if (table[5] != null) { - metaData.setOwnerType(getPrincipalTypeFromStr(table[5].toString())); - } - metas.add(metaData); - } - commited = commitTransaction(); - } finally { - pm.getFetchPlan().removeGroup(FetchGroups.FETCH_DATABASE_ON_MTABLE); - rollbackAndCleanup(commited, query); - } - return metas; - } - - protected StringBuilder appendPatternCondition(StringBuilder builder, - String fieldName, String elements, List parameters) { - elements = normalizeIdentifier(elements); - return appendCondition(builder, fieldName, elements.split("\\|"), true, parameters); - } - - private StringBuilder appendSimpleCondition(StringBuilder builder, - String fieldName, String[] elements, List parameters) { - return appendCondition(builder, fieldName, elements, false, parameters); - } - - private StringBuilder appendCondition(StringBuilder builder, - String fieldName, String[] elements, boolean pattern, List parameters) { - if (builder.length() > 0) { - builder.append(" && "); - } - builder.append(" ("); - int length = builder.length(); - for (String element : elements) { - if (pattern) { - element = element.replaceAll("\\*", ".*"); - } - 
parameters.add(element); - if (builder.length() > length) { - builder.append(" || "); - } - builder.append(fieldName); - if (pattern) { - builder.append(".matches(").append(JDO_PARAM).append(parameters.size()).append(")"); - } else { - builder.append(" == ").append(JDO_PARAM).append(parameters.size()); - } - } - builder.append(" )"); - return builder; - } - - @Override - public List getAllTables(String catName, String dbName) throws MetaException { - return getTables(catName, dbName, ".*"); - } - - class AttachedMTableInfo { - MTable mtbl; - MColumnDescriptor mcd; - - public AttachedMTableInfo() {} - - public AttachedMTableInfo(MTable mtbl, MColumnDescriptor mcd) { - this.mtbl = mtbl; - this.mcd = mcd; - } - } - - private AttachedMTableInfo getMTable(String catName, String db, String table, - boolean retrieveCD) { - AttachedMTableInfo nmtbl = new AttachedMTableInfo(); - MTable mtbl = null; - boolean commited = false; - Query query = null; - try { - openTransaction(); - catName = normalizeIdentifier(Optional.ofNullable(catName).orElse(getDefaultCatalog(conf))); - db = normalizeIdentifier(db); - table = normalizeIdentifier(table); - query = pm.newQuery(MTable.class, - "tableName == table && database.name == db && database.catalogName == catname"); - query.declareParameters( - "java.lang.String table, java.lang.String db, java.lang.String catname"); - query.setUnique(true); - if (LOG.isDebugEnabled()) { - LOG.debug("Executing getMTable for {}", - TableName.getQualified(catName, db, table)); - } - mtbl = (MTable) query.execute(table, db, catName); - pm.retrieve(mtbl); - // Retrieving CD can be expensive and unnecessary, so do it only when required. 
- if (mtbl != null && retrieveCD) { - pm.retrieve(mtbl.getSd()); - pm.retrieveAll(mtbl.getSd().getCD()); - nmtbl.mcd = mtbl.getSd().getCD(); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, query); - } - nmtbl.mtbl = mtbl; - return nmtbl; - } - - private MCreationMetadata getCreationMetadata(String catName, String dbName, String tblName) { - boolean commited = false; - MCreationMetadata mcm = null; - Query query = null; - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - try { - openTransaction(); - query = pm.newQuery( - MCreationMetadata.class, "tblName == table && dbName == db && catalogName == cat"); - query.declareParameters("java.lang.String table, java.lang.String db, java.lang.String cat"); - query.setUnique(true); - mcm = (MCreationMetadata) query.execute(tblName, dbName, catName); - pm.retrieve(mcm); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, query); - } - return mcm; + nmtbl.mtbl = mtbl; + return nmtbl; } private MTable getMTable(String catName, String db, String table) { @@ -2126,111 +1607,15 @@ private MTable getMTable(String catName, String db, String table) { return nmtbl.mtbl; } - @Override - public List
getTableObjectsByName(String catName, String db, List tbl_names, - GetProjectionsSpec projectionSpec, String tablePattern) throws MetaException, UnknownDBException { - List
tables = new ArrayList<>(); - boolean committed = false; - Query query = null; - List mtables = null; - - try { - openTransaction(); - catName = normalizeIdentifier(catName); - - List lowered_tbl_names = normalizeIdentifiers(tbl_names); - StringBuilder filterBuilder = new StringBuilder(); - List parameterVals = new ArrayList<>(); - appendPatternCondition(filterBuilder, "database.name", db, parameterVals); - appendSimpleCondition(filterBuilder, "database.catalogName", new String[] {catName}, parameterVals); - if(tbl_names != null){ - appendSimpleCondition(filterBuilder, "tableName", lowered_tbl_names.toArray(new String[0]), parameterVals); - } - if(tablePattern != null){ - appendPatternCondition(filterBuilder, "tableName", tablePattern, parameterVals); - } - query = pm.newQuery(MTable.class, filterBuilder.toString()) ; - List projectionFields = null; - - // If a projection specification has been set, validate it and translate it to JDO columns. - if (projectionSpec != null) { - //Validate the projection fields for multi-valued fields. - projectionFields = TableFields.getMFieldNames(projectionSpec.getFieldList()); - } - - // If the JDO translation resulted in valid JDO columns names, use it to create a projection for the JDO query. - if (projectionFields != null) { - // fetch partially filled tables using result clause - query.setResult(Joiner.on(',').join(projectionFields)); - } - - if (projectionFields == null) { - mtables = (List) query.executeWithArray(parameterVals.toArray(new String[parameterVals.size()])); - } else { - if (projectionFields.size() > 1) { - // Execute the query to fetch the partial results. - List results = (List) query.executeWithArray(parameterVals.toArray(new String[parameterVals.size()])); - // Declare the tables array to return the list of tables - mtables = new ArrayList<>(results.size()); - // Iterate through each row of the result and create the MTable object. 
- for (Object[] row : results) { - MTable mtable = new MTable(); - int i = 0; - for (Object val : row) { - MetaStoreServerUtils.setNestedProperty(mtable, projectionFields.get(i), val, true); - i++; - } - mtables.add(mtable); - } - } else if (projectionFields.size() == 1) { - // Execute the query to fetch the partial results. - List results = (List) query.executeWithArray(parameterVals.toArray(new String[parameterVals.size()])); - // Iterate through each row of the result and create the MTable object. - mtables = new ArrayList<>(results.size()); - for (Object row : results) { - MTable mtable = new MTable(); - MetaStoreServerUtils.setNestedProperty(mtable, projectionFields.get(0), row, true); - mtables.add(mtable); - } - } - } - - if (mtables == null || mtables.isEmpty()) { - ensureGetDatabase(catName, db); - } else { - for (Iterator iter = mtables.iterator(); iter.hasNext(); ) { - Table tbl = convertToTable((MTable) iter.next()); - // Retrieve creation metadata if needed - if (TableType.MATERIALIZED_VIEW.toString().equals(tbl.getTableType())) { - tbl.setCreationMetadata( - convertToCreationMetadata( - getCreationMetadata(tbl.getCatName(), tbl.getDbName(), tbl.getTableName()))); - } - tables.add(tbl); - } - } - committed = commitTransaction(); - } finally { - rollbackAndCleanup(committed, query); - } - return tables; - } - - @Override - public List
getTableObjectsByName(String catName, String db, List tbl_names) - throws MetaException, UnknownDBException { - return getTableObjectsByName(catName, db, tbl_names, null, null); - } - /** Makes shallow copy of a list to avoid DataNucleus mucking with our objects. */ - private List convertList(List dnList) { + private static List convertList(List dnList) { return (dnList == null) ? null : Lists.newArrayList(dnList); } /** Makes shallow copy of a map to avoid DataNucleus mucking with our objects. */ - private Map convertMap(Map dnMap, GetPartitionsArgs... args) { + private static Map convertMap(Map dnMap, Configuration conf, GetPartitionsArgs... args) { Map parameters = MetaStoreServerUtils.trimMapNulls(dnMap, - MetastoreConf.getBoolVar(getConf(), ConfVars.ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS)); + MetastoreConf.getBoolVar(conf, ConfVars.ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS)); if (parameters != null && args != null && args.length == 1) { // Pattern matching in Java might be different from the one used by the metastore backends, // An underscore (_) in pattern stands for (matches) any single character; @@ -2262,7 +1647,7 @@ private Map convertMap(Map dnMap, GetPartitionsA return parameters; } - private Table convertToTable(MTable mtbl) throws MetaException { + public static Table convertToTable(MTable mtbl, Configuration conf) throws MetaException { if (mtbl == null) { return null; } @@ -2284,11 +1669,11 @@ private Table convertToTable(MTable mtbl) throws MetaException { viewExpandedText = mtbl.getViewExpandedText(); } } - Map parameters = convertMap(mtbl.getParameters()); + Map parameters = convertMap(mtbl.getParameters(), conf); boolean isAcidTable = TxnUtils.isAcidTable(parameters); final Table t = new Table(mtbl.getTableName(), mtbl.getDatabase() != null ? 
mtbl.getDatabase().getName() : null, mtbl.getOwner(), mtbl.getCreateTime(), mtbl.getLastAccessTime(), mtbl.getRetention(), - convertToStorageDescriptor(mtbl.getSd(), false, isAcidTable), + convertToStorageDescriptor(mtbl.getSd(), false, isAcidTable, conf), convertToFieldSchemas(mtbl.getPartitionKeys()), parameters, viewOriginalText, viewExpandedText, tableType); @@ -2307,7 +1692,7 @@ private Table convertToTable(MTable mtbl) throws MetaException { return t; } - private MTable convertToMTable(Table tbl) throws InvalidObjectException, + public static MTable convertToMTable(Table tbl, RawStore base) throws InvalidObjectException, MetaException { // NOTE: we don't set writeId in this method. Write ID is only set after validating the // existing write ID against the caller's valid list. @@ -2315,9 +1700,9 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, return null; } MDatabase mdb = null; - String catName = tbl.isSetCatName() ? tbl.getCatName() : getDefaultCatalog(conf); + String catName = tbl.isSetCatName() ? 
tbl.getCatName() : getDefaultCatalog(base.getConf()); try { - mdb = getMDatabase(catName, tbl.getDbName()); + mdb = base.ensureGetMDatabase(catName, tbl.getDbName()); } catch (NoSuchObjectException e) { LOG.error("Could not convert to MTable", e); throw new InvalidObjectException("Database " + @@ -2351,7 +1736,7 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, tableType); } - private List convertToMFieldSchemas(List keys) { + private static List convertToMFieldSchemas(List keys) { List mkeys = null; if (keys != null) { mkeys = new ArrayList<>(keys.size()); @@ -2363,7 +1748,7 @@ private List convertToMFieldSchemas(List keys) { return mkeys; } - protected List convertToFieldSchemas(List mkeys) { + public static List convertToFieldSchemas(List mkeys) { List keys = null; if (mkeys != null) { keys = new ArrayList<>(); @@ -2375,7 +1760,7 @@ protected List convertToFieldSchemas(List mkeys) { return keys; } - private List convertToMOrders(List keys) { + private static List convertToMOrders(List keys) { List mkeys = null; if (keys != null) { mkeys = new ArrayList<>(); @@ -2386,7 +1771,7 @@ private List convertToMOrders(List keys) { return mkeys; } - private List convertToOrders(List mkeys) { + private static List convertToOrders(List mkeys) { List keys = null; if (mkeys != null) { keys = new ArrayList<>(); @@ -2397,7 +1782,8 @@ private List convertToOrders(List mkeys) { return keys; } - private SerDeInfo convertToSerDeInfo(MSerDeInfo ms, boolean allowNull) throws MetaException { + private static SerDeInfo convertToSerDeInfo(MSerDeInfo ms, Configuration conf, boolean allowNull) + throws MetaException { if (ms == null) { if (allowNull) { return null; @@ -2405,7 +1791,7 @@ private SerDeInfo convertToSerDeInfo(MSerDeInfo ms, boolean allowNull) throws Me throw new MetaException("Invalid SerDeInfo object"); } SerDeInfo serde = - new SerDeInfo(ms.getName(), ms.getSerializationLib(), convertMap(ms.getParameters())); + new SerDeInfo(ms.getName(), 
ms.getSerializationLib(), convertMap(ms.getParameters(), conf)); if (ms.getDescription() != null) { serde.setDescription(ms.getDescription()); } @@ -2421,7 +1807,7 @@ private SerDeInfo convertToSerDeInfo(MSerDeInfo ms, boolean allowNull) throws Me return serde; } - private MSerDeInfo convertToMSerDeInfo(SerDeInfo ms) throws MetaException { + private static MSerDeInfo convertToMSerDeInfo(SerDeInfo ms) throws MetaException { if (ms == null) { throw new MetaException("Invalid SerDeInfo object"); } @@ -2435,15 +1821,15 @@ private MSerDeInfo convertToMSerDeInfo(SerDeInfo ms) throws MetaException { * @param cols the columns the column descriptor contains * @return a new column descriptor db-backed object */ - private MColumnDescriptor createNewMColumnDescriptor(List cols) { + private static MColumnDescriptor createNewMColumnDescriptor(List cols) { if (cols == null) { return null; } return new MColumnDescriptor(cols); } - private StorageDescriptor convertToStorageDescriptor( - MStorageDescriptor msd, boolean noFS, boolean isAcidTable) throws MetaException { + private static StorageDescriptor convertToStorageDescriptor( + MStorageDescriptor msd, boolean noFS, boolean isAcidTable, Configuration conf) throws MetaException { if (msd == null) { return null; } @@ -2457,11 +1843,11 @@ private StorageDescriptor convertToStorageDescriptor( List bucList = convertList(msd.getBucketCols()); SkewedInfo skewedInfo = null; - Map sdParams = isAcidTable ? Collections.emptyMap() : convertMap(msd.getParameters()); + Map sdParams = isAcidTable ? Collections.emptyMap() : convertMap(msd.getParameters(), conf); StorageDescriptor sd = new StorageDescriptor(convertToFieldSchemas(mFieldSchemas), msd.getLocation(), msd.getInputFormat(), msd.getOutputFormat(), msd .isCompressed(), msd.getNumBuckets(), - (!isAcidTable) ? convertToSerDeInfo(msd.getSerDeInfo(), true) + (!isAcidTable) ? 
convertToSerDeInfo(msd.getSerDeInfo(), conf, true) : new SerDeInfo(msd.getSerDeInfo().getName(), msd.getSerDeInfo().getSerializationLib(), Collections.emptyMap()), bucList , orderList, sdParams); if (!isAcidTable) { @@ -2480,7 +1866,7 @@ private StorageDescriptor convertToStorageDescriptor( /** * Convert a list of MStringList to a list of list string */ - private List> convertToSkewedValues(List mLists) { + private static List> convertToSkewedValues(List mLists) { List> lists = null; if (mLists != null) { lists = new ArrayList<>(); @@ -2491,7 +1877,7 @@ private List> convertToSkewedValues(List mLists) { return lists; } - private List convertToMStringLists(List> mLists) { + private static List convertToMStringLists(List> mLists) { List lists = null ; if (null != mLists) { lists = new ArrayList<>(); @@ -2505,7 +1891,7 @@ private List convertToMStringLists(List> mLists) { /** * Convert a MStringList Map to a Map */ - private Map, String> covertToSkewedMap(Map mMap) { + private static Map, String> covertToSkewedMap(Map mMap) { Map, String> map = null; if (mMap != null) { map = new HashMap<>(); @@ -2520,7 +1906,7 @@ private Map, String> covertToSkewedMap(Map mMa /** * Covert a Map to a MStringList Map */ - private Map covertToMapMStringList(Map, String> mMap) { + private static Map covertToMapMStringList(Map, String> mMap) { Map map = null; if (mMap != null) { map = new HashMap<>(); @@ -2538,7 +1924,7 @@ private Map covertToMapMStringList(Map, String * @param sd the storage descriptor to wrap in a db-backed object * @return the storage descriptor db-backed object */ - private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd) + private static MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd) throws MetaException { if (sd == null) { return null; @@ -2555,7 +1941,7 @@ private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd) * @param mcd the db-backed column descriptor * @return the db-backed storage descriptor 
object */ - private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd, + private static MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd, MColumnDescriptor mcd) throws MetaException { if (sd == null) { return null; @@ -2573,29 +1959,34 @@ private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd, .getSkewedColValueLocationMaps()), sd.isStoredAsSubDirectories()); } - private MCreationMetadata convertToMCreationMetadata(CreationMetadata m) { + public static MCreationMetadata convertToMCreationMetadata(CreationMetadata m, RawStore base) + throws MetaException { if (m == null) { return null; } assert !m.isSetMaterializationTime(); - Set tablesUsed = new HashSet<>(); - if (m.isSetSourceTables()) { - for (SourceTable sourceTable : m.getSourceTables()) { - tablesUsed.add(convertToSourceTable(m.getCatName(), sourceTable)); - } - } else { - for (String fullyQualifiedName : m.getTablesUsed()) { - tablesUsed.add(convertToSourceTable(m.getCatName(), fullyQualifiedName)); + try { + Set tablesUsed = new HashSet<>(); + if (m.isSetSourceTables()) { + for (SourceTable sourceTable : m.getSourceTables()) { + tablesUsed.add(convertToSourceTable(m.getCatName(), sourceTable, base)); + } + } else { + for (String fullyQualifiedName : m.getTablesUsed()) { + tablesUsed.add(convertToSourceTable(m.getCatName(), fullyQualifiedName, base)); + } } + return new MCreationMetadata(normalizeIdentifier(m.getCatName()), normalizeIdentifier(m.getDbName()), + normalizeIdentifier(m.getTblName()), tablesUsed, m.getValidTxnList(), System.currentTimeMillis()); + } catch (NoSuchObjectException nse) { + throw new MetaException(nse.getMessage()); } - return new MCreationMetadata(normalizeIdentifier(m.getCatName()), - normalizeIdentifier(m.getDbName()), normalizeIdentifier(m.getTblName()), - tablesUsed, m.getValidTxnList(), System.currentTimeMillis()); } - private MMVSource convertToSourceTable(String catalog, SourceTable sourceTable) { + public static 
MMVSource convertToSourceTable(String catalog, SourceTable sourceTable, RawStore base) + throws NoSuchObjectException { Table table = sourceTable.getTable(); - MTable mtbl = getMTable(catalog, table.getDbName(), table.getTableName(), false).mtbl; + MTable mtbl = base.ensureGetMTable(catalog, table.getDbName(), table.getTableName()); MMVSource source = new MMVSource(); source.setTable(mtbl); source.setInsertedCount(sourceTable.getInsertedCount()); @@ -2608,16 +1999,17 @@ private MMVSource convertToSourceTable(String catalog, SourceTable sourceTable) * This method resets the stats to 0 and supports only backward compatibility with clients does not * send {@link SourceTable} instances. * - * Use {@link ObjectStore#convertToSourceTable(String, SourceTable)} instead. + * Use {@link ObjectStore#convertToSourceTable(String, SourceTable, RawStore)} instead. * * @param catalog Catalog name where source table is located * @param fullyQualifiedTableName fully qualified name of source table * @return {@link MMVSource} instance represents this source table. 
*/ @Deprecated - private MMVSource convertToSourceTable(String catalog, String fullyQualifiedTableName) { + private static MMVSource convertToSourceTable(String catalog, String fullyQualifiedTableName, RawStore base) + throws NoSuchObjectException { String[] names = fullyQualifiedTableName.split("\\."); - MTable mtbl = getMTable(catalog, names[0], names[1], false).mtbl; + MTable mtbl = base.ensureGetMTable(catalog, names[0], names[1]); MMVSource source = new MMVSource(); source.setTable(mtbl); source.setInsertedCount(0L); @@ -2626,30 +2018,38 @@ private MMVSource convertToSourceTable(String catalog, String fullyQualifiedTabl return source; } - private CreationMetadata convertToCreationMetadata(MCreationMetadata s) throws MetaException { + public static CreationMetadata convertToCreationMetadata(MCreationMetadata s, RawStore base) + throws MetaException { if (s == null) { return null; } - Set tablesUsed = new HashSet<>(); - List sourceTables = new ArrayList<>(s.getTables().size()); - for (MMVSource mtbl : s.getTables()) { - tablesUsed.add(Warehouse.getQualifiedName(mtbl.getTable().getDatabase().getName(), mtbl.getTable().getTableName())); - sourceTables.add(convertToSourceTable(mtbl, s.getCatalogName())); - } - CreationMetadata r = new CreationMetadata(s.getCatalogName(), - s.getDbName(), s.getTblName(), tablesUsed); - r.setMaterializationTime(s.getMaterializationTime()); - if (s.getTxnList() != null) { - r.setValidTxnList(s.getTxnList()); + try { + Set tablesUsed = new HashSet<>(); + List sourceTables = new ArrayList<>(s.getTables().size()); + for (MMVSource mtbl : s.getTables()) { + tablesUsed.add( + Warehouse.getQualifiedName(mtbl.getTable().getDatabase().getName(), mtbl.getTable().getTableName())); + sourceTables.add(convertToSourceTable(mtbl, s.getCatalogName(), base)); + } + CreationMetadata r = new CreationMetadata(s.getCatalogName(), s.getDbName(), s.getTblName(), tablesUsed); + r.setMaterializationTime(s.getMaterializationTime()); + if (s.getTxnList() != 
null) { + r.setValidTxnList(s.getTxnList()); + } + r.setSourceTables(sourceTables); + return r; + } catch (NoSuchObjectException nse) { + throw new MetaException(nse.getMessage()); } - r.setSourceTables(sourceTables); - return r; } - private SourceTable convertToSourceTable(MMVSource mmvSource, String catalogName) throws MetaException { + private static SourceTable convertToSourceTable(MMVSource mmvSource, String catalogName, RawStore base) + throws MetaException, NoSuchObjectException { SourceTable sourceTable = new SourceTable(); MTable mTable = mmvSource.getTable(); - Table table = getTable(catalogName, mTable.getDatabase().getName(), mTable.getTableName()); + Table table = + convertToTable(base.ensureGetMTable(catalogName, mTable.getDatabase().getName(), mTable.getTableName()), + base.getConf()); sourceTable.setTable(table); sourceTable.setInsertedCount(mmvSource.getInsertedCount()); sourceTable.setUpdatedCount(mmvSource.getUpdatedCount()); @@ -2657,292 +2057,39 @@ private SourceTable convertToSourceTable(MMVSource mmvSource, String catalogName return sourceTable; } - @Override - public boolean addPartitions(String catName, String dbName, String tblName, List parts) + /** + * Convert a Partition object into an MPartition, which is an object backed by the db + * If the Partition's set of columns is the same as the parent table's AND useTableCD + * is true, then this partition's storage descriptor's column descriptor will point + * to the same one as the table's storage descriptor. + * @param part the partition to convert + * @param mt the parent table object + * @return the model partition object, and null if the input partition is null. 
+ */ + public static MPartition convertToMPart(Partition part, MTable mt) throws InvalidObjectException, MetaException { - boolean success = false; - openTransaction(); - try { - addPartitionsInternal(catName, dbName, tblName, parts); - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); + // NOTE: we don't set writeId in this method. Write ID is only set after validating the + // existing write ID against the caller's valid list. + if (part == null) { + return null; } - return success; - } - - private void addPartitionsInternal(String catName, String dbName, - String tblName, List parts) - throws MetaException, InvalidObjectException { - List tabGrants = null; - List tabColumnGrants = null; - MTable table = this.getMTable(catName, dbName, tblName); - if (table == null) { - throw new InvalidObjectException("Unable to add partitions because " - + TableName.getQualified(catName, dbName, tblName) + - " does not exist"); - } - if ("TRUE".equalsIgnoreCase(table.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) { - tabGrants = this.listAllTableGrants(catName, dbName, tblName); - tabColumnGrants = this.listTableAllColumnGrants(catName, dbName, tblName); - } - List mParts = new ArrayList<>(); - List> mPartPrivilegesList = new ArrayList<>(); - List> mPartColPrivilegesList = new ArrayList<>(); - for (Partition part : parts) { - if (!part.getTableName().equalsIgnoreCase(tblName) || !part.getDbName().equalsIgnoreCase(dbName)) { - throw new MetaException("Partition does not belong to target table " - + dbName + "." 
+ tblName + ": " + part); - } - MPartition mpart = convertToMPart(part, table); - mParts.add(mpart); - int now = (int) (System.currentTimeMillis() / 1000); - List mPartPrivileges = new ArrayList<>(); - if (tabGrants != null) { - for (MTablePrivilege tab: tabGrants) { - MPartitionPrivilege mPartPrivilege = new MPartitionPrivilege(tab.getPrincipalName(), tab.getPrincipalType(), - mpart, tab.getPrivilege(), now, tab.getGrantor(), tab.getGrantorType(), tab.getGrantOption(), - tab.getAuthorizer()); - mPartPrivileges.add(mPartPrivilege); - } - } - - List mPartColumnPrivileges = new ArrayList<>(); - if (tabColumnGrants != null) { - for (MTableColumnPrivilege col : tabColumnGrants) { - MPartitionColumnPrivilege mPartColumnPrivilege = new MPartitionColumnPrivilege(col.getPrincipalName(), - col.getPrincipalType(), mpart, col.getColumnName(), col.getPrivilege(), now, col.getGrantor(), - col.getGrantorType(), col.getGrantOption(), col.getAuthorizer()); - mPartColumnPrivileges.add(mPartColumnPrivilege); - } - } - mPartPrivilegesList.add(mPartPrivileges); - mPartColPrivilegesList.add(mPartColumnPrivileges); + if (mt == null) { + throw new InvalidObjectException( + "Partition doesn't have a valid table or database name"); } - if (CollectionUtils.isNotEmpty(mParts)) { - GetHelper helper = new GetHelper(null, null, null, true, true) { - @Override - protected Void getSqlResult(GetHelper ctx) throws MetaException { - directSql.addPartitions(mParts, mPartPrivilegesList, mPartColPrivilegesList); - return null; - } - - @Override - protected Void getJdoResult(GetHelper ctx) { - List toPersist = new ArrayList<>(mParts); - mPartPrivilegesList.forEach(toPersist::addAll); - mPartColPrivilegesList.forEach(toPersist::addAll); - pm.makePersistentAll(toPersist); - pm.flush(); - return null; - } - @Override - protected String describeResult() { - return "add partitions"; - } - }; - try { - helper.run(false); - } catch (NoSuchObjectException e) { - throw newMetaException(e); - } - } - } - - 
@Override - public boolean addPartition(Partition part) throws InvalidObjectException, - MetaException { - boolean committed = false; - try { - openTransaction(); - String catName = part.isSetCatName() ? part.getCatName() : getDefaultCatalog(conf); - addPartitionsInternal(catName, part.getDbName(), part.getTableName(), Arrays.asList(part)); - committed = commitTransaction(); - } finally { - rollbackAndCleanup(committed, null); - } - return committed; - } - - @Override - public Partition getPartition(String catName, String dbName, String tableName, - List part_vals) throws NoSuchObjectException, MetaException { - return getPartition(catName, dbName, tableName, part_vals, null); - } - - @Override - public Partition getPartition(String catName, String dbName, String tableName, - List part_vals, - String validWriteIds) - throws NoSuchObjectException, MetaException { - Partition part = null; - boolean committed = false; - try { - openTransaction(); - MTable table = this.getMTable(catName, dbName, tableName); - if (table == null) { - throw new NoSuchObjectException("Unable to get partition because " - + TableName.getQualified(catName, dbName, tableName) + - " does not exist"); - } - MPartition mpart = getMPartition(catName, dbName, tableName, part_vals, table); - part = convertToPart(catName, dbName, tableName, mpart, - TxnUtils.isAcidTable(table.getParameters())); - committed = commitTransaction(); - if (part == null) { - throw new NoSuchObjectException("partition values=" - + part_vals.toString()); - } - - part.setValues(part_vals); - // If transactional table partition, check whether the current version partition - // statistics in the metastore comply with the client query's snapshot isolation. - long statsWriteId = mpart.getWriteId(); - if (TxnUtils.isTransactionalTable(table.getParameters())) { - if (!areTxnStatsSupported) { - // Do not make persistent the following state since it is query specific (not global). 
- StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters."); - } else if (validWriteIds != null) { - if (isCurrentStatsValidForTheQuery(part, statsWriteId, validWriteIds, false)) { - part.setIsStatsCompliant(true); - } else { - part.setIsStatsCompliant(false); - // Do not make persistent the following state since it is query specific (not global). - StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters."); - } - } - } - } finally { - rollbackAndCleanup(committed, (Query)null); - } - return part; - } - - /** - * Getting MPartition object. Use this method only if the partition name is not available, - * since then the table will be queried to get the partition keys. - * @param catName The catalogue - * @param dbName The database - * @param tableName The table - * @param part_vals The values defining the partition - * @return The MPartition object in the backend database - */ - private MPartition getMPartition(String catName, String dbName, String tableName, List part_vals, MTable mtbl) - throws MetaException { - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); - boolean committed = false; - MPartition result = null; - try { - openTransaction(); - if (mtbl == null) { - mtbl = getMTable(catName, dbName, tableName); - if (mtbl == null) { - return null; - } - } - // Change the query to use part_vals instead of the name which is - // redundant TODO: callers of this often get part_vals out of name for no reason... 
- String name = - Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), part_vals); - result = getMPartition(catName, dbName, tableName, name); - committed = commitTransaction(); - } finally { - rollbackAndCleanup(committed, (Query)null); - } - return result; - } - - /** - * Getting MPartition object. Use this method if the partition name is available, so we do not - * query the table object again. - * @param catName The catalogue - * @param dbName The database - * @param tableName The table - * @param name The partition name - * @return The MPartition object in the backend database - */ - private MPartition getMPartition(String catName, String dbName, String tableName, - String name) throws MetaException { - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); - List mparts = null; - MPartition ret = null; - boolean commited = false; - Query query = null; - try { - openTransaction(); - query = - pm.newQuery(MPartition.class, - "table.tableName == t1 && table.database.name == t2 && partitionName == t3 " + - " && table.database.catalogName == t4"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " - + "java.lang.String t4"); - mparts = (List) query.executeWithArray(tableName, dbName, name, catName); - pm.retrieveAll(mparts); - commited = commitTransaction(); - // We need to compare partition name with requested name since some DBs - // (like MySQL, Derby) considers 'a' = 'a ' whereas others like (Postgres, - // Oracle) doesn't exhibit this problem. 
- if (CollectionUtils.isNotEmpty(mparts)) { - if (mparts.size() > 1) { - throw new MetaException( - "Expecting only one partition but more than one partitions are found."); - } else { - MPartition mpart = mparts.get(0); - if (name.equals(mpart.getPartitionName())) { - ret = mpart; - } else { - throw new MetaException("Expecting a partition with name " + name - + ", but metastore is returning a partition with name " + mpart.getPartitionName() - + "."); - } - } - } - } finally { - rollbackAndCleanup(commited, query); - } - return ret; - } - - /** - * Convert a Partition object into an MPartition, which is an object backed by the db - * If the Partition's set of columns is the same as the parent table's AND useTableCD - * is true, then this partition's storage descriptor's column descriptor will point - * to the same one as the table's storage descriptor. - * @param part the partition to convert - * @param mt the parent table object - * @return the model partition object, and null if the input partition is null. - */ - private MPartition convertToMPart(Partition part, MTable mt) - throws InvalidObjectException, MetaException { - // NOTE: we don't set writeId in this method. Write ID is only set after validating the - // existing write ID against the caller's valid list. - if (part == null) { - return null; - } - if (mt == null) { - throw new InvalidObjectException( - "Partition doesn't have a valid table or database name"); - } - - // If this partition's set of columns is the same as the parent table's, - // use the parent table's, so we do not create a duplicate column descriptor, - // thereby saving space - MStorageDescriptor msd; - if (mt.getSd() != null && mt.getSd().getCD() != null && - mt.getSd().getCD().getCols() != null && - part.getSd() != null && - convertToFieldSchemas(mt.getSd().getCD().getCols()). 
- equals(part.getSd().getCols())) { - msd = convertToMStorageDescriptor(part.getSd(), mt.getSd().getCD()); - } else { - msd = convertToMStorageDescriptor(part.getSd()); + // If this partition's set of columns is the same as the parent table's, + // use the parent table's, so we do not create a duplicate column descriptor, + // thereby saving space + MStorageDescriptor msd; + if (mt.getSd() != null && mt.getSd().getCD() != null && + mt.getSd().getCD().getCols() != null && + part.getSd() != null && + convertToFieldSchemas(mt.getSd().getCD().getCols()). + equals(part.getSd().getCols())) { + msd = convertToMStorageDescriptor(part.getSd(), mt.getSd().getCD()); + } else { + msd = convertToMStorageDescriptor(part.getSd()); } return new MPartition(Warehouse.makePartName(convertToFieldSchemas(mt @@ -2951,8 +2098,8 @@ private MPartition convertToMPart(Partition part, MTable mt) msd, part.getParameters()); } - private Partition convertToPart(String catName, String dbName, String tblName, - MPartition mpart, boolean isAcidTable, GetPartitionsArgs... args) + public static Partition convertToPart(String catName, String dbName, String tblName, + MPartition mpart, boolean isAcidTable, Configuration conf, GetPartitionsArgs... 
args) throws MetaException { if (mpart == null) { return null; @@ -2960,11 +2107,11 @@ private Partition convertToPart(String catName, String dbName, String tblName, catName = normalizeIdentifier(catName); dbName = normalizeIdentifier(dbName); tblName = normalizeIdentifier(tblName); - Map params = convertMap(mpart.getParameters(), args); + Map params = convertMap(mpart.getParameters(), conf, args); boolean noFS = args != null && args.length == 1 && args[0].isSkipColumnSchemaForPartition(); Partition p = new Partition(convertList(mpart.getValues()), dbName, tblName, mpart.getCreateTime(), mpart.getLastAccessTime(), - convertToStorageDescriptor(mpart.getSd(), noFS, isAcidTable), params); + convertToStorageDescriptor(mpart.getSd(), noFS, isAcidTable, conf), params); p.setCatName(catName); if(mpart.getWriteId()>0) { p.setWriteId(mpart.getWriteId()); @@ -2974,6192 +2121,1263 @@ private Partition convertToPart(String catName, String dbName, String tblName, return p; } - @Override - public boolean dropPartition(String catName, String dbName, String tableName, String partName) - throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { - boolean success = false; - try { - openTransaction(); - dropPartitionsInternal(catName, dbName, tableName, Arrays.asList(partName), true, true); - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); + public static List convertToParts(String catName, String dbName, String tblName, + List mparts, boolean isAcidTable, Configuration conf, GetPartitionsArgs args) + throws MetaException { + List parts = new ArrayList<>(mparts.size()); + for (MPartition mp : mparts) { + parts.add(convertToPart(catName, dbName, tblName, mp, isAcidTable, conf, args)); + Deadline.checkTimeout(); } - return success; - } - - @Override - public void dropPartitions(String catName, String dbName, String tblName, List partNames) - throws MetaException, NoSuchObjectException { - 
dropPartitionsInternal(catName, dbName, tblName, partNames, true, true); + return parts; } - @VisibleForTesting - void dropPartitionsInternal(String catName, String dbName, String tblName, - List partNames, boolean allowSql, boolean allowJdo) - throws MetaException, NoSuchObjectException { - if (CollectionUtils.isEmpty(partNames)) { - return; - } - new GetListHelper(catName, dbName, tblName, allowSql, allowJdo) { - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - directSql.dropPartitionsViaSqlFilter(catName, dbName, tblName, partNames); - return Collections.emptyList(); - } - @Override - protected List getJdoResult(GetHelper> ctx) throws MetaException { - dropPartitionsViaJdo(catName, dbName, tblName, partNames, new AtomicReference<>()); - return Collections.emptyList(); - } - }.run(false); + public static Pair> getPartQueryWithParams( + PersistenceManager pm, + String catName, String dbName, String tblName, + List partNames) { + Query query = pm.newQuery(); + Map params = new HashMap<>(); + String filterStr = getJDOFilterStrForPartitionNames(catName, dbName, tblName, partNames, params); + query.setFilter(filterStr); + LOG.debug(" JDOQL filter is {}", filterStr); + query.declareParameters(makeParameterDeclarationString(params)); + return Pair.of(query, params); } - private void dropPartitionsViaJdo(String catName, String dbName, String tblName, - List partNames, AtomicReference message) throws MetaException { - boolean success = false; - - if (partNames.isEmpty()) { - return; - } - openTransaction(); - - int batch = batchSize == NO_BATCHING ? 
1 : (partNames.size() + batchSize) / batchSize; - AtomicLong batchIdx = new AtomicLong(1); - AtomicLong timeSpent = new AtomicLong(0); - try { - Batchable.runBatched(batchSize, partNames, new Batchable() { - @Override - public List run(List input) throws MetaException { - StringBuilder progress = new StringBuilder("Dropping partitions, batch: "); - long start = System.currentTimeMillis(); - progress.append(batchIdx.get()).append("/").append(batch); - if (batchIdx.get() > 1) { - long leftTime = (batch - batchIdx.get()) * timeSpent.get() / batchIdx.get(); - progress.append(", time left: ").append(leftTime).append("ms"); - } - message.set(progress.toString()); - // Delete all things. - dropPartitionGrantsNoTxn(catName, dbName, tblName, input); - dropPartitionAllColumnGrantsNoTxn(catName, dbName, tblName, input); - dropPartitionColumnStatisticsNoTxn(catName, dbName, tblName, input); - - // CDs are reused; go try partition SDs, detach all CDs from SDs, then remove unused CDs. - for (MColumnDescriptor mcd : detachCdsFromSdsNoTxn(catName, dbName, tblName, input)) { - removeUnusedColumnDescriptor(mcd); - } - dropPartitionsNoTxn(catName, dbName, tblName, input); - timeSpent.addAndGet(System.currentTimeMillis() - start); - batchIdx.incrementAndGet(); - return Collections.emptyList(); - } - }); - - if (!(success = commitTransaction())) { - throw new MetaException("Failed to drop partitions"); - } - } finally { - rollbackAndCleanup(success, null); + public static String getJDOFilterStrForPartitionNames(String catName, String dbName, String tblName, + List partNames, Map params) { + StringBuilder sb = new StringBuilder( + "table.tableName == t1 && table.database.name == t2 &&" + " table.database.catalogName == t3 && ("); + params.put("t1", normalizeIdentifier(tblName)); + params.put("t2", normalizeIdentifier(dbName)); + params.put("t3", normalizeIdentifier(catName)); + int n = 0; + for (Iterator itr = partNames.iterator(); itr.hasNext(); ) { + String pn = "p" + n; + n++; + 
String part = itr.next(); + params.put(pn, part); + sb.append("partitionName == ").append(pn); + sb.append(" || "); } + sb.setLength(sb.length() - 4); // remove the last " || " + sb.append(')'); + return sb.toString(); } - @Override - public List getPartitions(String catName, String dbName, String tableName, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException { - List results = Collections.emptyList(); - boolean success = false; - - LOG.debug("Executing getPartitions"); - - try { - openTransaction(); - results = getPartitionsInternal(catName, dbName, tableName, true, true, args); - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); + public static String makeParameterDeclarationString(Map params) { + //Create the parameter declaration string + StringBuilder paramDecl = new StringBuilder(); + for (String key : params.keySet()) { + paramDecl.append(", java.lang.String ").append(key); } - return results; + return paramDecl.toString(); } - @Override - public Map getPartitionLocations(String catName, String dbName, String tblName, - String baseLocationToNotShow, int max) { - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); + /** Helper class for getting stuff w/transaction, direct SQL, perf logging, etc. 
*/ + @VisibleForTesting + public abstract class GetHelper { + private final boolean isInTxn, doTrace, allowJdo; + private boolean doUseDirectSql; + private long start; + private Table table; + protected final List partitionFields; + protected final String catName, dbName, tblName; + private boolean success = false; + protected T results = null; - boolean success = false; - Query query = null; - Map partLocations = new HashMap<>(); - try { - openTransaction(); - LOG.debug("Executing getPartitionLocations"); - - query = pm.newQuery(MPartition.class); - query.setFilter("this.table.database.catalogName == t1 && this.table.database.name == t2 " - + "&& this.table.tableName == t3"); - query.declareParameters("String t1, String t2, String t3"); - query.setResult("this.partitionName, this.sd.location"); - if (max >= 0) { - //Row limit specified, set it on the Query - query.setRange(0, max); - } - - List result = (List)query.execute(catName, dbName, tblName); - for(Object[] row:result) { - String location = (String)row[1]; - if (baseLocationToNotShow != null && location != null - && FileUtils.isSubdirectory(baseLocationToNotShow, location)) { - location = null; - } - partLocations.put((String)row[0], location); - } - LOG.debug("Done executing query for getPartitionLocations"); - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, query); + public GetHelper(String catalogName, String dbName, String tblName, + boolean allowSql, boolean allowJdo) throws MetaException { + this(catalogName, dbName, tblName, null, allowSql, allowJdo); } - return partLocations; - } - protected List getPartitionsInternal(String catName, String dbName, String tblName, - boolean allowSql, boolean allowJdo, GetPartitionsArgs args) - throws MetaException, NoSuchObjectException { - return new GetListHelper(catName, dbName, tblName, allowSql, allowJdo) { - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - return directSql.getPartitions(catName, 
dbName, tblName, args); - } - @Override - protected List getJdoResult(GetHelper> ctx) throws MetaException { - try { - return convertToParts(catName, dbName, tblName, - listMPartitions(catName, dbName, tblName, args.getMax()), false, args); - } catch (Exception e) { - LOG.error("Failed to convert to parts", e); - throw new MetaException(e.getMessage()); - } + public GetHelper(String catalogName, String dbName, String tblName, + List fields, boolean allowSql, boolean allowJdo) throws MetaException { + assert allowSql || allowJdo; + this.allowJdo = allowJdo; + this.catName = (catalogName != null) ? normalizeIdentifier(catalogName) : null; + this.dbName = (dbName != null) ? normalizeIdentifier(dbName) : null; + this.partitionFields = fields; + if (tblName != null) { + this.tblName = normalizeIdentifier(tblName); + } else { + // tblName can be null in cases of Helper being used at a higher + // abstraction level, such as with databases + this.tblName = null; + this.table = null; } - }.run(false); - } + this.doTrace = LOG.isDebugEnabled(); + this.isInTxn = isActiveTransaction(); - @Override - public Partition getPartitionWithAuth(String catName, String dbName, String tblName, - List partVals, String user_name, List group_names) - throws NoSuchObjectException, MetaException, InvalidObjectException { - boolean success = false; - try { - openTransaction(); - MPartition mpart = getMPartition(catName, dbName, tblName, partVals, null); - if (mpart == null) { - commitTransaction(); - throw new NoSuchObjectException("partition values=" - + partVals.toString()); + boolean isConfigEnabled = MetastoreConf.getBoolVar(getConf(), ConfVars.TRY_DIRECT_SQL); + if (isConfigEnabled && directSql == null) { + directSql = new MetaStoreDirectSql(pm, getConf(), ""); } - MTable mtbl = mpart.getTable(); - Partition part = convertToPart(catName, dbName, tblName, mpart, TxnUtils.isAcidTable(mtbl.getParameters())); - if ("TRUE".equalsIgnoreCase(mtbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) 
{ - String partName = Warehouse.makePartName(this.convertToFieldSchemas(mtbl - .getPartitionKeys()), partVals); - PrincipalPrivilegeSet partAuth = this.getPartitionPrivilegeSet(catName, dbName, - tblName, partName, user_name, group_names); - part.setPrivileges(partAuth); + if (!allowJdo && isConfigEnabled && !directSql.isCompatibleDatastore()) { + throw new MetaException("SQL is not operational"); // test path; SQL is enabled and broken. } + this.doUseDirectSql = allowSql && isConfigEnabled && directSql.isCompatibleDatastore(); + } - success = commitTransaction(); - return part; - } finally { - rollbackAndCleanup(success, null); - } - } - - private List convertToParts(String catName, String dbName, String tblName, - List mparts, boolean isAcidTable, GetPartitionsArgs args) - throws MetaException { - List parts = new ArrayList<>(mparts.size()); - for (MPartition mp : mparts) { - parts.add(convertToPart(catName, dbName, tblName, mp, isAcidTable, args)); - Deadline.checkTimeout(); - } - return parts; - } - - // TODO:pc implement max - @Override - public List listPartitionNames(String catName, String dbName, String tableName, - short max) throws MetaException { - List pns = null; - boolean success = false; - try { - openTransaction(); - LOG.debug("Executing getPartitionNames"); - pns = getPartitionNamesNoTxn(catName, dbName, tableName, max); - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); + protected boolean canUseDirectSql(GetHelper ctx) throws MetaException { + return true; // By default, assume we can use directSQL - that's kind of the point. 
} - return pns; - } + protected abstract String describeResult(); + protected abstract T getSqlResult(GetHelper ctx) throws MetaException; + protected abstract T getJdoResult( + GetHelper ctx) throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException; - @Override - public List listPartitionNames(final String catName, final String dbName, final String tblName, - final String defaultPartName, final byte[] exprBytes, - final String order, final int maxParts) throws MetaException, NoSuchObjectException { - final String defaultPartitionName = getDefaultPartitionName(defaultPartName); - final boolean isEmptyFilter = exprBytes.length == 1 && exprBytes[0] == -1; - ExpressionTree tmp = null; - if (!isEmptyFilter) { - tmp = PartFilterExprUtil.makeExpressionTree(expressionProxy, exprBytes, - getDefaultPartitionName(defaultPartName), conf); - } - final ExpressionTree exprTree = tmp; - return new GetListHelper(catName, dbName, tblName, true, true) { - private List getPartNamesPrunedByExpr(Table table, boolean isJdoQuery) throws MetaException { - int max = isEmptyFilter ? 
maxParts : -1; - List result; - if (isJdoQuery) { - result = getPartitionNamesViaOrm(catName, dbName, tblName, ExpressionTree.EMPTY_TREE, - order, max, true, table.getPartitionKeys()); - } else { - SqlFilterForPushdown filter = new SqlFilterForPushdown(table, false); - result = directSql.getPartitionNamesViaSql(filter, table.getPartitionKeys(), - defaultPartitionName, order, max); - } - if (!isEmptyFilter) { - prunePartitionNamesByExpr(catName, dbName, tblName, result, - new GetPartitionsArgs.GetPartitionsArgsBuilder() - .expr(exprBytes).defaultPartName(defaultPartName).max(maxParts).build()); - } - return result; - } - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - SqlFilterForPushdown filter = new SqlFilterForPushdown(ctx.getTable(), false); - List partNames = null; - Table table = ctx.getTable(); - if (exprTree != null) { - if (directSql.generateSqlFilterForPushdown(table.getCatName(), table.getDbName(), table.getTableName(), - ctx.getTable().getPartitionKeys(), exprTree, defaultPartitionName, filter)) { - partNames = directSql.getPartitionNamesViaSql(filter, table.getPartitionKeys(), - defaultPartitionName, order, (int)maxParts); - } - } - if (partNames == null) { - partNames = getPartNamesPrunedByExpr(table, false); - } - return partNames; - } - @Override - protected List getJdoResult( - GetHelper> ctx) throws MetaException, NoSuchObjectException { - List result = null; - if (exprTree != null) { + public T run(boolean initTable) throws MetaException, NoSuchObjectException { + try { + start(initTable); + String savePoint = isInTxn && allowJdo ? 
"rollback_" + System.nanoTime() : null; + if (doUseDirectSql) { try { - result = getPartitionNamesViaOrm(catName, dbName, tblName, exprTree, order, - maxParts, true, ctx.getTable().getPartitionKeys()); - } catch (MetaException e) { - result = null; + directSql.prepareTxn(); + setTransactionSavePoint(savePoint); + this.results = getSqlResult(this); + LOG.debug("Using direct SQL optimization."); + } catch (Exception ex) { + handleDirectSqlError(ex, savePoint); } } - if (result == null) { - result = getPartNamesPrunedByExpr(ctx.getTable(), true); + // Note that this will be invoked in 2 cases: + // 1) DirectSQL was disabled to start with; + // 2) DirectSQL threw and was disabled in handleDirectSqlError. + if (!doUseDirectSql) { + this.results = getJdoResult(this); + LOG.debug("Not using direct SQL optimization."); } - return result; + return commit(); + } catch (NoSuchObjectException | MetaException ex) { + throw ex; + } catch (Exception ex) { + LOG.error("", ex); + throw new MetaException(ex.getMessage()); + } finally { + close(); } - }.run(true); - } - - @Override - public List listPartitionNamesByFilter(String catName, String dbName, String tblName, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException { - - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - - MTable mTable = ensureGetMTable(catName, dbName, tblName); - List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); - String filter = args.getFilter(); - final ExpressionTree tree = (filter != null && !filter.isEmpty()) - ? 
PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; - return new GetListHelper(catName, dbName, tblName, true, true) { - private final SqlFilterForPushdown filter = new SqlFilterForPushdown(); + } - @Override - protected boolean canUseDirectSql(GetHelper> ctx) throws MetaException { - return directSql.generateSqlFilterForPushdown(catName, dbName, tblName, - partitionKeys, tree, null, filter); + private void start(boolean initTable) throws MetaException, NoSuchObjectException { + start = doTrace ? System.nanoTime() : 0; + openTransaction(); + if (initTable && (tblName != null)) { + table = ensureGetTable(catName, dbName, tblName); } + doUseDirectSql = doUseDirectSql && canUseDirectSql(this); + } - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - return directSql.getPartitionNamesViaSql(filter, partitionKeys, - getDefaultPartitionName(args.getDefaultPartName()), null, args.getMax()); + private void handleDirectSqlError(Exception ex, String savePoint) throws MetaException, NoSuchObjectException { + String message = null; + try { + message = generateShorterMessage(ex); + } catch (Throwable t) { + message = ex.toString() + "; error building a better message: " + t.getMessage(); } + LOG.warn(message); // Don't log the exception, people just get confused. 
+ LOG.debug("Full DirectSQL callstack for debugging (not an error)", ex); - @Override - protected List getJdoResult(GetHelper> ctx) - throws MetaException, NoSuchObjectException, InvalidObjectException { - return getPartitionNamesViaOrm(catName, dbName, tblName, tree, null, - args.getMax(), true, partitionKeys); + if (!allowJdo || !DatabaseProduct.isRecoverableException(ex)) { + throw ExceptionHandler.newMetaException(ex); } - }.run(false); - } + + if (!isInTxn) { + JDOException rollbackEx = null; + try { + rollbackTransaction(); + } catch (JDOException jex) { + rollbackEx = jex; + } + if (rollbackEx != null) { + // Datanucleus propagates some pointless exceptions and rolls back in the finally. + if (currentTransaction != null && currentTransaction.isActive()) { + throw rollbackEx; // Throw if the tx wasn't rolled back. + } + LOG.info("Ignoring exception, rollback succeeded: " + rollbackEx.getMessage()); + } - private List getPartitionNamesViaOrm(String catName, String dbName, String tblName, - ExpressionTree tree, String order, Integer maxParts, boolean isValidatedFilter, - List partitionKeys) throws MetaException { - Map params = new HashMap(); - String jdoFilter = makeQueryFilterString(catName, dbName, tblName, tree, - params, isValidatedFilter, partitionKeys); - if (jdoFilter == null) { - assert !isValidatedFilter; - throw new MetaException("Failed to generate filter."); - } - - try (QueryWrapper query = new QueryWrapper(pm.newQuery( - "select partitionName from org.apache.hadoop.hive.metastore.model.MPartition"))) { - query.setFilter(jdoFilter); - List orderSpecs = MetaStoreUtils.makeOrderSpecs(order); - StringBuilder builder = new StringBuilder(); - for (Object[] spec : orderSpecs) { - // TODO: order by casted value if the type of partition key is not string - builder.append("values.get(").append(spec[0]).append(") ").append(spec[1]).append(","); - } - if (builder.length() > 0) { - builder.setLength(builder.length() - 1); - 
query.setOrdering(builder.toString()); + start = doTrace ? System.nanoTime() : 0; + openTransaction(); + if (table != null) { + table = ensureGetTable(catName, dbName, tblName); + } } else { - query.setOrdering("partitionName ascending"); + rollbackTransactionToSavePoint(savePoint); + start = doTrace ? System.nanoTime() : 0; } - if (maxParts > -1) { - query.setRange(0, maxParts); + if (directSqlErrors != null) { + directSqlErrors.inc(); } - String parameterDeclaration = makeParameterDeclarationStringObj(params); - query.declareParameters(parameterDeclaration); - Collection jdoRes = (Collection) query.executeWithMap(params); - List result = new LinkedList(); - for (Object partName : jdoRes) { - result.add((String) partName); + doUseDirectSql = false; + } + + private String generateShorterMessage(Exception ex) { + StringBuilder message = new StringBuilder( + "Falling back to ORM path due to direct SQL failure (this is not an error): "); + Throwable t = ex; + StackTraceElement[] prevStack = null; + while (t != null) { + message.append(t.getMessage()); + StackTraceElement[] stack = t.getStackTrace(); + int uniqueFrames = stack.length - 1; + if (prevStack != null) { + int n = prevStack.length - 1; + while (uniqueFrames >= 0 && n >= 0 && stack[uniqueFrames].equals(prevStack[n])) { + uniqueFrames--; n--; + } + } + for (int i = 0; i <= uniqueFrames; ++i) { + StackTraceElement ste = stack[i]; + message.append(" at ").append(ste); + if (ste.getMethodName().contains("getSqlResult") + && (ste.getFileName() == null || ste.getFileName().contains("ObjectStore"))) { + break; + } + } + prevStack = stack; + t = t.getCause(); + if (t != null) { + message.append(";\n Caused by: "); + } } - return result; + return message.toString(); } - } - private String extractPartitionKey(FieldSchema key, List pkeys) { - StringBuilder buffer = new StringBuilder(256); + private T commit() { + success = commitTransaction(); + if (doTrace) { + double time = ((System.nanoTime() - start) / 1000000.0); + 
String result = describeResult(); + String retrieveType = doUseDirectSql ? "SQL" : "ORM"; - assert pkeys.size() >= 1; + LOG.debug("{} retrieved using {} in {}ms", result, retrieveType, time); + } + return results; + } - String partKey = "/" + key.getName() + "="; + private void close() { + if (!success) { + rollbackTransaction(); + } + } - // Table is partitioned by single key - if (pkeys.size() == 1 && (pkeys.get(0).getName().matches(key.getName()))) { - buffer.append("partitionName.substring(partitionName.indexOf(\"") - .append(key.getName()).append("=\") + ").append(key.getName().length() + 1) - .append(")"); + public Table getTable() { + return table; + } + } - // First partition key - anything between key= and first / - } else if ((pkeys.get(0).getName().matches(key.getName()))) { + private abstract class GetListHelper extends GetHelper> { + public GetListHelper(String catName, String dbName, String tblName, boolean allowSql, + boolean allowJdo) throws MetaException { + super(catName, dbName, tblName, null, allowSql, allowJdo); + } - buffer.append("partitionName.substring(partitionName.indexOf(\"") - .append(key.getName()).append("=\") + ").append(key.getName().length() + 1).append(", ") - .append("partitionName.indexOf(\"/\")") - .append(")"); + public GetListHelper(String catName, String dbName, String tblName, List fields, + boolean allowSql, boolean allowJdo) throws MetaException { + super(catName, dbName, tblName, fields, allowSql, allowJdo); + } - // Last partition key - anything between /key= and end - } else if ((pkeys.get(pkeys.size() - 1).getName().matches(key.getName()))) { - buffer.append("partitionName.substring(partitionName.indexOf(\"") - .append(partKey).append("\") + ").append(partKey.length()) - .append(")"); + @Override + protected String describeResult() { + return results.size() + " entries"; + } + } - // Intermediate key - anything between /key= and the following / - } else { + @VisibleForTesting + public abstract class GetDbHelper 
extends GetHelper { + /** + * GetHelper for returning db info using directSql/JDO. + * @param dbName The Database Name + * @param allowSql Whether or not we allow DirectSQL to perform this query. + * @param allowJdo Whether or not we allow ORM to perform this query. + */ + public GetDbHelper(String catalogName, String dbName,boolean allowSql, boolean allowJdo) + throws MetaException { + super(catalogName, dbName,null,allowSql,allowJdo); + } - buffer.append("partitionName.substring(partitionName.indexOf(\"") - .append(partKey).append("\") + ").append(partKey.length()).append(", ") - .append("partitionName.indexOf(\"/\", partitionName.indexOf(\"").append(partKey) - .append("\") + 1))"); + @Override + protected String describeResult() { + return "db details for db ".concat(dbName); } - LOG.info("Query for Key:" + key.getName() + " is :" + buffer); - return buffer.toString(); } - @Override - public PartitionValuesResponse listPartitionValues(String catName, String dbName, - String tableName, List cols, - boolean applyDistinct, String filter, - boolean ascending, List order, - long maxParts) throws MetaException { + private abstract class GetStatHelper extends GetHelper { + public GetStatHelper(String catalogName, String dbName, String tblName, boolean allowSql, + boolean allowJdo, String writeIdList) throws MetaException { + super(catalogName, dbName, tblName, allowSql, allowJdo); + } - catName = normalizeIdentifier(catName); - dbName = dbName.toLowerCase().trim(); - tableName = tableName.toLowerCase().trim(); - try { - if (filter == null || filter.isEmpty()) { - PartitionValuesResponse response = getDistinctValuesForPartitionsNoTxn(catName, dbName, - tableName, cols, applyDistinct, maxParts); - LOG.info("Number of records fetched: {}", response.getPartitionValues().size()); - return response; - } else { - PartitionValuesResponse response = - extractPartitionNamesByFilter(catName, dbName, tableName, filter, cols, ascending, maxParts); - if 
(response.getPartitionValues() != null) { - LOG.info("Number of records fetched with filter: {}", response.getPartitionValues().size()); - } - return response; - } - } catch (Exception t) { - LOG.error("Exception in ORM", t); - throw new MetaException("Error retrieving partition values: " + t); + @Override + protected String describeResult() { + return "statistics for " + (results == null ? 0 : results.getStatsObjSize()) + " columns"; } } - private PartitionValuesResponse extractPartitionNamesByFilter( - String catName, String dbName, String tableName, String filter, List cols, - boolean ascending, long maxParts) - throws MetaException, NoSuchObjectException { + private Table ensureGetTable(String catName, String dbName, String tblName) + throws NoSuchObjectException, MetaException { + return convertToTable(ensureGetMTable(catName, dbName, tblName), conf); + } - LOG.info("Table: {} filter: \"{}\" cols: {}", - TableName.getQualified(catName, dbName, tableName), filter, cols); - List partitionNames = null; - List partitions = null; - Table tbl = getTable(catName, dbName, tableName, null); - try { - // Get partitions by name - ascending or descending - partitionNames = getPartitionNamesByFilter(catName, dbName, tableName, filter, ascending, - maxParts); - } catch (MetaException e) { - LOG.warn("Querying by partition names failed, trying out with partition objects, filter: {}", filter); + /** + * Verifies that the stats JSON string is unchanged for alter table (txn stats). + * @return Error message with the details of the change, or null if the value has not changed. + */ + public static String verifyStatsChangeCtx(String fullTableName, Map oldP, Map newP, + long writeId, String validWriteIds, boolean isColStatsChange) { + if (validWriteIds != null && writeId > 0) { + return null; // We have txn context. 
} - if (partitionNames == null) { - partitions = getPartitionsByFilter(catName, dbName, tableName, - new GetPartitionsArgs.GetPartitionsArgsBuilder().filter(filter).max((short) maxParts).build()); + if (!StatsSetupConst.areBasicStatsUptoDate(newP)) { + // The validWriteIds can be absent, for example, in case of Impala alter. + // If the new value is invalid, then we don't care, let the alter operation go ahead. + return null; } - if (partitions != null) { - partitionNames = new ArrayList<>(partitions.size()); - for (Partition partition : partitions) { - // Check for NULL's just to be safe - if (tbl.getPartitionKeys() != null && partition.getValues() != null) { - partitionNames.add(Warehouse.makePartName(tbl.getPartitionKeys(), partition.getValues())); - } + String oldVal = oldP == null ? null : oldP.get(StatsSetupConst.COLUMN_STATS_ACCURATE); + String newVal = newP == null ? null : newP.get(StatsSetupConst.COLUMN_STATS_ACCURATE); + if (StringUtils.equalsIgnoreCase(oldVal, newVal)) { + if (!isColStatsChange) { + return null; // No change in col stats or parameters => assume no change. } } - if (partitionNames == null) { - throw new MetaException("Cannot obtain list of partitions by filter:\"" + filter + - "\" for " + TableName.getQualified(catName, dbName, tableName)); - } + // Some change to the stats state is being made; it can only be made with a write ID. 
+ return "Cannot change stats state for a transactional table " + fullTableName + " without " + + "providing the transactional write state for verification (new write ID " + + writeId + ", valid write IDs " + validWriteIds + "; current state " + oldVal + "; new" + + " state " + newVal; + } - if (!ascending) { - partitionNames.sort(Collections.reverseOrder()); + private static MFieldSchema getColumnFromTableColumns(List cols, String col) { + if (cols == null) { + return null; } - - // Return proper response - PartitionValuesResponse response = new PartitionValuesResponse(); - response.setPartitionValues(new ArrayList<>(partitionNames.size())); - LOG.info("Converting responses to Partition values for items: {}", partitionNames.size()); - for (String partName : partitionNames) { - ArrayList vals = new ArrayList<>(Collections.nCopies(tbl.getPartitionKeys().size(), null)); - PartitionValuesRow row = new PartitionValuesRow(); - Warehouse.makeValsFromName(partName, vals); - for (String value : vals) { - row.addToRow(value); + for (MFieldSchema mfs : cols) { + if (mfs.getName().equalsIgnoreCase(col)) { + return mfs; } - response.addToPartitionValues(row); } - return response; + return null; } - private List getPartitionNamesByFilter(String catName, String dbName, String tableName, - String filter, boolean ascending, long maxParts) - throws MetaException { - - boolean success = false; - List partNames = new ArrayList<>(); - Query query = null; - try { - openTransaction(); - LOG.debug("Executing getPartitionNamesByFilter"); - catName = normalizeIdentifier(catName); - dbName = dbName.toLowerCase(); - tableName = tableName.toLowerCase(); - - MTable mtable = getMTable(catName, dbName, tableName); - if( mtable == null ) { - // To be consistent with the behavior of listPartitionNames, if the - // table or db does not exist, we return an empty list - return partNames; - } - Map params = new HashMap<>(); - String queryFilterString = makeQueryFilterString(catName, dbName, mtable, 
filter, params); - query = pm.newQuery( - "select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " - + "where " + queryFilterString); - - if (maxParts >= 0) { - //User specified a row limit, set it on the Query - query.setRange(0, maxParts); - } - - LOG.debug("Filter specified is {}, JDOQL filter is {}", filter, - queryFilterString); - - LOG.debug("Parms is {}", params); - - String parameterDeclaration = makeParameterDeclarationStringObj(params); - query.declareParameters(parameterDeclaration); - if (ascending) { - query.setOrdering("partitionName ascending"); - } else { - query.setOrdering("partitionName descending"); + private static int getColumnIndexFromTableColumns(List cols, String col) { + if (cols == null) { + return -1; + } + for (int i = 0; i < cols.size(); i++) { + MFieldSchema mfs = cols.get(i); + if (mfs.getName().equalsIgnoreCase(col)) { + return i; } - query.setResult("partitionName"); - - Collection names = (Collection) query.executeWithMap(params); - partNames = new ArrayList<>(names); - - LOG.debug("Done executing query for getPartitionNamesByFilter"); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for getPartitionNamesByFilter, size: {}", partNames.size()); - } finally { - rollbackAndCleanup(success, query); } - return partNames; + return -1; } - private PartitionValuesResponse getDistinctValuesForPartitionsNoTxn( - String catName, String dbName, String tableName, List cols, - boolean applyDistinct, long maxParts) - throws MetaException { - try (QueryWrapper q = new QueryWrapper( - pm.newQuery("select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " - + "where table.database.name == t1 && table.database.catalogName == t2 && " - + "table.tableName == t3 "))) { + private boolean constraintNameAlreadyExists(MTable table, String constraintName) { + boolean commited = false; + Query constraintExistsQuery = null; + String constraintNameIfExists = null; + try { openTransaction(); - 
q.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - - // TODO: Ordering seems to affect the distinctness, needs checking, disabling. -/* - if (ascending) { - q.setOrdering("partitionName ascending"); - } else { - q.setOrdering("partitionName descending"); - } -*/ - if (maxParts > 0) { - q.setRange(0, maxParts); - } - StringBuilder partValuesSelect = new StringBuilder(256); - if (applyDistinct) { - partValuesSelect.append("DISTINCT "); - } - List partitionKeys = - getTable(catName, dbName, tableName, null).getPartitionKeys(); - for (FieldSchema key : cols) { - partValuesSelect.append(extractPartitionKey(key, partitionKeys)).append(", "); - } - partValuesSelect.setLength(partValuesSelect.length() - 2); - LOG.info("Columns to be selected from Partitions: {}", partValuesSelect); - q.setResult(partValuesSelect.toString()); - - PartitionValuesResponse response = new PartitionValuesResponse(); - response.setPartitionValues(new ArrayList<>()); - if (cols.size() > 1) { - List results = (List) q.execute(dbName, catName, tableName); - for (Object[] row : results) { - PartitionValuesRow rowResponse = new PartitionValuesRow(); - for (Object columnValue : row) { - rowResponse.addToRow((String) columnValue); - } - response.addToPartitionValues(rowResponse); - } - } else { - List results = (List) q.execute(dbName, catName, tableName); - for (Object row : results) { - PartitionValuesRow rowResponse = new PartitionValuesRow(); - rowResponse.addToRow((String) row); - response.addToPartitionValues(rowResponse); - } - } - return response; + constraintName = normalizeIdentifier(constraintName); + constraintExistsQuery = pm.newQuery(MConstraint.class, + "parentTable == parentTableP && constraintName == constraintNameP"); + constraintExistsQuery.declareParameters("MTable parentTableP, java.lang.String constraintNameP"); + constraintExistsQuery.setUnique(true); + constraintExistsQuery.setResult("constraintName"); + constraintNameIfExists = (String) 
constraintExistsQuery.executeWithArray(table, constraintName); + commited = commitTransaction(); } finally { - commitTransaction(); + rollbackAndCleanup(commited, constraintExistsQuery); } + return constraintNameIfExists != null && !constraintNameIfExists.isEmpty(); } - private List getPartitionNamesNoTxn(String catName, String dbName, String tableName, short max) { - List pns = new ArrayList<>(); - if (max == 0) { - return pns; - } - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); - try (QueryWrapper query = new QueryWrapper( - pm.newQuery("select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " - + "where table.database.name == t1 && table.tableName == t2 && table.database.catalogName == t3 " - + "order by partitionName asc"))) { - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - query.setResult("partitionName"); - - if (max > 0) { - query.setRange(0, max); + private String generateConstraintName(MTable table, String... parameters) throws MetaException { + int hashcode = ArrayUtils.toString(parameters).hashCode() & 0xfffffff; + int counter = 0; + final int MAX_RETRIES = 10; + while (counter < MAX_RETRIES) { + String currName = (parameters.length == 0 ? 
"constraint_" : parameters[parameters.length-1]) + + "_" + hashcode + "_" + System.currentTimeMillis() + "_" + (counter++); + if (!constraintNameAlreadyExists(table, currName)) { + return currName; } - Collection names = (Collection) query.execute(dbName, tableName, catName); - pns.addAll(names); - - return pns; } + throw new MetaException("Error while trying to generate the constraint name for " + ArrayUtils.toString(parameters)); } @Override - public int getNumPartitionsByPs(String catName, String dbName, String tblName, List partVals) - throws MetaException, NoSuchObjectException { - - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - - return new GetHelper(catName, dbName, tblName, true, true) { - - @Override - protected String describeResult() { - return "Partition count by partial values"; - } - - @Override - protected Integer getSqlResult(GetHelper ctx) throws MetaException { - return directSql.getNumPartitionsViaSqlPs(ctx.getTable(), partVals); - } - - @Override - protected Integer getJdoResult(GetHelper ctx) - throws MetaException, NoSuchObjectException, InvalidObjectException { - // size is known since it contains dbName, catName, tblName and partialRegex pattern - Map params = new HashMap<>(4); - String filter = getJDOFilterStrForPartitionVals(ctx.getTable(), partVals, params); - try (QueryWrapper query = new QueryWrapper(pm.newQuery( - "select count(partitionName) from org.apache.hadoop.hive.metastore.model.MPartition"))) { - query.setFilter(filter); - query.declareParameters(makeParameterDeclarationString(params)); - Long result = (Long) query.executeWithMap(params); - - return result.intValue(); - } - } - }.run(true); + public List addForeignKeys( + List fks) throws InvalidObjectException, MetaException { + return addForeignKeys(fks, true, null, null); } - /** - * Retrieves a Collection of partition-related results from the database that match - * the partial specification given 
for a specific table. - * @param dbName the name of the database - * @param tableName the name of the table - * @param part_vals the partial specification values - * @param max_parts the maximum number of partitions to return - * @param resultsCol the metadata column of the data to return, e.g. partitionName, etc. - * if resultsCol is empty or null, a collection of MPartition objects is returned - * @return A Collection of partition-related items from the db that match the partial spec - * for a table. The type of each item in the collection corresponds to the column - * you want results for. E.g., if resultsCol is partitionName, the Collection - * has types of String, and if resultsCol is null, the types are MPartition. - */ - private Collection getPartitionPsQueryResults(String catName, String dbName, - String tableName, List part_vals, - int max_parts, String resultsCol) - throws MetaException, NoSuchObjectException { - - Preconditions.checkState(this.currentTransaction.isActive()); - - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); - Table table = getTable(catName, dbName, tableName, null); - if (table == null) { - throw new NoSuchObjectException(TableName.getQualified(catName, dbName, tableName) + " table not found"); - } - // size is known since it contains dbName, catName, tblName and partialRegex - // pattern - Map params = new HashMap<>(4); - String filter = getJDOFilterStrForPartitionVals(table, part_vals, params); - try (QueryWrapper query = new QueryWrapper(pm.newQuery(MPartition.class))) { - query.setFilter(filter); - query.setOrdering("partitionName ascending"); - query.declareParameters(makeParameterDeclarationString(params)); - if (max_parts >= 0) { - // User specified a row limit, set it on the Query - query.setRange(0, max_parts); - } - if (resultsCol != null && !resultsCol.isEmpty()) { - query.setResult(resultsCol); - } - - Collection result = (Collection) 
query.executeWithMap(params); - - return Collections.unmodifiableCollection(new ArrayList<>(result)); + @Override + public String getMetastoreDbUuid() throws MetaException { + String ret = getGuidFromDB(); + if(ret != null) { + return ret; } + return createDbGuidAndPersist(); } - @Override - public List listPartitionsPsWithAuth(String catName, String db_name, String tbl_name, - GetPartitionsArgs args) throws MetaException, InvalidObjectException, NoSuchObjectException { - List partitions = new ArrayList<>(); + private String createDbGuidAndPersist() throws MetaException { boolean success = false; - + Query query = null; try { openTransaction(); - LOG.debug("executing listPartitionNamesPsWithAuth"); - MTable mtbl = getMTable(catName, db_name, tbl_name); - if (mtbl == null) { - throw new NoSuchObjectException( - TableName.getQualified(catName, db_name, tbl_name) + " table not found"); - } - String userName = args.getUserName(); - List groupNames = args.getGroupNames(); - List part_vals = args.getPart_vals(); - List partNames = args.getPartNames(); - boolean getauth = null != userName && null != groupNames && - "TRUE".equalsIgnoreCase( - mtbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE")); - if (MetaStoreUtils.arePartValsEmpty(part_vals) && partNames == null) { - partitions = getPartitions(catName, db_name, tbl_name, args); - } else if (partNames != null) { - partitions = getPartitionsByNames(catName, db_name, tbl_name, args); - } else { - partitions = getPartitionsByPs(catName, db_name, tbl_name, args); - } - if (getauth) { - for (Partition part : partitions) { - String partName = Warehouse.makePartName(this.convertToFieldSchemas(mtbl - .getPartitionKeys()), part.getValues()); - PrincipalPrivilegeSet partAuth = getPartitionPrivilegeSet(catName, db_name, - tbl_name, partName, userName, groupNames); - part.setPrivileges(partAuth); - } - } + MMetastoreDBProperties prop = new MMetastoreDBProperties(); + prop.setPropertykey("guid"); + final String guid = 
UUID.randomUUID().toString(); + LOG.debug("Attempting to add a guid {} for the metastore db", guid); + prop.setPropertyValue(guid); + prop.setDescription("Metastore DB GUID generated on " + + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"))); + pm.makePersistent(prop); success = commitTransaction(); - } catch (InvalidObjectException | NoSuchObjectException | MetaException e) { - throw e; + if (success) { + LOG.info("Metastore db guid {} created successfully", guid); + return guid; + } } catch (Exception e) { - throw new MetaException(e.getMessage()); + LOG.warn("Metastore db guid creation failed", e); } finally { - rollbackAndCleanup(success, null); + rollbackAndCleanup(success, query); } - return partitions; - } - - private List getPartitionsByPs(String catName, String dbName, - String tblName, GetPartitionsArgs args) - throws MetaException, NoSuchObjectException { - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - - return new GetListHelper(catName, dbName, tblName, true, true) { - - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - return directSql.getPartitionsViaSqlPs(ctx.getTable(), args); - } - - @Override - protected List getJdoResult(GetHelper> ctx) - throws MetaException, NoSuchObjectException { - List result = new ArrayList<>(); - Collection parts = getPartitionPsQueryResults(catName, dbName, tblName, - args.getPart_vals(), args.getMax(), null); - boolean isAcidTable = TxnUtils.isAcidTable(ctx.getTable()); - for (MPartition o : parts) { - Partition part = convertToPart(catName, dbName, tblName, o, isAcidTable, args); - result.add(part); - } - return result; - } - }.run(true); + // it possible that some other HMS instance could have created the guid + // at the same time due which this instance could not create a guid above + // in such case return the guid already generated + final String guid = getGuidFromDB(); + if 
(guid == null) { + throw new MetaException("Unable to create or fetch the metastore database uuid"); + } + return guid; } - @Override - public List listPartitionNamesPs(String catName, String dbName, String tableName, - List part_vals, short max_parts) throws MetaException, NoSuchObjectException { - List partitionNames = new ArrayList<>(); + private String getGuidFromDB() throws MetaException { boolean success = false; - + Query query = null; try { openTransaction(); - LOG.debug("Executing listPartitionNamesPs"); - Collection names = getPartitionPsQueryResults(catName, dbName, tableName, - part_vals, max_parts, "partitionName"); - partitionNames.addAll(names); + query = pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY); + query.declareParameters(PTYPARAM_STR_KEY); + Collection names = (Collection) query.execute("guid"); + List uuids = new ArrayList<>(); + for (Iterator i = names.iterator(); i.hasNext();) { + String uuid = i.next().getPropertyValue(); + LOG.debug("Found guid {}", uuid); + uuids.add(uuid); + } success = commitTransaction(); - } catch (NoSuchObjectException | MetaException e) { - throw e; - } catch (Exception e) { - throw new MetaException(e.getMessage()); - } finally { - rollbackAndCleanup(success, null); - } - return partitionNames; - } - - private List listMPartitions(String catName, String dbName, String tableName, int max) throws Exception { - LOG.debug("Executing listMPartitions"); - - Preconditions.checkState(this.currentTransaction.isActive()); - - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); - - try (Query query = pm.newQuery(MPartition.class, - "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3")) { - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - query.setOrdering("partitionName ascending"); - if (max >= 0) { - query.setRange(0, max); + if(uuids.size() > 1) { + throw new MetaException("Multiple uuids found"); } - 
final List mparts = (List) query.execute(tableName, dbName, catName); - LOG.debug("Done executing query for listMPartitions"); - - pm.retrieveAll(mparts); - pm.makeTransientAll(mparts); - - LOG.debug("Done retrieving all objects for listMPartitions {}", mparts); - - return Collections.unmodifiableList(new ArrayList<>(mparts)); + if(!uuids.isEmpty()) { + LOG.debug("Returning guid of metastore db : {}", uuids.get(0)); + return uuids.get(0); + } + } finally { + rollbackAndCleanup(success, query); } + LOG.warn("Guid for metastore db not found"); + return null; } - // This code is only executed in JDO code path, not from direct SQL code path. - private List listMPartitionsWithProjection(List fieldNames, String jdoFilter, - Map params) throws Exception { + public boolean runInTransaction(Runnable exec) { boolean success = false; - List mparts = null; try { - openTransaction(); - LOG.debug("Executing listMPartitionsWithProjection"); - try (Query query = pm.newQuery(MPartition.class, jdoFilter)) { - String parameterDeclaration = makeParameterDeclarationStringObj(params); - query.declareParameters(parameterDeclaration); - query.setOrdering("partitionName ascending"); - if (fieldNames == null || fieldNames.isEmpty()) { - // full fetch of partitions - mparts = (List) query.executeWithMap(params); - pm.retrieveAll(mparts); - pm.makeTransientAll(mparts); - mparts = new ArrayList<>(mparts); - } else { - // fetch partially filled partitions using result clause - query.setResult(Joiner.on(',').join(fieldNames)); - // if more than one fields are in the result class the return type is - // List - if (fieldNames.size() > 1) { - List results = (List) query.executeWithMap(params); - mparts = new ArrayList<>(results.size()); - for (Object[] row : results) { - MPartition mpart = new MPartition(); - int i = 0; - for (Object val : row) { - MetaStoreServerUtils.setNestedProperty(mpart, fieldNames.get(i), val, true); - i++; - } - mparts.add(mpart); - } - } else { - // only one field is 
requested, return type is List - List results = (List) query.executeWithMap(params); - mparts = new ArrayList<>(results.size()); - for (Object row : results) { - MPartition mpart = new MPartition(); - MetaStoreServerUtils.setNestedProperty(mpart, fieldNames.get(0), row, true); - mparts.add(mpart); - } - } - } + if (openTransaction()) { + exec.run(); + success = commitTransaction(); } - success = commitTransaction(); - LOG.debug("Done retrieving {} objects for listMPartitionsWithProjection", mparts.size()); + } catch (Exception e) { + LOG.warn("Metastore operation failed", e); } finally { rollbackAndCleanup(success, null); } - return mparts; - } - - @Override - public List getPartitionsByNames(String catName, String dbName, String tblName, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException { - return getPartitionsByNamesInternal(catName, dbName, tblName, true, true, args); + return success; } - protected List getPartitionsByNamesInternal(String catName, String dbName, - String tblName, boolean allowSql, boolean allowJdo, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException { - return new GetListHelper(catName, dbName, tblName, allowSql, allowJdo) { - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - return directSql.getPartitionsViaPartNames(catName, dbName, tblName, args); - } - @Override - protected List getJdoResult( - GetHelper> ctx) throws MetaException, NoSuchObjectException { - return getPartitionsViaOrmFilter(catName, dbName, tblName, false, args); + public boolean dropProperties(String key) { + boolean success = false; + Query query = null; + try { + if (openTransaction()) { + query = pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY); + query.declareParameters(PTYPARAM_STR_KEY); + @SuppressWarnings("unchecked") + Collection properties = (Collection) query.execute(key); + if (!properties.isEmpty()) { + pm.deletePersistentAll(properties); + } + success = commitTransaction(); } - 
}.run(false); - } - - @Override - public boolean getPartitionsByExpr(String catName, String dbName, String tblName, - List result, GetPartitionsArgs args) throws TException { - return getPartitionsByExprInternal(catName, dbName, tblName, result, true, true, args); - } - - private boolean prunePartitionNamesByExpr(String catName, String dbName, String tblName, - List result, GetPartitionsArgs args) throws MetaException { - MTable mTable = getMTable(catName, dbName, tblName); - List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); - boolean hasUnknownPartitions = expressionProxy.filterPartitionsByExpr( - partitionKeys, - args.getExpr(), - getDefaultPartitionName(args.getDefaultPartName()), - result); - if (args.getMax() >= 0 && result.size() > args.getMax()) { - result = result.subList(0, args.getMax()); + } catch (Exception e) { + LOG.warn("Metastore property drop failed", e); + } finally { + rollbackAndCleanup(success, query); } - return hasUnknownPartitions; + return success; } - protected boolean getPartitionsByExprInternal(String catName, String dbName, String tblName, - List result, boolean allowSql, boolean allowJdo, GetPartitionsArgs args) throws TException { - assert result != null; - - byte[] expr = args.getExpr(); - final ExpressionTree exprTree = expr.length != 0 ? 
PartFilterExprUtil.makeExpressionTree( - expressionProxy, expr, getDefaultPartitionName(args.getDefaultPartName()), conf) : ExpressionTree.EMPTY_TREE; - final AtomicBoolean hasUnknownPartitions = new AtomicBoolean(false); - - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - MTable mTable = ensureGetMTable(catName, dbName, tblName); - List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); - boolean isAcidTable = TxnUtils.isAcidTable(mTable.getParameters()); - result.addAll(new GetListHelper(catName, dbName, tblName, allowSql, allowJdo) { - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - // If we have some sort of expression tree, try SQL filter pushdown. - if (exprTree != null) { - SqlFilterForPushdown filter = new SqlFilterForPushdown(); - if (directSql.generateSqlFilterForPushdown(catName, dbName, tblName, partitionKeys, - exprTree, args.getDefaultPartName(), filter)) { - String catalogName = (catName != null) ? catName : getDefaultCatalog(conf); - return directSql.getPartitionsViaSqlFilter(catalogName, dbName, tblName, filter, - isAcidTable, args); - } + public MMetastoreDBProperties putProperties(String key, String value, String description, byte[] content) { + boolean success = false; + try { + if (openTransaction()) { + //pm.currentTransaction().setOptimistic(false); + // fetch first to determine new vs update + MMetastoreDBProperties properties = doFetchProperties(key, null); + final boolean newInstance; + if (properties == null) { + newInstance = true; + properties = new MMetastoreDBProperties(); + properties.setPropertykey(key); + } else { + newInstance = false; } - // We couldn't do SQL filter pushdown. Get names via normal means. 
- List partNames = new LinkedList<>(); - hasUnknownPartitions.set(getPartitionNamesPrunedByExprNoTxn( - catName, dbName, tblName, partitionKeys, expr, args.getDefaultPartName(), (short) args.getMax(), partNames)); - GetPartitionsArgs newArgs = new GetPartitionsArgs.GetPartitionsArgsBuilder(args).partNames(partNames).build(); - return directSql.getPartitionsViaPartNames(catName, dbName, tblName, newArgs); - } - - @Override - protected List getJdoResult( - GetHelper> ctx) throws MetaException, NoSuchObjectException { - // If we have some sort of expression tree, try JDOQL filter pushdown. - List result = null; - if (exprTree != null) { - result = getPartitionsViaOrmFilter(catName, dbName, tblName, exprTree, - false, partitionKeys, isAcidTable, args); + properties.setDescription(description); + properties.setPropertyValue(value); + properties.setPropertyContent(content); + LOG.debug("Attempting to add property {} for the metastore db", key); + properties.setDescription("Metastore property " + + (newInstance ? "created" : "updated") + + " " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"))); + if (newInstance) { + pm.makePersistent(properties); } - if (result == null) { - // We couldn't do JDOQL filter pushdown. Get names via normal means. - List partNames = new ArrayList<>(); - hasUnknownPartitions.set(getPartitionNamesPrunedByExprNoTxn( - catName, dbName, tblName, partitionKeys, expr, args.getDefaultPartName(), (short) args.getMax(), partNames)); - GetPartitionsArgs newArgs = new GetPartitionsArgs.GetPartitionsArgsBuilder(args).partNames(partNames).build(); - result = getPartitionsViaOrmFilter(catName, dbName, tblName, isAcidTable, newArgs); + success = commitTransaction(); + if (success) { + LOG.info("Metastore property {} created successfully", key); + return properties; } - return result; } - }.run(false)); - return hasUnknownPartitions.get(); - } - - /** - * Gets the default partition name. 
- * @param inputDefaultPartName Incoming default partition name. - * @return Valid default partition name - */ - private String getDefaultPartitionName(String inputDefaultPartName) { - return (((inputDefaultPartName == null) || (inputDefaultPartName.isEmpty())) - ? MetastoreConf.getVar(getConf(), ConfVars.DEFAULTPARTITIONNAME) - : inputDefaultPartName); - } - - /** - * Gets the partition names from a table, pruned using an expression. - * @param catName - * @param dbName - * @param tblName - * @param expr Expression. - * @param defaultPartName Default partition name from job config, if any. - * @param maxParts Maximum number of partition names to return. - * @param result The resulting names. - * @return Whether the result contains any unknown partitions. - */ - private boolean getPartitionNamesPrunedByExprNoTxn(String catName, String dbName, String tblName, List partColumns, byte[] expr, - String defaultPartName, short maxParts, List result) throws MetaException { - result.addAll(getPartitionNamesNoTxn(catName, dbName, tblName, (short) -1)); - return prunePartitionNamesByExpr(catName, dbName, tblName, result, - new GetPartitionsArgs.GetPartitionsArgsBuilder() - .expr(expr).defaultPartName(defaultPartName).max(maxParts).build()); - } - - /** - * Gets partition names from the table via ORM (JDOQL) filter pushdown. - * @param tblName The table. - * @param tree The expression tree from which JDOQL filter will be made. - * @param isValidatedFilter Whether the filter was pre-validated for JDOQL pushdown by a client - * (old hive client or non-hive one); if it was and we fail to create a filter, we will throw. - * @param args additional arguments for getting partitions - * @return Resulting partitions. Can be null if isValidatedFilter is false, and - * there was error deriving the JDO filter. 
- */ - private List getPartitionsViaOrmFilter(String catName, String dbName, String tblName, ExpressionTree tree, - boolean isValidatedFilter, List partitionKeys, boolean isAcidTable, - GetPartitionsArgs args) throws MetaException { - Map params = new HashMap<>(); - String jdoFilter = - makeQueryFilterString(catName, dbName, tblName, tree, params, isValidatedFilter, partitionKeys); - if (jdoFilter == null) { - assert !isValidatedFilter; - return null; - } - try (QueryWrapper query = new QueryWrapper(pm.newQuery(MPartition.class, jdoFilter))) { - if (args.getMax() >= 0) { - // User specified a row limit, set it on the Query - query.setRange(0, args.getMax()); - } - String parameterDeclaration = makeParameterDeclarationStringObj(params); - query.declareParameters(parameterDeclaration); - query.setOrdering("partitionName ascending"); - List mparts = (List) query.executeWithMap(params); - LOG.debug("Done executing query for getPartitionsViaOrmFilter"); - pm.retrieveAll(mparts); // TODO: why is this inconsistent with what we get by names? 
- LOG.debug("Done retrieving all objects for getPartitionsViaOrmFilter"); - List results = - convertToParts(catName, dbName, tblName, mparts, isAcidTable, args); - return results; + } finally { + rollbackAndCleanup(success, null); } + return null; } - private Integer getNumPartitionsViaOrmFilter(String catName, String dbName, String tblName, ExpressionTree tree, boolean isValidatedFilter, List partitionKeys) - throws MetaException { - Map params = new HashMap<>(); - String jdoFilter = makeQueryFilterString(catName, dbName, tblName, tree, - params, isValidatedFilter, partitionKeys); - if (jdoFilter == null) { - assert !isValidatedFilter; - return null; - } - - try (QueryWrapper query = new QueryWrapper(pm.newQuery( - "select count(partitionName) from org.apache.hadoop.hive.metastore.model.MPartition"))) { - query.setFilter(jdoFilter); - String parameterDeclaration = makeParameterDeclarationStringObj(params); - query.declareParameters(parameterDeclaration); - Long result = (Long) query.executeWithMap(params); - return result.intValue(); + public boolean renameProperties(String mapKey, String newKey) { + boolean success = false; + Query query = null; + try { + LOG.debug("Attempting to rename property {} to {} for the metastore db", mapKey, newKey); + if (openTransaction()) { + // ensure the target is clear; + // query is cleaned up in finally block + query = pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY); + query.declareParameters(PTYPARAM_STR_KEY); + query.setUnique(true); + MMetastoreDBProperties properties = (MMetastoreDBProperties) query.execute(newKey); + if (properties != null) { + return false; + } + // ensure we got a source + properties = (MMetastoreDBProperties) query.execute(mapKey); + if (properties == null) { + return false; + } + byte[] content = properties.getPropertyContent(); + String value = properties.getPropertyValue(); + // remove source from persistent storage + pm.deletePersistent(properties); + // make it persist with new key + 
MMetastoreDBProperties newProperties = new MMetastoreDBProperties(); + // update description + newProperties.setDescription("Metastore property renamed from " + mapKey + " to " + newKey + + " " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"))); + // change key + newProperties.setPropertykey(newKey); + newProperties.setPropertyValue(value); + newProperties.setPropertyContent(content); + pm.makePersistent(newProperties); + // commit + success = commitTransaction(); + if (success) { + LOG.info("Metastore property {} renamed {} successfully", mapKey, newKey); + return true; + } + } + } finally { + rollbackAndCleanup(success, query); } + return false; } - /** - * Gets partition names from the table via ORM (JDOQL) name filter. - * @param dbName Database name. - * @param tblName Table name. - * @param isAcidTable True if the table is ACID - * @param args additional arguments for getting partitions - * @return Resulting partitions. - */ - private List getPartitionsViaOrmFilter(String catName, String dbName, String tblName, - boolean isAcidTable, GetPartitionsArgs args) throws MetaException { - List partNames = args.getPartNames(); - if (partNames.isEmpty()) { - return Collections.emptyList(); - } - return Batchable.runBatched(batchSize, partNames, new Batchable() { - @Override - public List run(List input) throws MetaException { - Pair> queryWithParams = - getPartQueryWithParams(catName, dbName, tblName, input); - try (QueryWrapper query = new QueryWrapper(queryWithParams.getLeft())) { - query.setResultClass(MPartition.class); - query.setClass(MPartition.class); - query.setOrdering("partitionName ascending"); - - List mparts = (List) query.executeWithMap(queryWithParams.getRight()); - List partitions = convertToParts(catName, dbName, tblName, mparts, - isAcidTable, args); - - return partitions; - } + private T doFetchProperties(String key, java.util.function.Function transform) { + try(QueryWrapper query = new 
QueryWrapper(pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY))) { + query.declareParameters(PTYPARAM_STR_KEY); + query.setUnique(true); + MMetastoreDBProperties properties = (MMetastoreDBProperties) query.execute(key); + if (properties != null) { + return (T) (transform != null? transform.apply(properties) : properties); } - }); + } + return null; } - private void dropPartitionsNoTxn(String catName, String dbName, String tblName, List partNames) { - Pair> queryWithParams = - getPartQueryWithParams(catName, dbName, tblName, partNames); - try (QueryWrapper query = new QueryWrapper(queryWithParams.getLeft())) { - query.setClass(MPartition.class); - long deleted = query.deletePersistentAll(queryWithParams.getRight()); - LOG.debug("Deleted {} partition from store", deleted); + public T fetchProperties(String key, java.util.function.Function transform) { + boolean success = false; + T properties = null; + try { + if (openTransaction()) { + properties = doFetchProperties(key, transform); + success = commitTransaction(); + } + } finally { + rollbackAndCleanup(success, null); } + return properties; } - /** - * Detaches column descriptors from storage descriptors; returns the set of unique CDs - * thus detached. This is done before dropping partitions because CDs are reused between - * SDs; so, we remove the links to delete SDs and then check the returned CDs to see if - * they are referenced by other SDs. 
- */ - private Set detachCdsFromSdsNoTxn( - String catName, String dbName, String tblName, List partNames) { - Pair> queryWithParams = - getPartQueryWithParams(catName, dbName, tblName, partNames); - try (QueryWrapper query = new QueryWrapper(queryWithParams.getLeft())) { - query.setClass(MPartition.class); - query.setResult("sd"); - List sds = (List) query.executeWithMap( - queryWithParams.getRight()); - HashSet candidateCds = new HashSet<>(); - for (MStorageDescriptor sd : sds) { - if (sd != null && sd.getCD() != null) { - candidateCds.add(sd.getCD()); - sd.setCD(null); + public Map selectProperties(String key, java.util.function.Function transform) { + boolean success = false; + Query query = null; + Map results = null; + try { + if (openTransaction()) { + Collection properties; + if (key == null || key.isEmpty()) { + query = pm.newQuery(MMetastoreDBProperties.class); + properties = (Collection) query.execute(); + } else { + query = pm.newQuery(MMetastoreDBProperties.class, "this.propertyKey.startsWith(key)"); + query.declareParameters(PTYPARAM_STR_KEY); + properties = (Collection) query.execute(key); + } + pm.retrieveAll(properties); + if (!properties.isEmpty()) { + results = new TreeMap(); + for(MMetastoreDBProperties ptys : properties) { + T t = (T) (transform != null? 
transform.apply(ptys) : ptys); + if (t != null) { + results.put(ptys.getPropertykey(), t); + } + } } + success = commitTransaction(); } - return candidateCds; + } finally { + rollbackAndCleanup(success, query); } + return results; } - private String getJDOFilterStrForPartitionNames(String catName, String dbName, String tblName, - List partNames, Map params) { - StringBuilder sb = new StringBuilder("table.tableName == t1 && table.database.name == t2 &&" + - " table.database.catalogName == t3 && ("); - params.put("t1", normalizeIdentifier(tblName)); - params.put("t2", normalizeIdentifier(dbName)); - params.put("t3", normalizeIdentifier(catName)); - int n = 0; - for (Iterator itr = partNames.iterator(); itr.hasNext();) { - String pn = "p" + n; - n++; - String part = itr.next(); - params.put(pn, part); - sb.append("partitionName == ").append(pn); - sb.append(" || "); - } - sb.setLength(sb.length() - 4); // remove the last " || " - sb.append(')'); - return sb.toString(); - } + //TODO: clean up this method + private List addForeignKeys(List foreignKeys, boolean retrieveCD, + List primaryKeys, List uniqueConstraints) + throws InvalidObjectException, MetaException { + if (CollectionUtils.isNotEmpty(foreignKeys)) { + List mpkfks = new ArrayList<>(); + String currentConstraintName = null; + String catName = null; + // We start iterating through the foreign keys. This list might contain more than a single + // foreign key, and each foreign key might contain multiple columns. The outer loop retrieves + // the information that is common for a single key (table information) while the inner loop + // checks / adds information about each column. + for (int i = 0; i < foreignKeys.size(); i++) { + if (catName == null) { + catName = normalizeIdentifier(foreignKeys.get(i).isSetCatName() ? foreignKeys.get(i).getCatName() : + getDefaultCatalog(conf)); + } else { + String tmpCatName = normalizeIdentifier(foreignKeys.get(i).isSetCatName() ? 
+ foreignKeys.get(i).getCatName() : getDefaultCatalog(conf)); + if (!catName.equals(tmpCatName)) { + throw new InvalidObjectException("Foreign keys cannot span catalogs"); + } + } + final String fkTableDB = normalizeIdentifier(foreignKeys.get(i).getFktable_db()); + final String fkTableName = normalizeIdentifier(foreignKeys.get(i).getFktable_name()); + // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. + // For instance, this is the case when we are creating the table. + final AttachedMTableInfo nChildTable = getMTable(catName, fkTableDB, fkTableName, retrieveCD); + final MTable childTable = nChildTable.mtbl; + if (childTable == null) { + throw new InvalidObjectException("Child table not found: " + fkTableName); + } + MColumnDescriptor childCD = retrieveCD ? nChildTable.mcd : childTable.getSd().getCD(); + final List childCols = childCD == null || childCD.getCols() == null ? + new ArrayList<>() : new ArrayList<>(childCD.getCols()); + if (childTable.getPartitionKeys() != null) { + childCols.addAll(childTable.getPartitionKeys()); + } - private String getJDOFilterStrForPartitionVals(Table table, List vals, - Map params) throws MetaException { - String partNameMatcher = MetaStoreUtils.makePartNameMatcher(table, vals, ".*"); - params.put("dbName", table.getDbName()); - params.put("catName", table.getCatName()); - params.put("tableName", table.getTableName()); - params.put("partialRegex", partNameMatcher); - return "table.database.name == dbName" + " && table.database.catalogName == catName" - + " && table.tableName == tableName" + " && partitionName.matches(partialRegex)"; - } + final String pkTableDB = normalizeIdentifier(foreignKeys.get(i).getPktable_db()); + final String pkTableName = normalizeIdentifier(foreignKeys.get(i).getPktable_name()); + // For primary keys, we retrieve the column descriptors if retrieveCD is true (which means + // it is an alter table statement) or if it is a create table statement but we are + // 
referencing another table instead of self for the primary key. + final AttachedMTableInfo nParentTable; + final MTable parentTable; + MColumnDescriptor parentCD; + final List parentCols; + final List existingTablePrimaryKeys; + final List existingTableUniqueConstraints; + final boolean sameTable = fkTableDB.equals(pkTableDB) && fkTableName.equals(pkTableName); + if (sameTable) { + nParentTable = nChildTable; + parentTable = childTable; + parentCD = childCD; + parentCols = childCols; + existingTablePrimaryKeys = primaryKeys; + existingTableUniqueConstraints = uniqueConstraints; + } else { + nParentTable = getMTable(catName, pkTableDB, pkTableName, true); + parentTable = nParentTable.mtbl; + if (parentTable == null) { + throw new InvalidObjectException("Parent table not found: " + pkTableName); + } + parentCD = nParentTable.mcd; + parentCols = parentCD == null || parentCD.getCols() == null ? + new ArrayList<>() : new ArrayList<>(parentCD.getCols()); + if (parentTable.getPartitionKeys() != null) { + parentCols.addAll(parentTable.getPartitionKeys()); + } + PrimaryKeysRequest primaryKeysRequest = new PrimaryKeysRequest(pkTableDB, pkTableName); + primaryKeysRequest.setCatName(catName); + existingTablePrimaryKeys = getPrimaryKeys(primaryKeysRequest); + existingTableUniqueConstraints = + getUniqueConstraints(new UniqueConstraintsRequest(catName, pkTableDB, pkTableName)); + } - private Pair> getPartQueryWithParams( - String catName, String dbName, String tblName, List partNames) { - Query query = pm.newQuery(); - Map params = new HashMap<>(); - String filterStr = getJDOFilterStrForPartitionNames(catName, dbName, tblName, partNames, params); - query.setFilter(filterStr); - LOG.debug(" JDOQL filter is {}", filterStr); - query.declareParameters(makeParameterDeclarationString(params)); - return Pair.of(query, params); - } - - @Override - public List getPartitionsByFilter(String catName, String dbName, String tblName, - GetPartitionsArgs args) throws MetaException, 
NoSuchObjectException { - return getPartitionsByFilterInternal(catName, dbName, tblName, true, true, args); - } - - /** Helper class for getting stuff w/transaction, direct SQL, perf logging, etc. */ - @VisibleForTesting - public abstract class GetHelper { - private final boolean isInTxn, doTrace, allowJdo; - private boolean doUseDirectSql; - private long start; - private Table table; - protected final List partitionFields; - protected final String catName, dbName, tblName; - private boolean success = false; - protected T results = null; - - public GetHelper(String catalogName, String dbName, String tblName, - boolean allowSql, boolean allowJdo) throws MetaException { - this(catalogName, dbName, tblName, null, allowSql, allowJdo); - } - - public GetHelper(String catalogName, String dbName, String tblName, - List fields, boolean allowSql, boolean allowJdo) throws MetaException { - assert allowSql || allowJdo; - this.allowJdo = allowJdo; - this.catName = (catalogName != null) ? normalizeIdentifier(catalogName) : null; - this.dbName = (dbName != null) ? normalizeIdentifier(dbName) : null; - this.partitionFields = fields; - if (tblName != null) { - this.tblName = normalizeIdentifier(tblName); - } else { - // tblName can be null in cases of Helper being used at a higher - // abstraction level, such as with datbases - this.tblName = null; - this.table = null; - } - this.doTrace = LOG.isDebugEnabled(); - this.isInTxn = isActiveTransaction(); - - boolean isConfigEnabled = MetastoreConf.getBoolVar(getConf(), ConfVars.TRY_DIRECT_SQL); - if (isConfigEnabled && directSql == null) { - directSql = new MetaStoreDirectSql(pm, getConf(), ""); - } - - if (!allowJdo && isConfigEnabled && !directSql.isCompatibleDatastore()) { - throw new MetaException("SQL is not operational"); // test path; SQL is enabled and broken. 
- } - this.doUseDirectSql = allowSql && isConfigEnabled && directSql.isCompatibleDatastore(); - } - - protected boolean canUseDirectSql(GetHelper ctx) throws MetaException { - return true; // By default, assume we can user directSQL - that's kind of the point. - } - protected abstract String describeResult(); - protected abstract T getSqlResult(GetHelper ctx) throws MetaException; - protected abstract T getJdoResult( - GetHelper ctx) throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException; - - public T run(boolean initTable) throws MetaException, NoSuchObjectException { - try { - start(initTable); - String savePoint = isInTxn && allowJdo ? "rollback_" + System.nanoTime() : null; - if (doUseDirectSql) { - try { - directSql.prepareTxn(); - setTransactionSavePoint(savePoint); - this.results = getSqlResult(this); - LOG.debug("Using direct SQL optimization."); - } catch (Exception ex) { - handleDirectSqlError(ex, savePoint); - } - } - // Note that this will be invoked in 2 cases: - // 1) DirectSQL was disabled to start with; - // 2) DirectSQL threw and was disabled in handleDirectSqlError. - if (!doUseDirectSql) { - this.results = getJdoResult(this); - LOG.debug("Not using direct SQL optimization."); - } - return commit(); - } catch (NoSuchObjectException | MetaException ex) { - throw ex; - } catch (Exception ex) { - LOG.error("", ex); - throw new MetaException(ex.getMessage()); - } finally { - close(); - } - } - - private void start(boolean initTable) throws MetaException, NoSuchObjectException { - start = doTrace ? 
System.nanoTime() : 0; - openTransaction(); - if (initTable && (tblName != null)) { - table = ensureGetTable(catName, dbName, tblName); - } - doUseDirectSql = doUseDirectSql && canUseDirectSql(this); - } - - private void handleDirectSqlError(Exception ex, String savePoint) throws MetaException, NoSuchObjectException { - String message = null; - try { - message = generateShorterMessage(ex); - } catch (Throwable t) { - message = ex.toString() + "; error building a better message: " + t.getMessage(); - } - LOG.warn(message); // Don't log the exception, people just get confused. - LOG.debug("Full DirectSQL callstack for debugging (not an error)", ex); - - if (!allowJdo || !DatabaseProduct.isRecoverableException(ex)) { - throw ExceptionHandler.newMetaException(ex); - } - - if (!isInTxn) { - JDOException rollbackEx = null; - try { - rollbackTransaction(); - } catch (JDOException jex) { - rollbackEx = jex; - } - if (rollbackEx != null) { - // Datanucleus propagates some pointless exceptions and rolls back in the finally. - if (currentTransaction != null && currentTransaction.isActive()) { - throw rollbackEx; // Throw if the tx wasn't rolled back. - } - LOG.info("Ignoring exception, rollback succeeded: " + rollbackEx.getMessage()); - } - - start = doTrace ? System.nanoTime() : 0; - openTransaction(); - if (table != null) { - table = ensureGetTable(catName, dbName, tblName); - } - } else { - rollbackTransactionToSavePoint(savePoint); - start = doTrace ? 
System.nanoTime() : 0; - } - - if (directSqlErrors != null) { - directSqlErrors.inc(); - } - - doUseDirectSql = false; - } - - private String generateShorterMessage(Exception ex) { - StringBuilder message = new StringBuilder( - "Falling back to ORM path due to direct SQL failure (this is not an error): "); - Throwable t = ex; - StackTraceElement[] prevStack = null; - while (t != null) { - message.append(t.getMessage()); - StackTraceElement[] stack = t.getStackTrace(); - int uniqueFrames = stack.length - 1; - if (prevStack != null) { - int n = prevStack.length - 1; - while (uniqueFrames >= 0 && n >= 0 && stack[uniqueFrames].equals(prevStack[n])) { - uniqueFrames--; n--; - } - } - for (int i = 0; i <= uniqueFrames; ++i) { - StackTraceElement ste = stack[i]; - message.append(" at ").append(ste); - if (ste.getMethodName().contains("getSqlResult") - && (ste.getFileName() == null || ste.getFileName().contains("ObjectStore"))) { - break; - } - } - prevStack = stack; - t = t.getCause(); - if (t != null) { - message.append(";\n Caused by: "); - } - } - return message.toString(); - } - - private T commit() { - success = commitTransaction(); - if (doTrace) { - double time = ((System.nanoTime() - start) / 1000000.0); - String result = describeResult(); - String retrieveType = doUseDirectSql ? 
"SQL" : "ORM"; - - LOG.debug("{} retrieved using {} in {}ms", result, retrieveType, time); - } - return results; - } - - private void close() { - if (!success) { - rollbackTransaction(); - } - } - - public Table getTable() { - return table; - } - } - - private abstract class GetListHelper extends GetHelper> { - public GetListHelper(String catName, String dbName, String tblName, boolean allowSql, - boolean allowJdo) throws MetaException { - super(catName, dbName, tblName, null, allowSql, allowJdo); - } - - public GetListHelper(String catName, String dbName, String tblName, List fields, - boolean allowSql, boolean allowJdo) throws MetaException { - super(catName, dbName, tblName, fields, allowSql, allowJdo); - } - - @Override - protected String describeResult() { - return results.size() + " entries"; - } - } - - @VisibleForTesting - public abstract class GetDbHelper extends GetHelper { - /** - * GetHelper for returning db info using directSql/JDO. - * @param dbName The Database Name - * @param allowSql Whether or not we allow DirectSQL to perform this query. - * @param allowJdo Whether or not we allow ORM to perform this query. - */ - public GetDbHelper(String catalogName, String dbName,boolean allowSql, boolean allowJdo) - throws MetaException { - super(catalogName, dbName,null,allowSql,allowJdo); - } - - @Override - protected String describeResult() { - return "db details for db ".concat(dbName); - } - } - - private abstract class GetStatHelper extends GetHelper { - public GetStatHelper(String catalogName, String dbName, String tblName, boolean allowSql, - boolean allowJdo, String writeIdList) throws MetaException { - super(catalogName, dbName, tblName, allowSql, allowJdo); - } - - @Override - protected String describeResult() { - return "statistics for " + (results == null ? 
0 : results.getStatsObjSize()) + " columns"; - } - } - - @Override - public int getNumPartitionsByFilter(String catName, String dbName, String tblName, - String filter) throws MetaException, NoSuchObjectException { - final ExpressionTree exprTree = org.apache.commons.lang3.StringUtils.isNotEmpty(filter) - ? PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; - - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - MTable mTable = ensureGetMTable(catName, dbName, tblName); - List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); - - return new GetHelper(catName, dbName, tblName, true, true) { - private final SqlFilterForPushdown filter = new SqlFilterForPushdown(); - - @Override - protected String describeResult() { - return "Partition count"; - } - - @Override - protected boolean canUseDirectSql(GetHelper ctx) throws MetaException { - return directSql.generateSqlFilterForPushdown(catName, dbName, tblName, partitionKeys, exprTree, null, filter); - } - - @Override - protected Integer getSqlResult(GetHelper ctx) throws MetaException { - return directSql.getNumPartitionsViaSqlFilter(filter); - } - @Override - protected Integer getJdoResult( - GetHelper ctx) throws MetaException, NoSuchObjectException { - return getNumPartitionsViaOrmFilter(catName ,dbName, tblName, exprTree, true, partitionKeys); - } - }.run(false); - } - - protected List getPartitionsByFilterInternal( - String catName, String dbName, String tblName, - boolean allowSql, boolean allowJdo, GetPartitionsArgs args) - throws MetaException, NoSuchObjectException { - - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - - MTable mTable = ensureGetMTable(catName, dbName, tblName); - List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); - boolean isAcidTable = TxnUtils.isAcidTable(mTable.getParameters()); - String 
filter = args.getFilter(); - final ExpressionTree tree = (filter != null && !filter.isEmpty()) - ? PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; - return new GetListHelper(catName, dbName, tblName, allowSql, allowJdo) { - private final SqlFilterForPushdown filter = new SqlFilterForPushdown(); - - @Override - protected boolean canUseDirectSql(GetHelper> ctx) throws MetaException { - return directSql.generateSqlFilterForPushdown(catName, dbName, tblName, partitionKeys, tree, null, filter); - } - - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { - return directSql.getPartitionsViaSqlFilter(catName, dbName, tblName, filter, isAcidTable, args); - } - - @Override - protected List getJdoResult( - GetHelper> ctx) throws MetaException, NoSuchObjectException { - return getPartitionsViaOrmFilter(catName, dbName, tblName, tree, true, - partitionKeys, isAcidTable, args); - } - }.run(false); - } - - @Override - public List getPartitionSpecsByFilterAndProjection(final Table table, - GetProjectionsSpec partitionsProjectSpec, - final GetPartitionsFilterSpec filterSpec) throws MetaException, NoSuchObjectException { - List fieldList = null; - String inputIncludePattern = null; - String inputExcludePattern = null; - if (partitionsProjectSpec != null) { - fieldList = partitionsProjectSpec.getFieldList(); - if (partitionsProjectSpec.isSetIncludeParamKeyPattern()) { - inputIncludePattern = partitionsProjectSpec.getIncludeParamKeyPattern(); - } - if (partitionsProjectSpec.isSetExcludeParamKeyPattern()) { - inputExcludePattern = partitionsProjectSpec.getExcludeParamKeyPattern(); - } - } - if (fieldList == null || fieldList.isEmpty()) { - // no fields are requested. 
Fallback to regular getPartitions implementation to return all the fields - GetPartitionsArgs.GetPartitionsArgsBuilder argsBuilder = new GetPartitionsArgs.GetPartitionsArgsBuilder() - .excludeParamKeyPattern(inputExcludePattern) - .includeParamKeyPattern(inputIncludePattern); - return getPartitionsInternal(table.getCatName(), table.getDbName(), table.getTableName(), - true, true, argsBuilder.build()); - } - - // anonymous class below requires final String objects - final String includeParamKeyPattern = inputIncludePattern; - final String excludeParamKeyPattern = inputExcludePattern; - - return new GetListHelper(table.getCatName(), table.getDbName(), table.getTableName(), - fieldList, true, true) { - private final SqlFilterForPushdown filter = new SqlFilterForPushdown(); - private ExpressionTree tree; - - @Override - protected boolean canUseDirectSql(GetHelper> ctx) throws MetaException { - if (filterSpec.isSetFilterMode() && filterSpec.getFilterMode().equals(PartitionFilterMode.BY_EXPR)) { - // if the filter mode is BY_EXPR initialize the filter and generate the expression tree - // if there are more than one filter string we AND them together - initExpressionTree(); - return directSql.generateSqlFilterForPushdown(table.getCatName(), table.getDbName(), table.getTableName(), - table.getPartitionKeys(), tree, null, filter); - } - // BY_VALUES and BY_NAMES are always supported - return true; - } - - private void initExpressionTree() throws MetaException { - StringBuilder filterBuilder = new StringBuilder(); - int len = filterSpec.getFilters().size(); - List filters = filterSpec.getFilters(); - for (int i = 0; i < len; i++) { - filterBuilder.append('('); - filterBuilder.append(filters.get(i)); - filterBuilder.append(')'); - if (i + 1 < len) { - filterBuilder.append(" AND "); - } - } - String filterStr = filterBuilder.toString(); - tree = PartFilterExprUtil.parseFilterTree(filterStr); - } - - @Override - protected List getSqlResult(GetHelper> ctx) throws MetaException { 
- return directSql - .getPartitionsUsingProjectionAndFilterSpec(ctx.getTable(), ctx.partitionFields, - includeParamKeyPattern, excludeParamKeyPattern, filterSpec, filter); - } - - @Override - protected List getJdoResult( - GetHelper> ctx) throws MetaException { - // For single-valued fields we can use setResult() to implement projection of fields but - // JDO doesn't support multi-valued fields in setResult() so currently JDO implementation - // fallbacks to full-partition fetch if the requested fields contain multi-valued fields - List fieldNames = PartitionProjectionEvaluator.getMPartitionFieldNames(ctx.partitionFields); - Map params = new HashMap<>(); - String jdoFilter = null; - if (filterSpec.isSetFilterMode()) { - // generate the JDO filter string - switch(filterSpec.getFilterMode()) { - case BY_EXPR: - if (tree == null) { - // tree could be null when directSQL is disabled - initExpressionTree(); - } - jdoFilter = - makeQueryFilterString(table.getCatName(), table.getDbName(), table, tree, params, - true); - if (jdoFilter == null) { - throw new MetaException("Could not generate JDO filter from given expression"); - } - break; - case BY_NAMES: - jdoFilter = getJDOFilterStrForPartitionNames(table.getCatName(), table.getDbName(), - table.getTableName(), filterSpec.getFilters(), params); - break; - case BY_VALUES: - jdoFilter = getJDOFilterStrForPartitionVals(table, filterSpec.getFilters(), params); - break; - default: - throw new MetaException("Unsupported filter mode " + filterSpec.getFilterMode()); - } - } else { - // filter mode is not set create simple JDOFilterStr and params - jdoFilter = "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"; - params.put("t1", normalizeIdentifier(tblName)); - params.put("t2", normalizeIdentifier(dbName)); - params.put("t3", normalizeIdentifier(catName)); - } - try { - List mparts = listMPartitionsWithProjection(fieldNames, jdoFilter, params); - return convertToParts(catName, dbName, 
tblName, mparts, false, new GetPartitionsArgs.GetPartitionsArgsBuilder() - .excludeParamKeyPattern(excludeParamKeyPattern) - .includeParamKeyPattern(includeParamKeyPattern) - .build()); - } catch (MetaException me) { - throw me; - } catch (Exception e) { - throw new MetaException(e.getMessage()); - } - } - }.run(true); - - } - - /** - * Gets the table object for a given table, throws if anything goes wrong. - * @param dbName Database name. - * @param tblName Table name. - * @return Table object. - */ - @Override - public MTable ensureGetMTable(String catName, String dbName, String tblName) - throws NoSuchObjectException { - MTable mtable = getMTable(catName, dbName, tblName); - if (mtable == null) { - throw new NoSuchObjectException("Specified catalog.database.table does not exist : " - + TableName.getQualified(catName, dbName, tblName)); - } - return mtable; - } - - private Table ensureGetTable(String catName, String dbName, String tblName) - throws NoSuchObjectException, MetaException { - return convertToTable(ensureGetMTable(catName, dbName, tblName)); - } - - private Database ensureGetDatabase(String catName, String dbName) throws UnknownDBException { - try { - return getDatabase(catName, dbName); - } catch (NoSuchObjectException nsoe) { - throw new UnknownDBException("Could not find database " + DatabaseName.getQualified(catName, dbName)); - } - } - - /** - * Makes a JDO query filter string. - * Makes a JDO query filter string for tables or partitions. - * @param dbName Database name. - * @param mtable Table. If null, the query returned is over tables in a database. - * If not null, the query returned is over partitions in a table. - * @param filter The filter from which JDOQL filter will be made. - * @param params Parameters for the filter. Some parameters may be added here. - * @return Resulting filter. 
- */ - private String makeQueryFilterString(String catName, String dbName, MTable mtable, String filter, - Map params) throws MetaException { - ExpressionTree tree = (filter != null && !filter.isEmpty()) - ? PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; - return makeQueryFilterString(catName, dbName, convertToTable(mtable), tree, params, true); - } - - /** - * Makes a JDO query filter string for tables or partitions. - * @param dbName Database name. - * @param table Table. If null, the query returned is over tables in a database. - * If not null, the query returned is over partitions in a table. - * @param tree The expression tree from which JDOQL filter will be made. - * @param params Parameters for the filter. Some parameters may be added here. - * @param isValidatedFilter Whether the filter was pre-validated for JDOQL pushdown - * by the client; if it was and we fail to create a filter, we will throw. - * @return Resulting filter. Can be null if isValidatedFilter is false, and there was error. - */ - private String makeQueryFilterString(String catName, String dbName, Table table, - ExpressionTree tree, Map params, - boolean isValidatedFilter) throws MetaException { - assert tree != null; - FilterBuilder queryBuilder = new FilterBuilder(isValidatedFilter); - if (table != null) { - queryBuilder.append("table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); - params.put("t1", table.getTableName()); - params.put("t2", table.getDbName()); - params.put("t3", table.getCatName()); - } else { - queryBuilder.append("database.name == dbName && database.catalogName == catName"); - params.put("dbName", dbName); - params.put("catName", catName); - } - - tree.accept(new ExpressionTree.JDOFilterGenerator(getConf(), - table != null ? 
table.getPartitionKeys() : null, queryBuilder, params)); - if (queryBuilder.hasError()) { - assert !isValidatedFilter; - LOG.debug("JDO filter pushdown cannot be used: {}", queryBuilder.getErrorMessage()); - return null; - } - String jdoFilter = queryBuilder.getFilter(); - LOG.debug("jdoFilter = {}", jdoFilter); - return jdoFilter; - } - - private String makeQueryFilterString(String catName, String dbName, String tblName, - ExpressionTree tree, Map params, - boolean isValidatedFilter, List partitionKeys) throws MetaException { - assert tree != null; - FilterBuilder queryBuilder = new FilterBuilder(isValidatedFilter); - queryBuilder.append("table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); - params.put("t1", tblName); - params.put("t2", dbName); - params.put("t3", catName); - tree.accept(new ExpressionTree.JDOFilterGenerator(getConf(), partitionKeys, queryBuilder, params)); - if (queryBuilder.hasError()) { - assert !isValidatedFilter; - LOG.debug("JDO filter pushdown cannot be used: {}", queryBuilder.getErrorMessage()); - return null; - } - String jdoFilter = queryBuilder.getFilter(); - LOG.debug("jdoFilter = {}", jdoFilter); - return jdoFilter; - } - - private String makeParameterDeclarationString(Map params) { - //Create the parameter declaration string - StringBuilder paramDecl = new StringBuilder(); - for (String key : params.keySet()) { - paramDecl.append(", java.lang.String ") - .append(key); - } - return paramDecl.toString(); - } - - private String makeParameterDeclarationStringObj(Map params) { - //Create the parameter declaration string - StringBuilder paramDecl = new StringBuilder(); - for (Entry entry : params.entrySet()) { - paramDecl.append(", "); - paramDecl.append(entry.getValue().getClass().getName()); - paramDecl.append(' '); - paramDecl.append(entry.getKey()); - } - return paramDecl.toString(); - } - - @Override - public List listTableNamesByFilter(String catName, String dbName, String filter, - short 
maxTables) throws MetaException, UnknownDBException { - boolean success = false; - Query query = null; - List tableNames = new ArrayList<>(); - try { - openTransaction(); - LOG.debug("Executing listTableNamesByFilter"); - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - - ensureGetDatabase(catName, dbName); - - Map params = new HashMap<>(); - String queryFilterString = makeQueryFilterString(catName, dbName, null, filter, params); - query = pm.newQuery(MTable.class); - query.declareImports("import java.lang.String"); - query.setResult("tableName"); - query.setResultClass(java.lang.String.class); - if (maxTables >= 0) { - query.setRange(0, maxTables); - } - LOG.debug("filter specified is {}, JDOQL filter is {}", filter, queryFilterString); - if (LOG.isDebugEnabled()) { - for (Entry entry : params.entrySet()) { - LOG.debug("key: {} value: {} class: {}", entry.getKey(), entry.getValue(), - entry.getValue().getClass().getName()); - } - } - String parameterDeclaration = makeParameterDeclarationStringObj(params); - query.declareParameters(parameterDeclaration); - query.setFilter(queryFilterString); - Collection names = (Collection)query.executeWithMap(params); - // have to emulate "distinct", otherwise tables with the same name may be returned - tableNames = new ArrayList<>(new HashSet<>(names)); - LOG.debug("Done executing query for listTableNamesByFilter"); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listTableNamesByFilter"); - } finally { - rollbackAndCleanup(success, query); - } - return tableNames; - } - - @Override - public Table alterTable(String catName, String dbname, String name, Table newTable, - String queryValidWriteIds) throws InvalidObjectException, MetaException { - boolean success = false; - try { - openTransaction(); - name = normalizeIdentifier(name); - dbname = normalizeIdentifier(dbname); - catName = normalizeIdentifier(catName); - MTable newt = convertToMTable(newTable); - if (newt == 
null) { - throw new InvalidObjectException("new table is invalid"); - } - - MTable oldt = getMTable(catName, dbname, name); - if (oldt == null) { - throw new MetaException("table " + dbname + "." + name + " doesn't exist"); - } - - // For now only alter name, owner, parameters, cols, bucketcols are allowed - oldt.setDatabase(newt.getDatabase()); - oldt.setTableName(normalizeIdentifier(newt.getTableName())); - boolean isTxn = TxnUtils.isTransactionalTable(newTable); - boolean isToTxn = isTxn && !TxnUtils.isTransactionalTable(oldt.getParameters()); - if (!isToTxn && isTxn && areTxnStatsSupported) { - // Transactional table is altered without a txn. Make sure there are no changes to the flag. - String errorMsg = verifyStatsChangeCtx(TableName.getDbTable(name, dbname), oldt.getParameters(), - newTable.getParameters(), newTable.getWriteId(), queryValidWriteIds, false); - if (errorMsg != null) { - throw new MetaException(errorMsg); - } - } - oldt.setParameters(newt.getParameters()); - oldt.setOwner(newt.getOwner()); - oldt.setOwnerType(newt.getOwnerType()); - // Fully copy over the contents of the new SD into the old SD, - // so we don't create an extra SD in the metastore db that has no references. - MColumnDescriptor oldCD = null; - MStorageDescriptor oldSD = oldt.getSd(); - if (oldSD != null) { - oldCD = oldSD.getCD(); - } - copyMSD(newt.getSd(), oldt.getSd()); - removeUnusedColumnDescriptor(oldCD); - oldt.setRetention(newt.getRetention()); - oldt.setPartitionKeys(newt.getPartitionKeys()); - oldt.setTableType(newt.getTableType()); - oldt.setLastAccessTime(newt.getLastAccessTime()); - oldt.setViewOriginalText(newt.getViewOriginalText()); - oldt.setViewExpandedText(newt.getViewExpandedText()); - oldt.setRewriteEnabled(newt.isRewriteEnabled()); - - // If transactional, update the stats state for the current Stats updater query. - // Set stats invalid for ACID conversion; it doesn't pass in the write ID. 
- if (isTxn) { - if (!areTxnStatsSupported || isToTxn) { - StatsSetupConst.setBasicStatsState(oldt.getParameters(), StatsSetupConst.FALSE); - } else if (queryValidWriteIds != null && newTable.getWriteId() > 0) { - // Check concurrent INSERT case and set false to the flag. - if (!isCurrentStatsValidForTheQuery(oldt, queryValidWriteIds, true)) { - StatsSetupConst.setBasicStatsState(oldt.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " + - dbname + "." + name + ". will be made persistent."); - } - assert newTable.getWriteId() > 0; - oldt.setWriteId(newTable.getWriteId()); - } - } - newTable = convertToTable(oldt); - - // commit the changes - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); - } - return newTable; - } - - /** - * Verifies that the stats JSON string is unchanged for alter table (txn stats). - * @return Error message with the details of the change, or null if the value has not changed. - */ - public static String verifyStatsChangeCtx(String fullTableName, Map oldP, Map newP, - long writeId, String validWriteIds, boolean isColStatsChange) { - if (validWriteIds != null && writeId > 0) { - return null; // We have txn context. - } - - if (!StatsSetupConst.areBasicStatsUptoDate(newP)) { - // The validWriteIds can be absent, for example, in case of Impala alter. - // If the new value is invalid, then we don't care, let the alter operation go ahead. - return null; - } - - String oldVal = oldP == null ? null : oldP.get(StatsSetupConst.COLUMN_STATS_ACCURATE); - String newVal = newP == null ? null : newP.get(StatsSetupConst.COLUMN_STATS_ACCURATE); - if (StringUtils.equalsIgnoreCase(oldVal, newVal)) { - if (!isColStatsChange) { - return null; // No change in col stats or parameters => assume no change. - } - } - - // Some change to the stats state is being made; it can only be made with a write ID. 
- return "Cannot change stats state for a transactional table " + fullTableName + " without " + - "providing the transactional write state for verification (new write ID " + - writeId + ", valid write IDs " + validWriteIds + "; current state " + oldVal + "; new" + - " state " + newVal; - } - - @Override - public void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm) - throws MetaException { - boolean success = false; - try { - openTransaction(); - catName = normalizeIdentifier(catName); - dbname = normalizeIdentifier(dbname); - tablename = normalizeIdentifier(tablename); - // Update creation metadata - MCreationMetadata newMcm = convertToMCreationMetadata(cm); - MCreationMetadata mcm = getCreationMetadata(catName, dbname, tablename); - mcm.setTables(newMcm.getTables()); - mcm.setMaterializationTime(newMcm.getMaterializationTime()); - mcm.setTxnList(newMcm.getTxnList()); - // commit the changes - success = commitTransaction(); - cm.setMaterializationTime(newMcm.getMaterializationTime()); - } finally { - rollbackAndCleanup(success, null); - } - } - - private static final class Ref { - public T t; - } - - /** - * Alters an existing partition. Initiates copy of SD. Returns the old CD. 
- * @param part_vals Partition values (of the original partition instance) - * @param newPart Partition object containing new information - */ - private Partition alterPartitionNoTxn(String catName, String dbname, String name, - List part_vals, Partition newPart, String validWriteIds, Ref oldCd) - throws InvalidObjectException, MetaException { - MTable table = this.getMTable(newPart.getCatName(), newPart.getDbName(), newPart.getTableName()); - MPartition oldp = getMPartition(catName, dbname, name, part_vals, table); - return alterPartitionNoTxn(catName, dbname, name, oldp, newPart, - validWriteIds, oldCd, table); - } - - private Partition alterPartitionNoTxn(String catName, String dbname, - String name, MPartition oldp, Partition newPart, - String validWriteIds, - Ref oldCd, MTable table) - throws InvalidObjectException, MetaException { - catName = normalizeIdentifier(catName); - name = normalizeIdentifier(name); - dbname = normalizeIdentifier(dbname); - MPartition newp = convertToMPart(newPart, table); - MColumnDescriptor oldCD = null; - MStorageDescriptor oldSD = oldp.getSd(); - if (oldSD != null) { - oldCD = oldSD.getCD(); - } - if (newp == null) { - throw new InvalidObjectException("partition does not exist."); - } - oldp.setValues(newp.getValues()); - oldp.setPartitionName(newp.getPartitionName()); - boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); - if (isTxn && areTxnStatsSupported) { - // Transactional table is altered without a txn. Make sure there are no changes to the flag. 
- String errorMsg = verifyStatsChangeCtx(TableName.getDbTable(dbname, name), - oldp.getParameters(), - newPart.getParameters(), newPart.getWriteId(), validWriteIds, false); - if (errorMsg != null) { - throw new MetaException(errorMsg); - } - } - oldp.setParameters(newPart.getParameters()); - if (!TableType.VIRTUAL_VIEW.name().equals(oldp.getTable().getTableType())) { - copyMSD(newp.getSd(), oldp.getSd()); - } - if (newp.getCreateTime() != oldp.getCreateTime()) { - oldp.setCreateTime(newp.getCreateTime()); - } - if (newp.getLastAccessTime() != oldp.getLastAccessTime()) { - oldp.setLastAccessTime(newp.getLastAccessTime()); - } - - // If transactional, add/update the MUPdaterTransaction - // for the current updater query. - if (isTxn) { - if (!areTxnStatsSupported) { - StatsSetupConst.setBasicStatsState(oldp.getParameters(), StatsSetupConst.FALSE); - } else if (validWriteIds != null && newPart.getWriteId() > 0) { - // Check concurrent INSERT case and set false to the flag. - if (!isCurrentStatsValidForTheQuery(oldp, validWriteIds, true)) { - StatsSetupConst.setBasicStatsState(oldp.getParameters(), StatsSetupConst.FALSE); - LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition " + - dbname + "." + name + "." 
+ oldp.getPartitionName() + " will be made persistent."); - } - oldp.setWriteId(newPart.getWriteId()); - } - } - - oldCd.t = oldCD; - return convertToPart(catName, dbname, name, oldp, TxnUtils.isAcidTable(table.getParameters())); - } - - @Override - public Partition alterPartition(String catName, String dbname, String name, List part_vals, - Partition newPart, String validWriteIds) throws InvalidObjectException, MetaException { - boolean success = false; - Throwable e = null; - Partition result = null; - try { - openTransaction(); - Ref oldCd = new Ref<>(); - result = alterPartitionNoTxn(catName, dbname, name, part_vals, newPart, validWriteIds, oldCd); - removeUnusedColumnDescriptor(oldCd.t); - // commit the changes - success = commitTransaction(); - } catch (Throwable exception) { - LOG.error("alterPartition failed", exception); - e = exception; - } finally { - if (!success) { - rollbackTransaction(); - MetaException metaException = new MetaException( - "The transaction for alter partition did not commit successfully."); - if (e != null) { - metaException.initCause(e); - } - throw metaException; - } - } - return result; - } - - @Override - public List alterPartitions(String catName, String dbName, String tblName, - List> part_vals, List newParts, - long writeId, String queryWriteIdList) - throws InvalidObjectException, MetaException { - List results = new ArrayList<>(newParts.size()); - if (newParts.isEmpty()) { - return results; - } - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - - boolean success = false; - try { - openTransaction(); - MTable table = ensureGetMTable(catName, dbName, tblName); - if (writeId > 0) { - newParts.forEach(newPart -> newPart.setWriteId(writeId)); - } - List partCols = convertToFieldSchemas(table.getPartitionKeys()); - List partNames = new ArrayList<>(); - for (List partVal : part_vals) { - partNames.add(Warehouse.makePartName(partCols, partVal)); - } - results 
= alterPartitionsInternal(table, partNames, newParts, queryWriteIdList, true, true); - // commit the changes - success = commitTransaction(); - } catch (Exception exception) { - LOG.error("Alter failed", exception); - throw new MetaException(exception.getMessage()); - } finally { - rollbackAndCleanup(success, null); - } - return results; - } - - protected List alterPartitionsInternal(MTable table, - List partNames, List newParts, String queryWriteIdList, - boolean allowSql, boolean allowJdo) - throws InvalidObjectException, MetaException, NoSuchObjectException { - // Validate new parts: StorageDescriptor and SerDeInfo must be set in Partition. - if (!TableType.VIRTUAL_VIEW.name().equals(table.getTableType())) { - for (Partition newPart : newParts) { - if (!newPart.isSetSd() || !newPart.getSd().isSetSerdeInfo()) { - throw new InvalidObjectException("Partition does not set storageDescriptor or serdeInfo."); - } - } - } - String catName = table.getDatabase().getCatalogName(); - String dbName = table.getDatabase().getName(); - String tblName = table.getTableName(); - for (Partition tmpPart : newParts) { - if (!tmpPart.getDbName().equalsIgnoreCase(dbName)) { - throw new MetaException("Invalid DB name : " + tmpPart.getDbName()); - } - if (!tmpPart.getTableName().equalsIgnoreCase(tblName)) { - throw new MetaException("Invalid table name : " + tmpPart.getDbName()); - } - } - return new GetListHelper(catName, dbName, tblName, allowSql, allowJdo) { - @Override - protected List getSqlResult(GetHelper> ctx) - throws MetaException { - return directSql.alterPartitions(table, partNames, newParts, queryWriteIdList); - } - - @Override - protected List getJdoResult(GetHelper> ctx) - throws MetaException, InvalidObjectException { - return alterPartitionsViaJdo(table, partNames, newParts, queryWriteIdList); - } - }.run(false); - } - - private List alterPartitionsViaJdo(MTable table, List partNames, - List newParts, String queryWriteIdList) - throws MetaException, 
InvalidObjectException { - String catName = table.getDatabase().getCatalogName(); - String dbName = table.getDatabase().getName(); - String tblName = table.getTableName(); - List results = new ArrayList<>(newParts.size()); - List mPartitionList; - - try (QueryWrapper query = new QueryWrapper(pm.newQuery(MPartition.class, - "table.tableName == t1 && table.database.name == t2 && t3.contains(partitionName) " + - " && table.database.catalogName == t4"))) { - query.declareParameters("java.lang.String t1, java.lang.String t2, java.util.Collection t3, " - + "java.lang.String t4"); - mPartitionList = (List) query.executeWithArray(tblName, dbName, partNames, catName); - pm.retrieveAll(mPartitionList); - - if (mPartitionList.size() > newParts.size()) { - throw new MetaException("Expecting only one partition but more than one partitions are found."); - } - - Map, MPartition> mPartsMap = new HashMap(); - for (MPartition mPartition : mPartitionList) { - mPartsMap.put(mPartition.getValues(), mPartition); - } - - Set oldCds = new HashSet<>(); - Ref oldCdRef = new Ref<>(); - for (Partition tmpPart : newParts) { - oldCdRef.t = null; - Partition result = alterPartitionNoTxn(catName, dbName, tblName, - mPartsMap.get(tmpPart.getValues()), tmpPart, queryWriteIdList, oldCdRef, table); - results.add(result); - if (oldCdRef.t != null) { - oldCds.add(oldCdRef.t); - } - } - for (MColumnDescriptor oldCd : oldCds) { - removeUnusedColumnDescriptor(oldCd); - } - } - - return results; - } - - private void copyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) { - oldSd.setLocation(newSd.getLocation()); - // If the columns of the old column descriptor != the columns of the new one, - // then change the old storage descriptor's column descriptor. - // Convert the MFieldSchema's to their thrift object counterparts, because we maintain - // datastore identity (i.e., identity of the model objects are managed by JDO, - // not the application). 
- List oldCols = oldSd.getCD() != null && oldSd.getCD().getCols() != null ? - convertToFieldSchemas(oldSd.getCD().getCols()) : null; - List newCols = newSd.getCD() != null && newSd.getCD().getCols() != null ? - convertToFieldSchemas(newSd.getCD().getCols()) : null; - if (oldCols == null || !oldCols.equals(newCols)) { - // First replace any constraints that may be associated with this CD - // Create mapping from old col indexes to new col indexes - if (oldCols != null && newCols != null) { - Map mapping = new HashMap<>(); - for (int i = 0; i < oldCols.size(); i++) { - FieldSchema oldCol = oldCols.get(i); - //TODO: replace for loop with list.indexOf() - for (int j = 0; j < newCols.size(); j++) { - FieldSchema newCol = newCols.get(j); - if (oldCol.equals(newCol)) { - mapping.put(i, j); - break; - } - } - } - // If we find it, we will change the reference for the CD. - // If we do not find it, i.e., the column will be deleted, we do not change it - // and we let the logic in removeUnusedColumnDescriptor take care of it - try (QueryWrapper query = new QueryWrapper(pm.newQuery(MConstraint.class, "parentColumn == inCD || childColumn == inCD"))) { - query.declareParameters("MColumnDescriptor inCD"); - List mConstraintsList = (List) query.execute(oldSd.getCD()); - pm.retrieveAll(mConstraintsList); - for (MConstraint mConstraint : mConstraintsList) { - if (oldSd.getCD().equals(mConstraint.getParentColumn())) { - Integer newIdx = mapping.get(mConstraint.getParentIntegerIndex()); - if (newIdx != null) { - mConstraint.setParentColumn(newSd.getCD()); - mConstraint.setParentIntegerIndex(newIdx); - } - } - if (oldSd.getCD().equals(mConstraint.getChildColumn())) { - Integer newIdx = mapping.get(mConstraint.getChildIntegerIndex()); - if (newIdx != null) { - mConstraint.setChildColumn(newSd.getCD()); - mConstraint.setChildIntegerIndex(newIdx); - } - } - } - pm.makePersistentAll(mConstraintsList); - } - // Finally replace CD - oldSd.setCD(newSd.getCD()); - } - } - - 
oldSd.setBucketCols(newSd.getBucketCols()); - oldSd.setIsCompressed(newSd.isCompressed()); - oldSd.setInputFormat(newSd.getInputFormat()); - oldSd.setOutputFormat(newSd.getOutputFormat()); - oldSd.setNumBuckets(newSd.getNumBuckets()); - oldSd.getSerDeInfo().setName(newSd.getSerDeInfo().getName()); - oldSd.getSerDeInfo().setSerializationLib( - newSd.getSerDeInfo().getSerializationLib()); - oldSd.getSerDeInfo().setParameters(newSd.getSerDeInfo().getParameters()); - oldSd.getSerDeInfo().setDescription(newSd.getSerDeInfo().getDescription()); - oldSd.setSkewedColNames(newSd.getSkewedColNames()); - oldSd.setSkewedColValues(newSd.getSkewedColValues()); - oldSd.setSkewedColValueLocationMaps(newSd.getSkewedColValueLocationMaps()); - oldSd.setSortCols(newSd.getSortCols()); - oldSd.setParameters(newSd.getParameters()); - oldSd.setStoredAsSubDirectories(newSd.isStoredAsSubDirectories()); - } - - /** - * Checks if a column descriptor has any remaining references by storage descriptors - * in the db. - * @param oldCD the column descriptor to check if it has references or not - * @return true if has references - */ - private boolean hasRemainingCDReference(MColumnDescriptor oldCD) { - assert oldCD != null; - Query query = null; - - /** - * In order to workaround oracle not supporting limit statement caused performance issue, HIVE-9447 makes - * all the backend DB run select count(1) from SDS where SDS.CD_ID=? to check if the specific CD_ID is - * referenced in SDS table before drop a partition. This select count(1) statement does not scale well in - * Postgres, and there is no index for CD_ID column in SDS table. - * For a SDS table with with 1.5 million rows, select count(1) has average 700ms without index, while in - * 10-20ms with index. But the statement before - * HIVE-9447( SELECT * FROM "SDS" "A0" WHERE "A0"."CD_ID" = $1 limit 1) uses less than 10ms . 
- */ - try { - // HIVE-21075: Fix Postgres performance regression caused by HIVE-9447 - LOG.debug("The dbType is {} ", dbType.getHiveSchemaPostfix()); - if (dbType.isPOSTGRES() || dbType.isMYSQL()) { - query = pm.newQuery(MStorageDescriptor.class, "this.cd == inCD"); - query.declareParameters("MColumnDescriptor inCD"); - List referencedSDs = null; - LOG.debug("Executing listStorageDescriptorsWithCD"); - // User specified a row limit, set it on the Query - query.setRange(0L, 1L); - referencedSDs = (List) query.execute(oldCD); - LOG.debug("Done executing query for listStorageDescriptorsWithCD"); - pm.retrieveAll(referencedSDs); - LOG.debug("Done retrieving all objects for listStorageDescriptorsWithCD"); - //if no other SD references this CD, we can throw it out. - return referencedSDs != null && !referencedSDs.isEmpty(); - } else { - query = pm.newQuery( - "select count(1) from org.apache.hadoop.hive.metastore.model.MStorageDescriptor where (this.cd == inCD)"); - query.declareParameters("MColumnDescriptor inCD"); - long count = (Long) query.execute(oldCD); - //if no other SD references this CD, we can throw it out. - return count != 0; - } - } finally { - if (query != null) { - query.closeAll(); - } - } - } - - /** - * Checks if a column descriptor has any remaining references by storage descriptors - * in the db. If it does not, then delete the CD. If it does, then do nothing. 
- * @param oldCD the column descriptor to delete if it is no longer referenced anywhere - */ - private void removeUnusedColumnDescriptor(MColumnDescriptor oldCD) { - if (oldCD == null) { - return; - } - Query query = null; - boolean success = false; - LOG.debug("execute removeUnusedColumnDescriptor"); - - try { - openTransaction(); - if (!hasRemainingCDReference(oldCD)) { - // First remove any constraints that may be associated with this CD - query = pm.newQuery(MConstraint.class, "parentColumn == inCD || childColumn == inCD"); - query.declareParameters("MColumnDescriptor inCD"); - List mConstraintsList = (List) query.execute(oldCD); - if (CollectionUtils.isNotEmpty(mConstraintsList)) { - pm.deletePersistentAll(mConstraintsList); - } - // Finally remove CD - pm.retrieve(oldCD); - pm.deletePersistent(oldCD); - LOG.debug("successfully deleted a CD in removeUnusedColumnDescriptor"); - - } - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, query); - } - } - - /** - * Called right before an action that would drop a storage descriptor. - * This function makes the SD's reference to a CD null, and then deletes the CD - * if it no longer is referenced in the table. - * @param msd the storage descriptor to drop - */ - private void preDropStorageDescriptor(MStorageDescriptor msd) { - if (msd == null || msd.getCD() == null) { - return; - } - - MColumnDescriptor mcd = msd.getCD(); - // Because there is a 1-N relationship between CDs and SDs, - // we must set the SD's CD to null first before dropping the storage descriptor - // to satisfy foreign key constraints. 
- msd.setCD(null); - removeUnusedColumnDescriptor(mcd); - } - - private static MFieldSchema getColumnFromTableColumns(List cols, String col) { - if (cols == null) { - return null; - } - for (MFieldSchema mfs : cols) { - if (mfs.getName().equalsIgnoreCase(col)) { - return mfs; - } - } - return null; - } - - private static int getColumnIndexFromTableColumns(List cols, String col) { - if (cols == null) { - return -1; - } - for (int i = 0; i < cols.size(); i++) { - MFieldSchema mfs = cols.get(i); - if (mfs.getName().equalsIgnoreCase(col)) { - return i; - } - } - return -1; - } - - private boolean constraintNameAlreadyExists(MTable table, String constraintName) { - boolean commited = false; - Query constraintExistsQuery = null; - String constraintNameIfExists = null; - try { - openTransaction(); - constraintName = normalizeIdentifier(constraintName); - constraintExistsQuery = pm.newQuery(MConstraint.class, - "parentTable == parentTableP && constraintName == constraintNameP"); - constraintExistsQuery.declareParameters("MTable parentTableP, java.lang.String constraintNameP"); - constraintExistsQuery.setUnique(true); - constraintExistsQuery.setResult("constraintName"); - constraintNameIfExists = (String) constraintExistsQuery.executeWithArray(table, constraintName); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, constraintExistsQuery); - } - return constraintNameIfExists != null && !constraintNameIfExists.isEmpty(); - } - - private String generateConstraintName(MTable table, String... parameters) throws MetaException { - int hashcode = ArrayUtils.toString(parameters).hashCode() & 0xfffffff; - int counter = 0; - final int MAX_RETRIES = 10; - while (counter < MAX_RETRIES) { - String currName = (parameters.length == 0 ? 
"constraint_" : parameters[parameters.length-1]) + - "_" + hashcode + "_" + System.currentTimeMillis() + "_" + (counter++); - if (!constraintNameAlreadyExists(table, currName)) { - return currName; - } - } - throw new MetaException("Error while trying to generate the constraint name for " + ArrayUtils.toString(parameters)); - } - - @Override - public List addForeignKeys( - List fks) throws InvalidObjectException, MetaException { - return addForeignKeys(fks, true, null, null); - } - - @Override - public String getMetastoreDbUuid() throws MetaException { - String ret = getGuidFromDB(); - if(ret != null) { - return ret; - } - return createDbGuidAndPersist(); - } - - private String createDbGuidAndPersist() throws MetaException { - boolean success = false; - Query query = null; - try { - openTransaction(); - MMetastoreDBProperties prop = new MMetastoreDBProperties(); - prop.setPropertykey("guid"); - final String guid = UUID.randomUUID().toString(); - LOG.debug("Attempting to add a guid {} for the metastore db", guid); - prop.setPropertyValue(guid); - prop.setDescription("Metastore DB GUID generated on " - + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"))); - pm.makePersistent(prop); - success = commitTransaction(); - if (success) { - LOG.info("Metastore db guid {} created successfully", guid); - return guid; - } - } catch (Exception e) { - LOG.warn("Metastore db guid creation failed", e); - } finally { - rollbackAndCleanup(success, query); - } - // it possible that some other HMS instance could have created the guid - // at the same time due which this instance could not create a guid above - // in such case return the guid already generated - final String guid = getGuidFromDB(); - if (guid == null) { - throw new MetaException("Unable to create or fetch the metastore database uuid"); - } - return guid; - } - - private String getGuidFromDB() throws MetaException { - boolean success = false; - Query query = null; - try { - 
openTransaction(); - query = pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY); - query.declareParameters(PTYPARAM_STR_KEY); - Collection names = (Collection) query.execute("guid"); - List uuids = new ArrayList<>(); - for (Iterator i = names.iterator(); i.hasNext();) { - String uuid = i.next().getPropertyValue(); - LOG.debug("Found guid {}", uuid); - uuids.add(uuid); - } - success = commitTransaction(); - if(uuids.size() > 1) { - throw new MetaException("Multiple uuids found"); - } - if(!uuids.isEmpty()) { - LOG.debug("Returning guid of metastore db : {}", uuids.get(0)); - return uuids.get(0); - } - } finally { - rollbackAndCleanup(success, query); - } - LOG.warn("Guid for metastore db not found"); - return null; - } - - public boolean runInTransaction(Runnable exec) { - boolean success = false; - try { - if (openTransaction()) { - exec.run(); - success = commitTransaction(); - } - } catch (Exception e) { - LOG.warn("Metastore operation failed", e); - } finally { - rollbackAndCleanup(success, null); - } - return success; - } - - public boolean dropProperties(String key) { - boolean success = false; - Query query = null; - try { - if (openTransaction()) { - query = pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY); - query.declareParameters(PTYPARAM_STR_KEY); - @SuppressWarnings("unchecked") - Collection properties = (Collection) query.execute(key); - if (!properties.isEmpty()) { - pm.deletePersistentAll(properties); - } - success = commitTransaction(); - } - } catch (Exception e) { - LOG.warn("Metastore property drop failed", e); - } finally { - rollbackAndCleanup(success, query); - } - return success; - } - - - public MMetastoreDBProperties putProperties(String key, String value, String description, byte[] content) { - boolean success = false; - try { - if (openTransaction()) { - //pm.currentTransaction().setOptimistic(false); - // fetch first to determine new vs update - MMetastoreDBProperties properties = doFetchProperties(key, null); - final 
boolean newInstance; - if (properties == null) { - newInstance = true; - properties = new MMetastoreDBProperties(); - properties.setPropertykey(key); - } else { - newInstance = false; - } - properties.setDescription(description); - properties.setPropertyValue(value); - properties.setPropertyContent(content); - LOG.debug("Attempting to add property {} for the metastore db", key); - properties.setDescription("Metastore property " - + (newInstance ? "created" : "updated") - + " " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"))); - if (newInstance) { - pm.makePersistent(properties); - } - success = commitTransaction(); - if (success) { - LOG.info("Metastore property {} created successfully", key); - return properties; - } - } - } finally { - rollbackAndCleanup(success, null); - } - return null; - } - - - public boolean renameProperties(String mapKey, String newKey) { - boolean success = false; - Query query = null; - try { - LOG.debug("Attempting to rename property {} to {} for the metastore db", mapKey, newKey); - if (openTransaction()) { - // ensure the target is clear; - // query is cleaned up in finally block - query = pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY); - query.declareParameters(PTYPARAM_STR_KEY); - query.setUnique(true); - MMetastoreDBProperties properties = (MMetastoreDBProperties) query.execute(newKey); - if (properties != null) { - return false; - } - // ensure we got a source - properties = (MMetastoreDBProperties) query.execute(mapKey); - if (properties == null) { - return false; - } - byte[] content = properties.getPropertyContent(); - String value = properties.getPropertyValue(); - // remove source from persistent storage - pm.deletePersistent(properties); - // make it persist with new key - MMetastoreDBProperties newProperties = new MMetastoreDBProperties(); - // update description - newProperties.setDescription("Metastore property renamed from " + mapKey + " to " + newKey - + " " + 
LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"))); - // change key - newProperties.setPropertykey(newKey); - newProperties.setPropertyValue(value); - newProperties.setPropertyContent(content); - pm.makePersistent(newProperties); - // commit - success = commitTransaction(); - if (success) { - LOG.info("Metastore property {} renamed {} successfully", mapKey, newKey); - return true; - } - } - } finally { - rollbackAndCleanup(success, query); - } - return false; - } - - private T doFetchProperties(String key, java.util.function.Function transform) { - try(QueryWrapper query = new QueryWrapper(pm.newQuery(MMetastoreDBProperties.class, PTYARG_EQ_KEY))) { - query.declareParameters(PTYPARAM_STR_KEY); - query.setUnique(true); - MMetastoreDBProperties properties = (MMetastoreDBProperties) query.execute(key); - if (properties != null) { - return (T) (transform != null? transform.apply(properties) : properties); - } - } - return null; - } - - public T fetchProperties(String key, java.util.function.Function transform) { - boolean success = false; - T properties = null; - try { - if (openTransaction()) { - properties = doFetchProperties(key, transform); - success = commitTransaction(); - } - } finally { - rollbackAndCleanup(success, null); - } - return properties; - } - - public Map selectProperties(String key, java.util.function.Function transform) { - boolean success = false; - Query query = null; - Map results = null; - try { - if (openTransaction()) { - Collection properties; - if (key == null || key.isEmpty()) { - query = pm.newQuery(MMetastoreDBProperties.class); - properties = (Collection) query.execute(); - } else { - query = pm.newQuery(MMetastoreDBProperties.class, "this.propertyKey.startsWith(key)"); - query.declareParameters(PTYPARAM_STR_KEY); - properties = (Collection) query.execute(key); - } - pm.retrieveAll(properties); - if (!properties.isEmpty()) { - results = new TreeMap(); - for(MMetastoreDBProperties ptys : properties) { - T 
t = (T) (transform != null? transform.apply(ptys) : ptys); - if (t != null) { - results.put(ptys.getPropertykey(), t); - } - } - } - success = commitTransaction(); - } - } finally { - rollbackAndCleanup(success, query); - } - return results; - } - - //TODO: clean up this method - private List addForeignKeys(List foreignKeys, boolean retrieveCD, - List primaryKeys, List uniqueConstraints) - throws InvalidObjectException, MetaException { - if (CollectionUtils.isNotEmpty(foreignKeys)) { - List mpkfks = new ArrayList<>(); - String currentConstraintName = null; - String catName = null; - // We start iterating through the foreign keys. This list might contain more than a single - // foreign key, and each foreign key might contain multiple columns. The outer loop retrieves - // the information that is common for a single key (table information) while the inner loop - // checks / adds information about each column. - for (int i = 0; i < foreignKeys.size(); i++) { - if (catName == null) { - catName = normalizeIdentifier(foreignKeys.get(i).isSetCatName() ? foreignKeys.get(i).getCatName() : - getDefaultCatalog(conf)); - } else { - String tmpCatName = normalizeIdentifier(foreignKeys.get(i).isSetCatName() ? - foreignKeys.get(i).getCatName() : getDefaultCatalog(conf)); - if (!catName.equals(tmpCatName)) { - throw new InvalidObjectException("Foreign keys cannot span catalogs"); - } - } - final String fkTableDB = normalizeIdentifier(foreignKeys.get(i).getFktable_db()); - final String fkTableName = normalizeIdentifier(foreignKeys.get(i).getFktable_name()); - // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. - // For instance, this is the case when we are creating the table. 
- final AttachedMTableInfo nChildTable = getMTable(catName, fkTableDB, fkTableName, retrieveCD); - final MTable childTable = nChildTable.mtbl; - if (childTable == null) { - throw new InvalidObjectException("Child table not found: " + fkTableName); - } - MColumnDescriptor childCD = retrieveCD ? nChildTable.mcd : childTable.getSd().getCD(); - final List childCols = childCD == null || childCD.getCols() == null ? - new ArrayList<>() : new ArrayList<>(childCD.getCols()); - if (childTable.getPartitionKeys() != null) { - childCols.addAll(childTable.getPartitionKeys()); - } - - final String pkTableDB = normalizeIdentifier(foreignKeys.get(i).getPktable_db()); - final String pkTableName = normalizeIdentifier(foreignKeys.get(i).getPktable_name()); - // For primary keys, we retrieve the column descriptors if retrieveCD is true (which means - // it is an alter table statement) or if it is a create table statement but we are - // referencing another table instead of self for the primary key. - final AttachedMTableInfo nParentTable; - final MTable parentTable; - MColumnDescriptor parentCD; - final List parentCols; - final List existingTablePrimaryKeys; - final List existingTableUniqueConstraints; - final boolean sameTable = fkTableDB.equals(pkTableDB) && fkTableName.equals(pkTableName); - if (sameTable) { - nParentTable = nChildTable; - parentTable = childTable; - parentCD = childCD; - parentCols = childCols; - existingTablePrimaryKeys = primaryKeys; - existingTableUniqueConstraints = uniqueConstraints; - } else { - nParentTable = getMTable(catName, pkTableDB, pkTableName, true); - parentTable = nParentTable.mtbl; - if (parentTable == null) { - throw new InvalidObjectException("Parent table not found: " + pkTableName); - } - parentCD = nParentTable.mcd; - parentCols = parentCD == null || parentCD.getCols() == null ? 
- new ArrayList<>() : new ArrayList<>(parentCD.getCols()); - if (parentTable.getPartitionKeys() != null) { - parentCols.addAll(parentTable.getPartitionKeys()); - } - PrimaryKeysRequest primaryKeysRequest = new PrimaryKeysRequest(pkTableDB, pkTableName); - primaryKeysRequest.setCatName(catName); - existingTablePrimaryKeys = getPrimaryKeys(primaryKeysRequest); - existingTableUniqueConstraints = - getUniqueConstraints(new UniqueConstraintsRequest(catName, pkTableDB, pkTableName)); - } - - // Here we build an aux structure that is used to verify that the foreign key that is declared - // is actually referencing a valid primary key or unique key. We also check that the types of - // the columns correspond. - if (existingTablePrimaryKeys.isEmpty() && existingTableUniqueConstraints.isEmpty()) { - throw new MetaException( - "Trying to define foreign key but there are no primary keys or unique keys for referenced table"); - } - final Set validPKsOrUnique = generateValidPKsOrUniqueSignatures(parentCols, - existingTablePrimaryKeys, existingTableUniqueConstraints); - - StringBuilder fkSignature = new StringBuilder(); - StringBuilder referencedKSignature = new StringBuilder(); - for (; i < foreignKeys.size(); i++) { - SQLForeignKey foreignKey = foreignKeys.get(i); - final String fkColumnName = normalizeIdentifier(foreignKey.getFkcolumn_name()); - int childIntegerIndex = getColumnIndexFromTableColumns(childCD.getCols(), fkColumnName); - if (childIntegerIndex == -1) { - if (childTable.getPartitionKeys() != null) { - childCD = null; - childIntegerIndex = getColumnIndexFromTableColumns(childTable.getPartitionKeys(), fkColumnName); - } - if (childIntegerIndex == -1) { - throw new InvalidObjectException("Child column not found: " + fkColumnName); - } - } - - final String pkColumnName = normalizeIdentifier(foreignKey.getPkcolumn_name()); - int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD.getCols(), pkColumnName); - if (parentIntegerIndex == -1) { - if 
(parentTable.getPartitionKeys() != null) { - parentCD = null; - parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), pkColumnName); - } - if (parentIntegerIndex == -1) { - throw new InvalidObjectException("Parent column not found: " + pkColumnName); - } - } - - if (foreignKey.getFk_name() == null) { - // When there is no explicit foreign key name associated with the constraint and the key is composite, - // we expect the foreign keys to be send in order in the input list. - // Otherwise, the below code will break. - // If this is the first column of the FK constraint, generate the foreign key name - // NB: The below code can result in race condition where duplicate names can be generated (in theory). - // However, this scenario can be ignored for practical purposes because of - // the uniqueness of the generated constraint name. - if (foreignKey.getKey_seq() == 1) { - currentConstraintName = generateConstraintName(parentTable, fkTableDB, fkTableName, pkTableDB, - pkTableName, pkColumnName, fkColumnName, "fk"); - } - } else { - currentConstraintName = normalizeIdentifier(foreignKey.getFk_name()); - if (constraintNameAlreadyExists(parentTable, currentConstraintName)) { - String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), - parentTable.getTableName(), currentConstraintName); - throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); - } - } - // Update Column, keys, table, database, catalog name - foreignKey.setFk_name(currentConstraintName); - foreignKey.setCatName(catName); - foreignKey.setFktable_db(fkTableDB); - foreignKey.setFktable_name(fkTableName); - foreignKey.setPktable_db(pkTableDB); - foreignKey.setPktable_name(pkTableName); - foreignKey.setFkcolumn_name(fkColumnName); - foreignKey.setPkcolumn_name(pkColumnName); - - Integer updateRule = foreignKey.getUpdate_rule(); - Integer deleteRule = foreignKey.getDelete_rule(); - int enableValidateRely = 
(foreignKey.isEnable_cstr() ? 4 : 0) + - (foreignKey.isValidate_cstr() ? 2 : 0) + (foreignKey.isRely_cstr() ? 1 : 0); - - MConstraint mpkfk = new MConstraint( - currentConstraintName, - foreignKey.getKey_seq(), - MConstraint.FOREIGN_KEY_CONSTRAINT, - deleteRule, - updateRule, - enableValidateRely, - parentTable, - childTable, - parentCD, - childCD, - childIntegerIndex, - parentIntegerIndex - ); - mpkfks.add(mpkfk); - - final String fkColType = getColumnFromTableColumns(childCols, fkColumnName).getType(); - fkSignature.append( - generateColNameTypeSignature(fkColumnName, fkColType)); - referencedKSignature.append( - generateColNameTypeSignature(pkColumnName, fkColType)); - - if (i + 1 < foreignKeys.size() && foreignKeys.get(i + 1).getKey_seq() == 1) { - // Next one is a new key, we bail out from the inner loop - break; - } - } - String referenced = referencedKSignature.toString(); - if (!validPKsOrUnique.contains(referenced)) { - throw new MetaException( - "Foreign key references " + referenced + " but no corresponding " - + "primary key or unique key exists. 
Possible keys: " + validPKsOrUnique); - } - if (sameTable && fkSignature.toString().equals(referenced)) { - throw new MetaException( - "Cannot be both foreign key and primary/unique key on same table: " + referenced); - } - fkSignature = new StringBuilder(); - referencedKSignature = new StringBuilder(); - } - pm.makePersistentAll(mpkfks); - - } - return foreignKeys; - } - - private static Set generateValidPKsOrUniqueSignatures(List tableCols, - List refTablePrimaryKeys, List refTableUniqueConstraints) { - final Set validPKsOrUnique = new HashSet<>(); - if (!refTablePrimaryKeys.isEmpty()) { - refTablePrimaryKeys.sort((o1, o2) -> { - int keyNameComp = o1.getPk_name().compareTo(o2.getPk_name()); - if (keyNameComp == 0) { - return Integer.compare(o1.getKey_seq(), o2.getKey_seq()); - } - return keyNameComp; - }); - StringBuilder pkSignature = new StringBuilder(); - for (SQLPrimaryKey pk : refTablePrimaryKeys) { - pkSignature.append( - generateColNameTypeSignature( - pk.getColumn_name(), getColumnFromTableColumns(tableCols, pk.getColumn_name()).getType())); - } - validPKsOrUnique.add(pkSignature.toString()); - } - if (!refTableUniqueConstraints.isEmpty()) { - refTableUniqueConstraints.sort((o1, o2) -> { - int keyNameComp = o1.getUk_name().compareTo(o2.getUk_name()); - if (keyNameComp == 0) { - return Integer.compare(o1.getKey_seq(), o2.getKey_seq()); - } - return keyNameComp; - }); - StringBuilder ukSignature = new StringBuilder(); - for (int j = 0; j < refTableUniqueConstraints.size(); j++) { - SQLUniqueConstraint uk = refTableUniqueConstraints.get(j); - ukSignature.append( - generateColNameTypeSignature( - uk.getColumn_name(), getColumnFromTableColumns(tableCols, uk.getColumn_name()).getType())); - if (j + 1 < refTableUniqueConstraints.size()) { - if (!refTableUniqueConstraints.get(j + 1).getUk_name().equals( - refTableUniqueConstraints.get(j).getUk_name())) { - validPKsOrUnique.add(ukSignature.toString()); - ukSignature = new StringBuilder(); - } - } else { - 
validPKsOrUnique.add(ukSignature.toString()); - } - } - } - return validPKsOrUnique; - } - - private static String generateColNameTypeSignature(String colName, String colType) { - return colName + ":" + colType + ";"; - } - - @Override - public List addPrimaryKeys(List pks) throws InvalidObjectException, - MetaException { - return addPrimaryKeys(pks, true); - } - - private List addPrimaryKeys(List pks, boolean retrieveCD) throws InvalidObjectException, - MetaException { - List mpks = new ArrayList<>(); - String constraintName = null; - - for (SQLPrimaryKey pk : pks) { - final String catName = normalizeIdentifier(pk.getCatName()); - final String tableDB = normalizeIdentifier(pk.getTable_db()); - final String tableName = normalizeIdentifier(pk.getTable_name()); - final String columnName = normalizeIdentifier(pk.getColumn_name()); - - // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. - // For instance, this is the case when we are creating the table. - AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); - MTable parentTable = nParentTable.mtbl; - if (parentTable == null) { - throw new InvalidObjectException("Parent table not found: " + tableName); - } - - MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); - int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? 
null : parentCD.getCols(), columnName); - if (parentIntegerIndex == -1) { - if (parentTable.getPartitionKeys() != null) { - parentCD = null; - parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); - } - if (parentIntegerIndex == -1) { - throw new InvalidObjectException("Parent column not found: " + columnName); - } - } - if (getPrimaryKeyConstraintName(parentTable.getDatabase().getCatalogName(), - parentTable.getDatabase().getName(), parentTable.getTableName()) != null) { - throw new MetaException(" Primary key already exists for: " + - TableName.getQualified(catName, tableDB, tableName)); - } - if (pk.getPk_name() == null) { - if (pk.getKey_seq() == 1) { - constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "pk"); - } - } else { - constraintName = normalizeIdentifier(pk.getPk_name()); - if (constraintNameAlreadyExists(parentTable, constraintName)) { - String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), - parentTable.getTableName(), constraintName); - throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); - } - } - - int enableValidateRely = (pk.isEnable_cstr() ? 4 : 0) + - (pk.isValidate_cstr() ? 2 : 0) + (pk.isRely_cstr() ? 
1 : 0); - MConstraint mpk = new MConstraint( - constraintName, - pk.getKey_seq(), - MConstraint.PRIMARY_KEY_CONSTRAINT, - null, - null, - enableValidateRely, - parentTable, - null, - parentCD, - null, - null, - parentIntegerIndex); - mpks.add(mpk); - - // Add normalized identifier back to result - pk.setCatName(catName); - pk.setTable_db(tableDB); - pk.setTable_name(tableName); - pk.setColumn_name(columnName); - pk.setPk_name(constraintName); - } - pm.makePersistentAll(mpks); - return pks; - } - - @Override - public List addUniqueConstraints(List uks) - throws InvalidObjectException, MetaException { - return addUniqueConstraints(uks, true); - } - - private List addUniqueConstraints(List uks, boolean retrieveCD) - throws InvalidObjectException, MetaException { - - List cstrs = new ArrayList<>(); - String constraintName = null; - - for (SQLUniqueConstraint uk : uks) { - final String catName = normalizeIdentifier(uk.getCatName()); - final String tableDB = normalizeIdentifier(uk.getTable_db()); - final String tableName = normalizeIdentifier(uk.getTable_name()); - final String columnName = normalizeIdentifier(uk.getColumn_name()); - - // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. - // For instance, this is the case when we are creating the table. - AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); - MTable parentTable = nParentTable.mtbl; - if (parentTable == null) { - throw new InvalidObjectException("Parent table not found: " + tableName); - } - - MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); - int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? 
null : parentCD.getCols(), columnName); - if (parentIntegerIndex == -1) { - if (parentTable.getPartitionKeys() != null) { - parentCD = null; - parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); - } - if (parentIntegerIndex == -1) { - throw new InvalidObjectException("Parent column not found: " + columnName); - } - } - if (uk.getUk_name() == null) { - if (uk.getKey_seq() == 1) { - constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "uk"); - } - } else { - constraintName = normalizeIdentifier(uk.getUk_name()); - if (constraintNameAlreadyExists(parentTable, constraintName)) { - String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), - parentTable.getTableName(), constraintName); - throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); - } - } - - - int enableValidateRely = (uk.isEnable_cstr() ? 4 : 0) + - (uk.isValidate_cstr() ? 2 : 0) + (uk.isRely_cstr() ? 
1 : 0); - MConstraint muk = new MConstraint( - constraintName, - uk.getKey_seq(), - MConstraint.UNIQUE_CONSTRAINT, - null, - null, - enableValidateRely, - parentTable, - null, - parentCD, - null, - null, - parentIntegerIndex); - cstrs.add(muk); - - // Add normalized identifier back to result - uk.setCatName(catName); - uk.setTable_db(tableDB); - uk.setTable_name(tableName); - uk.setColumn_name(columnName); - uk.setUk_name(constraintName); - - } - pm.makePersistentAll(cstrs); - return uks; - } - - @Override - public List addNotNullConstraints(List nns) - throws InvalidObjectException, MetaException { - return addNotNullConstraints(nns, true); - } - - @Override - public List addDefaultConstraints(List nns) - throws InvalidObjectException, MetaException { - return addDefaultConstraints(nns, true); - } - - @Override - public List addCheckConstraints(List nns) - throws InvalidObjectException, MetaException { - return addCheckConstraints(nns, true); - } - - private List addCheckConstraints(List ccs, boolean retrieveCD) - throws InvalidObjectException, MetaException { - List cstrs = new ArrayList<>(); - - for (SQLCheckConstraint cc: ccs) { - final String catName = normalizeIdentifier(cc.getCatName()); - final String tableDB = normalizeIdentifier(cc.getTable_db()); - final String tableName = normalizeIdentifier(cc.getTable_name()); - final String columnName = cc.getColumn_name() == null? 
null - : normalizeIdentifier(cc.getColumn_name()); - final String ccName = cc.getDc_name(); - boolean isEnable = cc.isEnable_cstr(); - boolean isValidate = cc.isValidate_cstr(); - boolean isRely = cc.isRely_cstr(); - String constraintValue = cc.getCheck_expression(); - MConstraint muk = addConstraint(catName, tableDB, tableName, columnName, ccName, isEnable, isRely, isValidate, - MConstraint.CHECK_CONSTRAINT, constraintValue, retrieveCD); - cstrs.add(muk); - - // Add normalized identifier back to result - cc.setCatName(catName); - cc.setTable_db(tableDB); - cc.setTable_name(tableName); - cc.setColumn_name(columnName); - cc.setDc_name(muk.getConstraintName()); - } - pm.makePersistentAll(cstrs); - return ccs; - } - - private MConstraint addConstraint(String catName, String tableDB, String tableName, String columnName, String ccName, - boolean isEnable, boolean isRely, boolean isValidate, int constraintType, - String constraintValue, boolean retrieveCD) - throws InvalidObjectException, MetaException { - String constraintName = null; - // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. - // For instance, this is the case when we are creating the table. - AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); - MTable parentTable = nParentTable.mtbl; - if (parentTable == null) { - throw new InvalidObjectException("Parent table not found: " + tableName); - } - - MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); - int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? 
null : parentCD.getCols(), columnName); - if (parentIntegerIndex == -1) { - if (parentTable.getPartitionKeys() != null) { - parentCD = null; - parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); - } - } - if (ccName == null) { - constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "dc"); - } else { - constraintName = normalizeIdentifier(ccName); - if (constraintNameAlreadyExists(parentTable, constraintName)) { - String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), - parentTable.getTableName(), constraintName); - throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); - } - } - - int enableValidateRely = (isEnable ? 4 : 0) + - (isValidate ? 2 : 0) + (isRely ? 1 : 0); - MConstraint muk = new MConstraint( - constraintName, - 1, - constraintType, // Not null constraint should reference a single column - null, - null, - enableValidateRely, - parentTable, - null, - parentCD, - null, - null, - parentIntegerIndex, - constraintValue); - - return muk; - } - - private List addDefaultConstraints(List dcs, boolean retrieveCD) - throws InvalidObjectException, MetaException { - - List cstrs = new ArrayList<>(); - for (SQLDefaultConstraint dc : dcs) { - final String catName = normalizeIdentifier(dc.getCatName()); - final String tableDB = normalizeIdentifier(dc.getTable_db()); - final String tableName = normalizeIdentifier(dc.getTable_name()); - final String columnName = normalizeIdentifier(dc.getColumn_name()); - final String dcName = dc.getDc_name(); - boolean isEnable = dc.isEnable_cstr(); - boolean isValidate = dc.isValidate_cstr(); - boolean isRely = dc.isRely_cstr(); - String constraintValue = dc.getDefault_value(); - MConstraint muk = addConstraint(catName, tableDB, tableName, columnName, dcName, isEnable, isRely, isValidate, - MConstraint.DEFAULT_CONSTRAINT, constraintValue, retrieveCD); - cstrs.add(muk); - - // Add normalized 
identifier back to result - dc.setCatName(catName); - dc.setTable_db(tableDB); - dc.setTable_name(tableName); - dc.setColumn_name(columnName); - dc.setDc_name(muk.getConstraintName()); - } - pm.makePersistentAll(cstrs); - return dcs; - } - - private List addNotNullConstraints(List nns, boolean retrieveCD) - throws InvalidObjectException, MetaException { - - List cstrs = new ArrayList<>(); - String constraintName; - - for (SQLNotNullConstraint nn : nns) { - final String catName = normalizeIdentifier(nn.getCatName()); - final String tableDB = normalizeIdentifier(nn.getTable_db()); - final String tableName = normalizeIdentifier(nn.getTable_name()); - final String columnName = normalizeIdentifier(nn.getColumn_name()); - - // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. - // For instance, this is the case when we are creating the table. - AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); - MTable parentTable = nParentTable.mtbl; - if (parentTable == null) { - throw new InvalidObjectException("Parent table not found: " + tableName); - } - - MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); - int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? 
null : parentCD.getCols(), columnName); - if (parentIntegerIndex == -1) { - if (parentTable.getPartitionKeys() != null) { - parentCD = null; - parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); - } - if (parentIntegerIndex == -1) { - throw new InvalidObjectException("Parent column not found: " + columnName); - } - } - if (nn.getNn_name() == null) { - constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "nn"); - } else { - constraintName = normalizeIdentifier(nn.getNn_name()); - if (constraintNameAlreadyExists(parentTable, constraintName)) { - String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), - parentTable.getTableName(), constraintName); - throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); - } - } - - int enableValidateRely = (nn.isEnable_cstr() ? 4 : 0) + - (nn.isValidate_cstr() ? 2 : 0) + (nn.isRely_cstr() ? 1 : 0); - MConstraint muk = new MConstraint( - constraintName, - 1, - MConstraint.NOT_NULL_CONSTRAINT, // Not null constraint should reference a single column - null, - null, - enableValidateRely, - parentTable, - null, - parentCD, - null, - null, - parentIntegerIndex); - cstrs.add(muk); - // Add normalized identifier back to result - nn.setCatName(catName); - nn.setTable_db(tableDB); - nn.setTable_name(tableName); - nn.setColumn_name(columnName); - nn.setNn_name(constraintName); - } - pm.makePersistentAll(cstrs); - return nns; - } - - @Override - public boolean addRole(String roleName, String ownerName) - throws InvalidObjectException, MetaException, NoSuchObjectException { - boolean success = false; - boolean commited = false; - try { - openTransaction(); - MRole nameCheck = this.getMRole(roleName); - if (nameCheck != null) { - throw new InvalidObjectException("Role " + roleName + " already exists."); - } - int now = (int) (System.currentTimeMillis() / 1000); - MRole mRole = new MRole(roleName, now, 
ownerName); - pm.makePersistent(mRole); - commited = commitTransaction(); - success = true; - } finally { - rollbackAndCleanup(commited, null); - } - return success; - } - - @Override - public boolean grantRole(Role role, String userName, - PrincipalType principalType, String grantor, PrincipalType grantorType, - boolean grantOption) throws MetaException, NoSuchObjectException,InvalidObjectException { - boolean success = false; - boolean commited = false; - try { - openTransaction(); - MRoleMap roleMap = null; - try { - roleMap = this.getMSecurityUserRoleMap(userName, principalType, role - .getRoleName()); - } catch (Exception e) { - } - if (roleMap != null) { - throw new InvalidObjectException("Principal " + userName - + " already has the role " + role.getRoleName()); - } - if (principalType == PrincipalType.ROLE) { - validateRole(userName); - } - MRole mRole = getMRole(role.getRoleName()); - long now = System.currentTimeMillis()/1000; - MRoleMap roleMember = new MRoleMap(userName, principalType.toString(), - mRole, (int) now, grantor, grantorType.toString(), grantOption); - pm.makePersistent(roleMember); - commited = commitTransaction(); - success = true; - } finally { - rollbackAndCleanup(commited, null); - } - return success; - } - - /** - * Verify that role with given name exists, if not throw exception - */ - private void validateRole(String roleName) throws NoSuchObjectException { - // if grantee is a role, check if it exists - MRole granteeRole = getMRole(roleName); - if (granteeRole == null) { - throw new NoSuchObjectException("Role " + roleName + " does not exist"); - } - } - - @Override - public boolean revokeRole(Role role, String userName, PrincipalType principalType, - boolean grantOption) throws MetaException, NoSuchObjectException { - boolean success = false; - try { - openTransaction(); - MRoleMap roleMember = getMSecurityUserRoleMap(userName, principalType, - role.getRoleName()); - if (grantOption) { - // Revoke with grant option - only remove the 
grant option but keep the role. - if (roleMember.getGrantOption()) { - roleMember.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with role " + role.getRoleName()); - } - } else { - // No grant option in revoke, remove the whole role. - pm.deletePersistent(roleMember); - } - success = commitTransaction(); - } finally { - rollbackAndCleanup(success, null); - } - return success; - } - - private MRoleMap getMSecurityUserRoleMap(String userName, PrincipalType principalType, - String roleName) { - MRoleMap mRoleMember = null; - boolean commited = false; - Query query = null; - try { - openTransaction(); - query = - pm.newQuery(MRoleMap.class, - "principalName == t1 && principalType == t2 && role.roleName == t3"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - query.setUnique(true); - mRoleMember = (MRoleMap) query.executeWithArray(userName, principalType.toString(), roleName); - pm.retrieve(mRoleMember); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, query); - } - return mRoleMember; - } - - @Override - public boolean removeRole(String roleName) throws MetaException, - NoSuchObjectException { - boolean success = false; - try { - openTransaction(); - MRole mRol = getMRole(roleName); - pm.retrieve(mRol); - if (mRol != null) { - // first remove all the membership, the membership that this role has - // been granted - List roleMap = listMRoleMembers(mRol.getRoleName()); - if (CollectionUtils.isNotEmpty(roleMap)) { - pm.deletePersistentAll(roleMap); - } - List roleMember = listMSecurityPrincipalMembershipRole(mRol - .getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(roleMember)) { - pm.deletePersistentAll(roleMember); - } - - // then remove all the grants - List userGrants = listPrincipalMGlobalGrants( - mRol.getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(userGrants)) { - 
pm.deletePersistentAll(userGrants); - } - - List dbGrants = listPrincipalAllDBGrant(mRol - .getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(dbGrants)) { - pm.deletePersistentAll(dbGrants); - } - - List dcGrants = listPrincipalAllDCGrant(mRol - .getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(dcGrants)) { - pm.deletePersistentAll(dcGrants); - } - - List tabPartGrants = listPrincipalAllTableGrants( - mRol.getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(tabPartGrants)) { - pm.deletePersistentAll(tabPartGrants); - } - - List partGrants = listPrincipalAllPartitionGrants( - mRol.getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(partGrants)) { - pm.deletePersistentAll(partGrants); - } - - List tblColumnGrants = listPrincipalAllTableColumnGrants( - mRol.getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(tblColumnGrants)) { - pm.deletePersistentAll(tblColumnGrants); - } - - List partColumnGrants = listPrincipalAllPartitionColumnGrants( - mRol.getRoleName(), PrincipalType.ROLE); - if (CollectionUtils.isNotEmpty(partColumnGrants)) { - pm.deletePersistentAll(partColumnGrants); - } - - // finally remove the role - pm.deletePersistent(mRol); - } - success = commitTransaction(); - } catch (Exception e) { - throw new MetaException(e.getMessage()); - } finally { - rollbackAndCleanup(success, null); - } - return success; - } - - /** - * Get all the roles in the role hierarchy that this user and groupNames belongs to - */ - private Set listAllRolesInHierarchy(String userName, - List groupNames) { - List ret = new ArrayList<>(); - if(userName != null) { - ret.addAll(listMRoles(userName, PrincipalType.USER)); - } - if (groupNames != null) { - for (String groupName: groupNames) { - ret.addAll(listMRoles(groupName, PrincipalType.GROUP)); - } - } - // get names of these roles and its ancestors - Set roleNames = new HashSet<>(); - getAllRoleAncestors(roleNames, ret); - return roleNames; - } 
- - /** - * Add role names of parentRoles and its parents to processedRoles - */ - private void getAllRoleAncestors(Set processedRoleNames, List parentRoles) { - for (MRoleMap parentRole : parentRoles) { - String parentRoleName = parentRole.getRole().getRoleName(); - if (!processedRoleNames.contains(parentRoleName)) { - // unprocessed role: get its parents, add it to processed, and call this - // function recursively - List nextParentRoles = listMRoles(parentRoleName, PrincipalType.ROLE); - processedRoleNames.add(parentRoleName); - getAllRoleAncestors(processedRoleNames, nextParentRoles); - } - } - } - - public List listMRoles(String principalName, - PrincipalType principalType) { - boolean success = false; - Query query = null; - List mRoleMember = new ArrayList<>(); - - try { - LOG.debug("Executing listRoles"); - - openTransaction(); - query = pm.newQuery(MRoleMap.class, "principalName == t1 && principalType == t2"); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - query.setUnique(false); - List mRoles = - (List) query.executeWithArray(principalName, principalType.toString()); - pm.retrieveAll(mRoles); - success = commitTransaction(); - - mRoleMember.addAll(mRoles); - - LOG.debug("Done retrieving all objects for listRoles"); - } finally { - rollbackAndCleanup(success, query); - } - - if (principalType == PrincipalType.USER) { - // All users belong to public role implicitly, add that role - // TODO MS-SPLIT Change this back to HMSHandler.PUBLIC once HiveMetaStore has moved to - // stand-alone metastore. 
- //MRole publicRole = new MRole(HMSHandler.PUBLIC, 0, HMSHandler.PUBLIC); - MRole publicRole = new MRole("public", 0, "public"); - mRoleMember.add(new MRoleMap(principalName, principalType.toString(), publicRole, 0, null, - null, false)); - } - - return mRoleMember; - } - - @Override - public List listRoles(String principalName, PrincipalType principalType) { - List result = new ArrayList<>(); - List roleMaps = listMRoles(principalName, principalType); - if (roleMaps != null) { - for (MRoleMap roleMap : roleMaps) { - MRole mrole = roleMap.getRole(); - Role role = new Role(mrole.getRoleName(), mrole.getCreateTime(), mrole.getOwnerName()); - result.add(role); - } - } - return result; - } - - @Override - public List listRolesWithGrants(String principalName, - PrincipalType principalType) { - List result = new ArrayList<>(); - List roleMaps = listMRoles(principalName, principalType); - if (roleMaps != null) { - for (MRoleMap roleMap : roleMaps) { - RolePrincipalGrant rolePrinGrant = new RolePrincipalGrant( - roleMap.getRole().getRoleName(), - roleMap.getPrincipalName(), - PrincipalType.valueOf(roleMap.getPrincipalType()), - roleMap.getGrantOption(), - roleMap.getAddTime(), - roleMap.getGrantor(), - // no grantor type for public role, hence the null check - roleMap.getGrantorType() == null ? 
null - : PrincipalType.valueOf(roleMap.getGrantorType()) - ); - result.add(rolePrinGrant); - } - } - return result; - } - - private List listMSecurityPrincipalMembershipRole(final String roleName, - final PrincipalType principalType) throws Exception { - LOG.debug("Executing listMSecurityPrincipalMembershipRole"); - - Preconditions.checkState(this.currentTransaction.isActive()); - - try (Query query = pm.newQuery(MRoleMap.class, "principalName == t1 && principalType == t2")) { - query.declareParameters("java.lang.String t1, java.lang.String t2"); - final List mRoleMemebership = (List) query.execute(roleName, principalType.toString()); - - LOG.debug("Retrieving all objects for listMSecurityPrincipalMembershipRole"); - pm.retrieveAll(mRoleMemebership); - LOG.debug("Done retrieving all objects for listMSecurityPrincipalMembershipRole: {}", mRoleMemebership); - - return Collections.unmodifiableList(new ArrayList<>(mRoleMemebership)); - } - } - - @Override - public Role getRole(String roleName) throws NoSuchObjectException { - MRole mRole = this.getMRole(roleName); - if (mRole == null) { - throw new NoSuchObjectException(roleName + " role can not be found."); - } - return new Role(mRole.getRoleName(), mRole.getCreateTime(), mRole - .getOwnerName()); - } - - private MRole getMRole(String roleName) { - MRole mrole = null; - boolean commited = false; - Query query = null; - try { - openTransaction(); - query = pm.newQuery(MRole.class, "roleName == t1"); - query.declareParameters("java.lang.String t1"); - query.setUnique(true); - mrole = (MRole) query.execute(roleName); - pm.retrieve(mrole); - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, query); - } - return mrole; - } - - @Override - public List listRoleNames() { - boolean success = false; - Query query = null; - try { - openTransaction(); - LOG.debug("Executing listAllRoleNames"); - query = pm.newQuery("select roleName from org.apache.hadoop.hive.metastore.model.MRole"); - 
query.setResult("roleName"); - Collection names = (Collection) query.execute(); - List roleNames = new ArrayList<>(); - for (Iterator i = names.iterator(); i.hasNext();) { - roleNames.add((String) i.next()); - } - success = commitTransaction(); - return roleNames; - } finally { - rollbackAndCleanup(success, query); - } - } - - @Override - public PrincipalPrivilegeSet getUserPrivilegeSet(String userName, - List groupNames) throws InvalidObjectException, MetaException { - boolean commited = false; - PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - try { - openTransaction(); - if (userName != null) { - List user = this.listPrincipalMGlobalGrants(userName, PrincipalType.USER); - if(CollectionUtils.isNotEmpty(user)) { - Map> userPriv = new HashMap<>(); - List grantInfos = new ArrayList<>(user.size()); - for (int i = 0; i < user.size(); i++) { - MGlobalPrivilege item = user.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item - .getGrantorType()), item.getGrantOption())); - } - userPriv.put(userName, grantInfos); - ret.setUserPrivileges(userPriv); - } - } - if (CollectionUtils.isNotEmpty(groupNames)) { - Map> groupPriv = new HashMap<>(); - for(String groupName: groupNames) { - List group = - this.listPrincipalMGlobalGrants(groupName, PrincipalType.GROUP); - if(CollectionUtils.isNotEmpty(group)) { - List grantInfos = new ArrayList<>(group.size()); - for (int i = 0; i < group.size(); i++) { - MGlobalPrivilege item = group.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item - .getGrantorType()), item.getGrantOption())); - } - groupPriv.put(groupName, grantInfos); - } - } - ret.setGroupPrivileges(groupPriv); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - return ret; - } - - private List getDBPrivilege(String catName, String dbName, - String 
principalName, PrincipalType principalType) { - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - - if (principalName != null) { - List userNameDbPriv = this.listPrincipalMDBGrants( - principalName, principalType, catName, dbName); - if (CollectionUtils.isNotEmpty(userNameDbPriv)) { - List grantInfos = new ArrayList<>( - userNameDbPriv.size()); - for (int i = 0; i < userNameDbPriv.size(); i++) { - MDBPrivilege item = userNameDbPriv.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item - .getGrantorType()), item.getGrantOption())); - } - return grantInfos; - } - } - return Collections.emptyList(); - } - - - @Override - public PrincipalPrivilegeSet getDBPrivilegeSet(String catName, String dbName, - String userName, List groupNames) throws InvalidObjectException, - MetaException { - boolean commited = false; - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - - PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - try { - openTransaction(); - if (userName != null) { - Map> dbUserPriv = new HashMap<>(); - dbUserPriv.put(userName, getDBPrivilege(catName, dbName, userName, - PrincipalType.USER)); - ret.setUserPrivileges(dbUserPriv); - } - if (CollectionUtils.isNotEmpty(groupNames)) { - Map> dbGroupPriv = new HashMap<>(); - for (String groupName : groupNames) { - dbGroupPriv.put(groupName, getDBPrivilege(catName, dbName, groupName, - PrincipalType.GROUP)); - } - ret.setGroupPrivileges(dbGroupPriv); - } - Set roleNames = listAllRolesInHierarchy(userName, groupNames); - if (CollectionUtils.isNotEmpty(roleNames)) { - Map> dbRolePriv = new HashMap<>(); - for (String roleName : roleNames) { - dbRolePriv - .put(roleName, getDBPrivilege(catName, dbName, roleName, PrincipalType.ROLE)); - } - ret.setRolePrivileges(dbRolePriv); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - 
return ret; - } - - private List getConnectorPrivilege(String catName, String connectorName, - String principalName, PrincipalType principalType) { - - // normalize string name - catName = normalizeIdentifier(catName); - connectorName = normalizeIdentifier(connectorName); - - if (principalName != null) { - // get all data connector granted privilege - List userNameDcPriv = this.listPrincipalMDCGrants( - principalName, principalType, catName, connectorName); - - // populate and return grantInfos - if (CollectionUtils.isNotEmpty(userNameDcPriv)) { - List grantInfos = new ArrayList<>( - userNameDcPriv.size()); - for (int i = 0; i < userNameDcPriv.size(); i++) { - MDCPrivilege item = userNameDcPriv.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item - .getGrantorType()), item.getGrantOption())); - } - return grantInfos; - } - } - - // return empty list if no principalName - return Collections.emptyList(); - } - - @Override - public PrincipalPrivilegeSet getConnectorPrivilegeSet (String catName, String connectorName, - String userName, List groupNames) throws InvalidObjectException, - MetaException { - - boolean commited = false; - catName = normalizeIdentifier(catName); - connectorName = normalizeIdentifier(connectorName); - - PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - try { - openTransaction(); - - // get user privileges - if (userName != null) { - Map> connectorUserPriv = new HashMap<>(); - connectorUserPriv.put(userName, getConnectorPrivilege(catName, connectorName, userName, - PrincipalType.USER)); - ret.setUserPrivileges(connectorUserPriv); - } - - // get group privileges - if (CollectionUtils.isNotEmpty(groupNames)) { - Map> dbGroupPriv = new HashMap<>(); - for (String groupName : groupNames) { - dbGroupPriv.put(groupName, getConnectorPrivilege(catName, connectorName, groupName, - PrincipalType.GROUP)); - } - ret.setGroupPrivileges(dbGroupPriv); - } - - // 
get role privileges - Set roleNames = listAllRolesInHierarchy(userName, groupNames); - if (CollectionUtils.isNotEmpty(roleNames)) { - Map> dbRolePriv = new HashMap<>(); - for (String roleName : roleNames) { - dbRolePriv.put(roleName, getConnectorPrivilege(catName, connectorName, roleName, - PrincipalType.ROLE)); - } - ret.setRolePrivileges(dbRolePriv); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - return ret; - - - } - - @Override - public PrincipalPrivilegeSet getPartitionPrivilegeSet(String catName, String dbName, - String tableName, String partition, String userName, - List groupNames) throws InvalidObjectException, MetaException { - boolean commited = false; - PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - - try { - openTransaction(); - if (userName != null) { - Map> partUserPriv = new HashMap<>(); - partUserPriv.put(userName, getPartitionPrivilege(catName, dbName, - tableName, partition, userName, PrincipalType.USER)); - ret.setUserPrivileges(partUserPriv); - } - if (CollectionUtils.isNotEmpty(groupNames)) { - Map> partGroupPriv = new HashMap<>(); - for (String groupName : groupNames) { - partGroupPriv.put(groupName, getPartitionPrivilege(catName, dbName, tableName, - partition, groupName, PrincipalType.GROUP)); - } - ret.setGroupPrivileges(partGroupPriv); - } - Set roleNames = listAllRolesInHierarchy(userName, groupNames); - if (CollectionUtils.isNotEmpty(roleNames)) { - Map> partRolePriv = new HashMap<>(); - for (String roleName : roleNames) { - partRolePriv.put(roleName, getPartitionPrivilege(catName, dbName, tableName, - partition, roleName, PrincipalType.ROLE)); - } - ret.setRolePrivileges(partRolePriv); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - return ret; - } - - @Override - public PrincipalPrivilegeSet 
getTablePrivilegeSet(String catName, String dbName, - String tableName, String userName, List groupNames) - throws InvalidObjectException, MetaException { - boolean commited = false; - PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - tableName = normalizeIdentifier(tableName); - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - - try { - openTransaction(); - if (userName != null) { - Map> tableUserPriv = new HashMap<>(); - tableUserPriv.put(userName, getTablePrivilege(catName, dbName, - tableName, userName, PrincipalType.USER)); - ret.setUserPrivileges(tableUserPriv); - } - if (CollectionUtils.isNotEmpty(groupNames)) { - Map> tableGroupPriv = new HashMap<>(); - for (String groupName : groupNames) { - tableGroupPriv.put(groupName, getTablePrivilege(catName, dbName, tableName, - groupName, PrincipalType.GROUP)); - } - ret.setGroupPrivileges(tableGroupPriv); - } - Set roleNames = listAllRolesInHierarchy(userName, groupNames); - if (CollectionUtils.isNotEmpty(roleNames)) { - Map> tableRolePriv = new HashMap<>(); - for (String roleName : roleNames) { - tableRolePriv.put(roleName, getTablePrivilege(catName, dbName, tableName, - roleName, PrincipalType.ROLE)); - } - ret.setRolePrivileges(tableRolePriv); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - return ret; - } - - @Override - public PrincipalPrivilegeSet getColumnPrivilegeSet(String catName, String dbName, - String tableName, String partitionName, String columnName, - String userName, List groupNames) throws InvalidObjectException, - MetaException { - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - columnName = normalizeIdentifier(columnName); - catName = normalizeIdentifier(catName); - - boolean commited = false; - PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - try { - openTransaction(); - if (userName != null) { - Map> columnUserPriv = new HashMap<>(); - 
columnUserPriv.put(userName, getColumnPrivilege(catName, dbName, tableName, - columnName, partitionName, userName, PrincipalType.USER)); - ret.setUserPrivileges(columnUserPriv); - } - if (CollectionUtils.isNotEmpty(groupNames)) { - Map> columnGroupPriv = new HashMap<>(); - for (String groupName : groupNames) { - columnGroupPriv.put(groupName, getColumnPrivilege(catName, dbName, tableName, - columnName, partitionName, groupName, PrincipalType.GROUP)); - } - ret.setGroupPrivileges(columnGroupPriv); - } - Set roleNames = listAllRolesInHierarchy(userName, groupNames); - if (CollectionUtils.isNotEmpty(roleNames)) { - Map> columnRolePriv = new HashMap<>(); - for (String roleName : roleNames) { - columnRolePriv.put(roleName, getColumnPrivilege(catName, dbName, tableName, - columnName, partitionName, roleName, PrincipalType.ROLE)); - } - ret.setRolePrivileges(columnRolePriv); - } - commited = commitTransaction(); - } finally { - rollbackAndCleanup(commited, null); - } - return ret; - } - - private List getPartitionPrivilege(String catName, String dbName, - String tableName, String partName, String principalName, - PrincipalType principalType) { - - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - - if (principalName != null) { - List userNameTabPartPriv = this - .listPrincipalMPartitionGrants(principalName, principalType, - catName, dbName, tableName, partName); - if (CollectionUtils.isNotEmpty(userNameTabPartPriv)) { - List grantInfos = new ArrayList<>( - userNameTabPartPriv.size()); - for (int i = 0; i < userNameTabPartPriv.size(); i++) { - MPartitionPrivilege item = userNameTabPartPriv.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), - getPrincipalTypeFromStr(item.getGrantorType()), item.getGrantOption())); - - } - return grantInfos; - } - } - return new ArrayList<>(0); - } - - private PrincipalType getPrincipalTypeFromStr(String 
str) { - return str == null ? null : PrincipalType.valueOf(str); - } - - private List getTablePrivilege(String catName, String dbName, - String tableName, String principalName, PrincipalType principalType) { - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - - if (principalName != null) { - List userNameTabPartPriv = this - .listAllMTableGrants(principalName, principalType, - catName, dbName, tableName); - if (CollectionUtils.isNotEmpty(userNameTabPartPriv)) { - List grantInfos = new ArrayList<>( - userNameTabPartPriv.size()); - for (int i = 0; i < userNameTabPartPriv.size(); i++) { - MTablePrivilege item = userNameTabPartPriv.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item - .getGrantorType()), item.getGrantOption())); - } - return grantInfos; - } - } - return Collections.emptyList(); - } - - private List getColumnPrivilege(String catName, String dbName, - String tableName, String columnName, String partitionName, - String principalName, PrincipalType principalType) { - - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - columnName = normalizeIdentifier(columnName); - catName = normalizeIdentifier(catName); - - if (partitionName == null) { - List userNameColumnPriv = this - .listPrincipalMTableColumnGrants(principalName, principalType, - catName, dbName, tableName, columnName); - if (CollectionUtils.isNotEmpty(userNameColumnPriv)) { - List grantInfos = new ArrayList<>( - userNameColumnPriv.size()); - for (int i = 0; i < userNameColumnPriv.size(); i++) { - MTableColumnPrivilege item = userNameColumnPriv.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item - .getGrantorType()), item.getGrantOption())); - } - return grantInfos; - } - } else { - List userNameColumnPriv = this 
- .listPrincipalMPartitionColumnGrants(principalName, - principalType, catName, dbName, tableName, partitionName, columnName); - if (CollectionUtils.isNotEmpty(userNameColumnPriv)) { - List grantInfos = new ArrayList<>( - userNameColumnPriv.size()); - for (int i = 0; i < userNameColumnPriv.size(); i++) { - MPartitionColumnPrivilege item = userNameColumnPriv.get(i); - grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item - .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item - .getGrantorType()), item.getGrantOption())); - } - return grantInfos; - } - } - return Collections.emptyList(); - } - - @Override - public boolean grantPrivileges(PrivilegeBag privileges) throws InvalidObjectException, - MetaException, NoSuchObjectException { - boolean committed = false; - int now = (int) (System.currentTimeMillis() / 1000); - try { - openTransaction(); - List persistentObjs = new ArrayList<>(); - - List privilegeList = privileges.getPrivileges(); - - if (CollectionUtils.isNotEmpty(privilegeList)) { - Iterator privIter = privilegeList.iterator(); - Set privSet = new HashSet<>(); - while (privIter.hasNext()) { - HiveObjectPrivilege privDef = privIter.next(); - HiveObjectRef hiveObject = privDef.getHiveObject(); - String privilegeStr = privDef.getGrantInfo().getPrivilege(); - String[] privs = privilegeStr.split(","); - String userName = privDef.getPrincipalName(); - String authorizer = privDef.getAuthorizer(); - PrincipalType principalType = privDef.getPrincipalType(); - String grantor = privDef.getGrantInfo().getGrantor(); - String grantorType = privDef.getGrantInfo().getGrantorType().toString(); - boolean grantOption = privDef.getGrantInfo().isGrantOption(); - privSet.clear(); - - if(principalType == PrincipalType.ROLE){ - validateRole(userName); - } - - String catName = hiveObject.isSetCatName() ? 
hiveObject.getCatName() : - getDefaultCatalog(conf); - if (hiveObject.getObjectType() == HiveObjectType.GLOBAL) { - List globalPrivs = this - .listPrincipalMGlobalGrants(userName, principalType, authorizer); - for (MGlobalPrivilege priv : globalPrivs) { - if (priv.getGrantor().equalsIgnoreCase(grantor)) { - privSet.add(priv.getPrivilege()); - } - } - for (String privilege : privs) { - if (privSet.contains(privilege)) { - throw new InvalidObjectException(privilege - + " is already granted by " + grantor); - } - MGlobalPrivilege mGlobalPrivs = new MGlobalPrivilege(userName, - principalType.toString(), privilege, now, grantor, grantorType, grantOption, - authorizer); - persistentObjs.add(mGlobalPrivs); - } - } else if (hiveObject.getObjectType() == HiveObjectType.DATABASE) { - MDatabase dbObj = getMDatabase(catName, hiveObject.getDbName()); - List dbPrivs = this.listPrincipalMDBGrants( - userName, principalType, catName, hiveObject.getDbName(), authorizer); - for (MDBPrivilege priv : dbPrivs) { - if (priv.getGrantor().equalsIgnoreCase(grantor)) { - privSet.add(priv.getPrivilege()); - } - } - for (String privilege : privs) { - if (privSet.contains(privilege)) { - throw new InvalidObjectException(privilege - + " is already granted on database " - + hiveObject.getDbName() + " by " + grantor); - } - MDBPrivilege mDb = new MDBPrivilege(userName, principalType - .toString(), dbObj, privilege, now, grantor, grantorType, grantOption, authorizer); - persistentObjs.add(mDb); - } - } else if (hiveObject.getObjectType() == HiveObjectType.DATACONNECTOR) { - MDataConnector dcObj = getMDataConnector(hiveObject.getObjectName()); - List dcPrivs = this.listPrincipalMDCGrants(userName, principalType, - hiveObject.getObjectName(), authorizer); - for (MDCPrivilege priv : dcPrivs) { - if (priv.getGrantor().equalsIgnoreCase(grantor)) { - privSet.add(priv.getPrivilege()); - } - } - for (String privilege : privs) { - if (privSet.contains(privilege)) { - throw new 
InvalidObjectException(privilege - + " is already granted on data connector " - + hiveObject.getDbName() + " by " + grantor); - } - MDCPrivilege mDc = new MDCPrivilege(userName, principalType - .toString(), dcObj, privilege, now, grantor, grantorType, grantOption, authorizer); - persistentObjs.add(mDc); - } - } else if (hiveObject.getObjectType() == HiveObjectType.TABLE) { - MTable tblObj = getMTable(catName, hiveObject.getDbName(), hiveObject - .getObjectName()); - if (tblObj != null) { - List tablePrivs = this - .listAllMTableGrants(userName, principalType, - catName, hiveObject.getDbName(), hiveObject.getObjectName(), authorizer); - for (MTablePrivilege priv : tablePrivs) { - if (priv.getGrantor() != null - && priv.getGrantor().equalsIgnoreCase(grantor)) { - privSet.add(priv.getPrivilege()); - } - } - for (String privilege : privs) { - if (privSet.contains(privilege)) { - throw new InvalidObjectException(privilege - + " is already granted on table [" - + hiveObject.getDbName() + "," - + hiveObject.getObjectName() + "] by " + grantor); - } - MTablePrivilege mTab = new MTablePrivilege( - userName, principalType.toString(), tblObj, - privilege, now, grantor, grantorType, grantOption, authorizer); - persistentObjs.add(mTab); - } - } - } else if (hiveObject.getObjectType() == HiveObjectType.PARTITION) { - MPartition partObj = this.getMPartition(catName, hiveObject.getDbName(), - hiveObject.getObjectName(), hiveObject.getPartValues(), null); - String partName = null; - if (partObj != null) { - partName = partObj.getPartitionName(); - List partPrivs = this - .listPrincipalMPartitionGrants(userName, - principalType, catName, hiveObject.getDbName(), hiveObject - .getObjectName(), partObj.getPartitionName(), authorizer); - for (MPartitionPrivilege priv : partPrivs) { - if (priv.getGrantor().equalsIgnoreCase(grantor)) { - privSet.add(priv.getPrivilege()); - } - } - for (String privilege : privs) { - if (privSet.contains(privilege)) { - throw new 
InvalidObjectException(privilege - + " is already granted on partition [" - + hiveObject.getDbName() + "," - + hiveObject.getObjectName() + "," - + partName + "] by " + grantor); - } - MPartitionPrivilege mTab = new MPartitionPrivilege(userName, - principalType.toString(), partObj, privilege, now, grantor, - grantorType, grantOption, authorizer); - persistentObjs.add(mTab); - } - } - } else if (hiveObject.getObjectType() == HiveObjectType.COLUMN) { - MTable tblObj = getMTable(catName, hiveObject.getDbName(), hiveObject - .getObjectName()); - if (tblObj != null) { - if (hiveObject.getPartValues() != null) { - MPartition partObj = null; - List colPrivs = null; - partObj = this.getMPartition(catName, hiveObject.getDbName(), hiveObject - .getObjectName(), hiveObject.getPartValues(), tblObj); - if (partObj == null) { - continue; - } - colPrivs = this.listPrincipalMPartitionColumnGrants( - userName, principalType, catName, hiveObject.getDbName(), hiveObject - .getObjectName(), partObj.getPartitionName(), - hiveObject.getColumnName(), authorizer); - - for (MPartitionColumnPrivilege priv : colPrivs) { - if (priv.getGrantor().equalsIgnoreCase(grantor)) { - privSet.add(priv.getPrivilege()); - } - } - for (String privilege : privs) { - if (privSet.contains(privilege)) { - throw new InvalidObjectException(privilege - + " is already granted on column " - + hiveObject.getColumnName() + " [" - + hiveObject.getDbName() + "," - + hiveObject.getObjectName() + "," - + partObj.getPartitionName() + "] by " + grantor); - } - MPartitionColumnPrivilege mCol = new MPartitionColumnPrivilege(userName, - principalType.toString(), partObj, hiveObject - .getColumnName(), privilege, now, grantor, grantorType, - grantOption, authorizer); - persistentObjs.add(mCol); - } - - } else { - List colPrivs = null; - colPrivs = this.listPrincipalMTableColumnGrants( - userName, principalType, catName, hiveObject.getDbName(), hiveObject - .getObjectName(), hiveObject.getColumnName(), authorizer); - - for 
(MTableColumnPrivilege priv : colPrivs) { - if (priv.getGrantor().equalsIgnoreCase(grantor)) { - privSet.add(priv.getPrivilege()); - } - } - for (String privilege : privs) { - if (privSet.contains(privilege)) { - throw new InvalidObjectException(privilege - + " is already granted on column " - + hiveObject.getColumnName() + " [" - + hiveObject.getDbName() + "," - + hiveObject.getObjectName() + "] by " + grantor); - } - MTableColumnPrivilege mCol = new MTableColumnPrivilege(userName, - principalType.toString(), tblObj, hiveObject - .getColumnName(), privilege, now, grantor, grantorType, - grantOption, authorizer); - persistentObjs.add(mCol); - } - } - } - } - } - } - if (CollectionUtils.isNotEmpty(persistentObjs)) { - pm.makePersistentAll(persistentObjs); - } - committed = commitTransaction(); - } finally { - rollbackAndCleanup(committed, null); - } - return committed; - } - - @Override - public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) - throws InvalidObjectException, MetaException, NoSuchObjectException { - boolean committed = false; - try { - openTransaction(); - List persistentObjs = new ArrayList<>(); - - List privilegeList = privileges.getPrivileges(); - - - if (CollectionUtils.isNotEmpty(privilegeList)) { - Iterator privIter = privilegeList.iterator(); - - while (privIter.hasNext()) { - HiveObjectPrivilege privDef = privIter.next(); - HiveObjectRef hiveObject = privDef.getHiveObject(); - String privilegeStr = privDef.getGrantInfo().getPrivilege(); - if (privilegeStr == null || privilegeStr.trim().equals("")) { - continue; - } - String[] privs = privilegeStr.split(","); - String userName = privDef.getPrincipalName(); - PrincipalType principalType = privDef.getPrincipalType(); - - String catName = hiveObject.isSetCatName() ? 
hiveObject.getCatName() : - getDefaultCatalog(conf); - if (hiveObject.getObjectType() == HiveObjectType.GLOBAL) { - List mSecUser = this.listPrincipalMGlobalGrants( - userName, principalType); - boolean found = false; - for (String privilege : privs) { - for (MGlobalPrivilege userGrant : mSecUser) { - String userGrantPrivs = userGrant.getPrivilege(); - if (privilege.equals(userGrantPrivs)) { - found = true; - if (grantOption) { - if (userGrant.getGrantOption()) { - userGrant.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with privilege " + privilege); - } - } - persistentObjs.add(userGrant); - break; - } - } - if (!found) { - throw new InvalidObjectException( - "No user grant found for privileges " + privilege); - } - } - - } else if (hiveObject.getObjectType() == HiveObjectType.DATABASE) { - MDatabase dbObj = getMDatabase(catName, hiveObject.getDbName()); - String db = hiveObject.getDbName(); - boolean found = false; - List dbGrants = this.listPrincipalMDBGrants( - userName, principalType, catName, db); - for (String privilege : privs) { - for (MDBPrivilege dbGrant : dbGrants) { - String dbGrantPriv = dbGrant.getPrivilege(); - if (privilege.equals(dbGrantPriv)) { - found = true; - if (grantOption) { - if (dbGrant.getGrantOption()) { - dbGrant.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with privilege " + privilege); - } - } - persistentObjs.add(dbGrant); - break; - } - } - if (!found) { - throw new InvalidObjectException( - "No database grant found for privileges " + privilege - + " on database " + db); - } - } - } else if (hiveObject.getObjectType() == HiveObjectType.DATACONNECTOR) { - MDataConnector dCObj = getMDataConnector(hiveObject.getObjectName()); - String dc = hiveObject.getObjectName(); - boolean found = false; - List dcGrants = this.listPrincipalMDCGrants( - userName, principalType, catName, dc); - for (String privilege : 
privs) { - for (MDCPrivilege dcGrant : dcGrants) { - String dcGrantPriv = dcGrant.getPrivilege(); - if (privilege.equals(dcGrantPriv)) { - found = true; - if (grantOption) { - if (dcGrant.getGrantOption()) { - dcGrant.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with privilege " + privilege); - } - } - persistentObjs.add(dcGrant); - break; - } - } - if (!found) { - throw new InvalidObjectException( - "No dataconnector grant found for privileges " + privilege - + " on data connector " + dc); - } - } - } else if (hiveObject.getObjectType() == HiveObjectType.TABLE) { - boolean found = false; - List tableGrants = this - .listAllMTableGrants(userName, principalType, - catName, hiveObject.getDbName(), hiveObject.getObjectName()); - for (String privilege : privs) { - for (MTablePrivilege tabGrant : tableGrants) { - String tableGrantPriv = tabGrant.getPrivilege(); - if (privilege.equalsIgnoreCase(tableGrantPriv)) { - found = true; - if (grantOption) { - if (tabGrant.getGrantOption()) { - tabGrant.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with privilege " + privilege); - } - } - persistentObjs.add(tabGrant); - break; - } - } - if (!found) { - throw new InvalidObjectException("No grant (" + privilege - + ") found " + " on table " + hiveObject.getObjectName() - + ", database is " + hiveObject.getDbName()); - } - } - } else if (hiveObject.getObjectType() == HiveObjectType.PARTITION) { - - boolean found = false; - Table tabObj = - this.getTable(catName, hiveObject.getDbName(), - hiveObject.getObjectName(), null); - String partName = null; - if (hiveObject.getPartValues() != null) { - partName = Warehouse.makePartName(tabObj.getPartitionKeys(), hiveObject.getPartValues()); - } - List partitionGrants = this - .listPrincipalMPartitionGrants(userName, principalType, - catName, hiveObject.getDbName(), hiveObject.getObjectName(), partName); - for 
(String privilege : privs) { - for (MPartitionPrivilege partGrant : partitionGrants) { - String partPriv = partGrant.getPrivilege(); - if (partPriv.equalsIgnoreCase(privilege)) { - found = true; - if (grantOption) { - if (partGrant.getGrantOption()) { - partGrant.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with privilege " + privilege); - } - } - persistentObjs.add(partGrant); - break; - } - } - if (!found) { - throw new InvalidObjectException("No grant (" + privilege - + ") found " + " on table " + tabObj.getTableName() - + ", partition is " + partName + ", database is " + tabObj.getDbName()); - } - } - } else if (hiveObject.getObjectType() == HiveObjectType.COLUMN) { - - Table tabObj = this.getTable(catName, hiveObject.getDbName(), hiveObject - .getObjectName(), null); - String partName = null; - if (hiveObject.getPartValues() != null) { - partName = Warehouse.makePartName(tabObj.getPartitionKeys(), - hiveObject.getPartValues()); - } - - if (partName != null) { - List mSecCol = listPrincipalMPartitionColumnGrants( - userName, principalType, catName, hiveObject.getDbName(), hiveObject - .getObjectName(), partName, hiveObject.getColumnName()); - boolean found = false; - for (String privilege : privs) { - for (MPartitionColumnPrivilege col : mSecCol) { - String colPriv = col.getPrivilege(); - if (colPriv.equalsIgnoreCase(privilege)) { - found = true; - if (grantOption) { - if (col.getGrantOption()) { - col.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with privilege " + privilege); - } - } - persistentObjs.add(col); - break; - } - } - if (!found) { - throw new InvalidObjectException("No grant (" + privilege - + ") found " + " on table " + tabObj.getTableName() - + ", partition is " + partName + ", column name = " - + hiveObject.getColumnName() + ", database is " - + tabObj.getDbName()); - } - } - } else { - List mSecCol = 
listPrincipalMTableColumnGrants( - userName, principalType, catName, hiveObject.getDbName(), hiveObject - .getObjectName(), hiveObject.getColumnName()); - boolean found = false; - for (String privilege : privs) { - for (MTableColumnPrivilege col : mSecCol) { - String colPriv = col.getPrivilege(); - if (colPriv.equalsIgnoreCase(privilege)) { - found = true; - if (grantOption) { - if (col.getGrantOption()) { - col.setGrantOption(false); - } else { - throw new MetaException("User " + userName - + " does not have grant option with privilege " + privilege); - } - } - persistentObjs.add(col); - break; - } - } - if (!found) { - throw new InvalidObjectException("No grant (" + privilege - + ") found " + " on table " + tabObj.getTableName() - + ", column name = " - + hiveObject.getColumnName() + ", database is " - + tabObj.getDbName()); - } - } - } - - } - } - } - - if (CollectionUtils.isNotEmpty(persistentObjs)) { - if (grantOption) { - // If grant option specified, only update the privilege, don't remove it. 
- // Grant option has already been removed from the privileges in the section above - } else { - pm.deletePersistentAll(persistentObjs); - } - } - committed = commitTransaction(); - } finally { - rollbackAndCleanup(committed, null); - } - return committed; - } - - class PrivilegeWithoutCreateTimeComparator implements Comparator { - @Override - public int compare(HiveObjectPrivilege o1, HiveObjectPrivilege o2) { - int createTime1 = o1.getGrantInfo().getCreateTime(); - int createTime2 = o2.getGrantInfo().getCreateTime(); - o1.getGrantInfo().setCreateTime(0); - o2.getGrantInfo().setCreateTime(0); - int result = o1.compareTo(o2); - o1.getGrantInfo().setCreateTime(createTime1); - o2.getGrantInfo().setCreateTime(createTime2); - return result; - } - } - - @Override - public boolean refreshPrivileges(HiveObjectRef objToRefresh, String authorizer, PrivilegeBag grantPrivileges) - throws InvalidObjectException, MetaException, NoSuchObjectException { - boolean committed = false; - try { - openTransaction(); - Set revokePrivilegeSet - = new TreeSet<>(new PrivilegeWithoutCreateTimeComparator()); - Set grantPrivilegeSet - = new TreeSet<>(new PrivilegeWithoutCreateTimeComparator()); - - List grants = null; - String catName = objToRefresh.isSetCatName() ? 
objToRefresh.getCatName() : - getDefaultCatalog(conf); - switch (objToRefresh.getObjectType()) { - case DATABASE: - try { - grants = this.listDBGrantsAll(catName, objToRefresh.getDbName(), authorizer); - } catch (Exception e) { - throw new MetaException(e.getMessage()); - } - break; - case DATACONNECTOR: - try { - grants = this.listDCGrantsAll(objToRefresh.getObjectName(), authorizer); - } catch (Exception e) { - throw new MetaException(e.getMessage()); - } - break; - case TABLE: - grants = listTableGrantsAll(catName, objToRefresh.getDbName(), objToRefresh.getObjectName(), authorizer); - break; - case COLUMN: - Preconditions.checkArgument(objToRefresh.getColumnName()==null, "columnName must be null"); - grants = getTableAllColumnGrants(catName, objToRefresh.getDbName(), - objToRefresh.getObjectName(), authorizer); - break; - default: - throw new MetaException("Unexpected object type " + objToRefresh.getObjectType()); - } - revokePrivilegeSet.addAll(grants); - - // Optimize revoke/grant list, remove the overlapping - if (grantPrivileges.getPrivileges() != null) { - for (HiveObjectPrivilege grantPrivilege : grantPrivileges.getPrivileges()) { - if (revokePrivilegeSet.contains(grantPrivilege)) { - revokePrivilegeSet.remove(grantPrivilege); - } else { - grantPrivilegeSet.add(grantPrivilege); - } - } - } - if (!revokePrivilegeSet.isEmpty()) { - LOG.debug("Found " + revokePrivilegeSet.size() + " new revoke privileges to be synced."); - PrivilegeBag remainingRevokePrivileges = new PrivilegeBag(); - for (HiveObjectPrivilege revokePrivilege : revokePrivilegeSet) { - remainingRevokePrivileges.addToPrivileges(revokePrivilege); - } - revokePrivileges(remainingRevokePrivileges, false); - } else { - LOG.debug("No new revoke privileges are required to be synced."); - } - if (!grantPrivilegeSet.isEmpty()) { - LOG.debug("Found " + grantPrivilegeSet.size() + " new grant privileges to be synced."); - PrivilegeBag remainingGrantPrivileges = new PrivilegeBag(); - for 
(HiveObjectPrivilege grantPrivilege : grantPrivilegeSet) { - remainingGrantPrivileges.addToPrivileges(grantPrivilege); - } - grantPrivileges(remainingGrantPrivileges); - } else { - LOG.debug("No new grant privileges are required to be synced."); - } - committed = commitTransaction(); - } finally { - rollbackAndCleanup(committed, null); - } - return committed; - } - - private List getTableAllColumnGrants(String catName, String dbName, - String tableName, String authorizer) - throws MetaException, NoSuchObjectException { - return new GetListHelper(normalizeIdentifier(catName), - normalizeIdentifier(dbName), normalizeIdentifier(tableName), true, true) { - - @Override - protected String describeResult() { - return "Table column privileges."; - } - - @Override - protected List getSqlResult(GetHelper> ctx) - throws MetaException { - return directSql.getTableAllColumnGrants(catName, dbName, tblName, authorizer); - } - - @Override - protected List getJdoResult(GetHelper> ctx) { - return convertTableCols(listTableAllColumnGrants(catName, dbName, tblName, authorizer)); - } - }.run(false); - } - - public List listMRoleMembers(String roleName) { - boolean success = false; - Query query = null; - List mRoleMemeberList = new ArrayList<>(); - try { - LOG.debug("Executing listRoleMembers"); - - openTransaction(); - query = pm.newQuery(MRoleMap.class, "role.roleName == t1"); - query.declareParameters("java.lang.String t1"); - query.setUnique(false); - List mRoles = (List) query.execute(roleName); - pm.retrieveAll(mRoles); - success = commitTransaction(); - - mRoleMemeberList.addAll(mRoles); - - LOG.debug("Done retrieving all objects for listRoleMembers"); - } finally { - rollbackAndCleanup(success, query); - } - return mRoleMemeberList; - } - - @Override - public List listRoleMembers(String roleName) { - List roleMaps = listMRoleMembers(roleName); - List rolePrinGrantList = new ArrayList<>(); - - if (roleMaps != null) { - for (MRoleMap roleMap : roleMaps) { - RolePrincipalGrant 
rolePrinGrant = new RolePrincipalGrant( - roleMap.getRole().getRoleName(), - roleMap.getPrincipalName(), - PrincipalType.valueOf(roleMap.getPrincipalType()), - roleMap.getGrantOption(), - roleMap.getAddTime(), - roleMap.getGrantor(), - // no grantor type for public role, hence the null check - roleMap.getGrantorType() == null ? null - : PrincipalType.valueOf(roleMap.getGrantorType()) - ); - rolePrinGrantList.add(rolePrinGrant); - - } - } - return rolePrinGrantList; - } - - private List listPrincipalMGlobalGrants(String principalName, - PrincipalType principalType) { - return listPrincipalMGlobalGrants(principalName, principalType, null); - } - - private List listPrincipalMGlobalGrants(String principalName, - PrincipalType principalType, String authorizer) { - boolean commited = false; - Query query = null; - List userNameDbPriv = new ArrayList<>(); - try { - List mPrivs = null; - openTransaction(); - if (principalName != null) { - if (authorizer != null) { - query = pm.newQuery(MGlobalPrivilege.class, "principalName == t1 && principalType == t2 " - + "&& authorizer == t3"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - mPrivs = (List) query - .executeWithArray(principalName, principalType.toString(), authorizer); - } else { - query = pm.newQuery(MGlobalPrivilege.class, "principalName == t1 && principalType == t2 "); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - mPrivs = (List) query - .executeWithArray(principalName, principalType.toString()); - } - pm.retrieveAll(mPrivs); - } - commited = commitTransaction(); - if (mPrivs != null) { - userNameDbPriv.addAll(mPrivs); - } - } finally { - rollbackAndCleanup(commited, query); - } - return userNameDbPriv; - } - - @Override - public List listPrincipalGlobalGrants(String principalName, - PrincipalType principalType) { - List mUsers = - listPrincipalMGlobalGrants(principalName, principalType); - if (mUsers.isEmpty()) { - return 
Collections.emptyList(); - } - List result = new ArrayList<>(); - for (int i = 0; i < mUsers.size(); i++) { - MGlobalPrivilege sUsr = mUsers.get(i); - HiveObjectRef objectRef = new HiveObjectRef( - HiveObjectType.GLOBAL, null, null, null, null); - HiveObjectPrivilege secUser = new HiveObjectPrivilege( - objectRef, sUsr.getPrincipalName(), principalType, - new PrivilegeGrantInfo(sUsr.getPrivilege(), sUsr - .getCreateTime(), sUsr.getGrantor(), PrincipalType - .valueOf(sUsr.getGrantorType()), sUsr.getGrantOption()), - sUsr.getAuthorizer()); - result.add(secUser); - } - return result; - } - - @Override - public List listGlobalGrantsAll() { - boolean commited = false; - Query query = null; - try { - openTransaction(); - query = pm.newQuery(MGlobalPrivilege.class); - List userNameDbPriv = (List) query.execute(); - pm.retrieveAll(userNameDbPriv); - commited = commitTransaction(); - return convertGlobal(userNameDbPriv); - } finally { - rollbackAndCleanup(commited, query); - } - } - - private List convertGlobal(List privs) { - List result = new ArrayList<>(); - for (MGlobalPrivilege priv : privs) { - String pname = priv.getPrincipalName(); - String authorizer = priv.getAuthorizer(); - PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); - - HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.GLOBAL, null, null, null, null); - PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), - priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); - - result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); - } - return result; - } - - private List listPrincipalMDBGrants(String principalName, - PrincipalType principalType, String catName, String dbName) { - return listPrincipalMDBGrants(principalName, principalType, catName, dbName, null); - } - - private List listPrincipalMDBGrants(String principalName, - PrincipalType principalType, String catName, String 
dbName, String authorizer) { - boolean success = false; - Query query = null; - List mSecurityDBList = new ArrayList<>(); - dbName = normalizeIdentifier(dbName); - try { - LOG.debug("Executing listPrincipalDBGrants"); - - openTransaction(); - List mPrivs; - if (authorizer != null) { - query = pm.newQuery(MDBPrivilege.class, - "principalName == t1 && principalType == t2 && database.name == t3 && " + - "database.catalogName == t4 && authorizer == t5"); - query.declareParameters( - "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4, " - + "java.lang.String t5"); - mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), - dbName, catName, authorizer); - } else { - query = pm.newQuery(MDBPrivilege.class, - "principalName == t1 && principalType == t2 && database.name == t3 && database.catalogName == t4"); - query.declareParameters( - "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4"); - mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), - dbName, catName); - } - pm.retrieveAll(mPrivs); - success = commitTransaction(); - - mSecurityDBList.addAll(mPrivs); - LOG.debug("Done retrieving all objects for listPrincipalDBGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityDBList; - } - - private List listPrincipalMDCGrants(String principalName, - PrincipalType principalType, String dcName) { - return listPrincipalMDCGrants(principalName, principalType, dcName, null); - } - - private List listPrincipalMDCGrants(String principalName, - PrincipalType principalType, String dcName, String authorizer) { - boolean success = false; - Query query = null; - List mSecurityDCList = new ArrayList<>(); - dcName = normalizeIdentifier(dcName); - try { - LOG.debug("Executing listPrincipalDCGrants"); - - openTransaction(); - List mPrivs; - if (authorizer != null) { - query = pm.newQuery(MDCPrivilege.class, - "principalName == t1 && principalType 
== t2 && dataConnector.name == t3 && " + - "authorizer == t4"); - query.declareParameters( - "java.lang.String t1, java.lang.String t2, java.lang.String t3, " - + "java.lang.String t4"); - mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), - dcName, authorizer); - } else { - query = pm.newQuery(MDCPrivilege.class, - "principalName == t1 && principalType == t2 && dataConnector.name == t3"); - query.declareParameters( - "java.lang.String t1, java.lang.String t2, java.lang.String t3"); - mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), dcName); - } - pm.retrieveAll(mPrivs); - success = commitTransaction(); - - mSecurityDCList.addAll(mPrivs); - LOG.debug("Done retrieving all objects for listPrincipalDCGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityDCList; - } - - @Override - public List listPrincipalDBGrants(String principalName, - PrincipalType principalType, - String catName, String dbName) { - List mDbs = listPrincipalMDBGrants(principalName, principalType, catName, dbName); - if (mDbs.isEmpty()) { - return Collections.emptyList(); - } - List result = new ArrayList<>(); - for (int i = 0; i < mDbs.size(); i++) { - MDBPrivilege sDB = mDbs.get(i); - HiveObjectRef objectRef = new HiveObjectRef( - HiveObjectType.DATABASE, dbName, null, null, null); - objectRef.setCatName(catName); - HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, - sDB.getPrincipalName(), principalType, - new PrivilegeGrantInfo(sDB.getPrivilege(), sDB - .getCreateTime(), sDB.getGrantor(), PrincipalType - .valueOf(sDB.getGrantorType()), sDB.getGrantOption()), sDB.getAuthorizer()); - result.add(secObj); - } - return result; - } - - @Override - public List listPrincipalDBGrantsAll(String principalName, PrincipalType principalType) { - List results = Collections.emptyList(); - boolean success = false; - try { - openTransaction(); - results = convertDB(listPrincipalAllDBGrant(principalName, 
principalType)); - success = commitTransaction(); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - rollbackAndCleanup(success, null); - } - return results; - } - - @Override - public List listDBGrantsAll(String catName, String dbName) { - List results = Collections.emptyList(); - boolean success = false; - try { - openTransaction(); - results = listDBGrantsAll(catName, dbName, null); - success = commitTransaction(); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - rollbackAndCleanup(success, null); - } - return results; - } - - private List listDBGrantsAll(String catName, String dbName, String authorizer) throws Exception { - return convertDB(listDatabaseGrants(catName, dbName, authorizer)); - } - - private List convertDB(List privs) { - List result = new ArrayList<>(); - for (MDBPrivilege priv : privs) { - String pname = priv.getPrincipalName(); - String authorizer = priv.getAuthorizer(); - PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); - String database = priv.getDatabase().getName(); - - HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.DATABASE, database, - null, null, null); - objectRef.setCatName(priv.getDatabase().getCatalogName()); - PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), - priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); - - result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); - } - return result; - } - - private List listPrincipalAllDBGrant(String principalName, PrincipalType principalType) - throws Exception { - final List mSecurityDBList; - - LOG.debug("Executing listPrincipalAllDBGrant"); - - Preconditions.checkState(this.currentTransaction.isActive()); - - if (principalName != null && principalType != null) { - try (Query query = pm.newQuery(MDBPrivilege.class, "principalName == t1 && principalType == t2")) { - 
query.declareParameters("java.lang.String t1, java.lang.String t2"); - mSecurityDBList = (List) query.execute(principalName, principalType.toString()); - pm.retrieveAll(mSecurityDBList); - LOG.debug("Done retrieving all objects for listPrincipalAllDBGrant: {}", mSecurityDBList); - return Collections.unmodifiableList(new ArrayList<>(mSecurityDBList)); - } - } else { - try (Query query = pm.newQuery(MDBPrivilege.class)) { - mSecurityDBList = (List) query.execute(); - pm.retrieveAll(mSecurityDBList); - LOG.debug("Done retrieving all objects for listPrincipalAllDBGrant: {}", mSecurityDBList); - return Collections.unmodifiableList(new ArrayList<>(mSecurityDBList)); - } - } - } - - @Override - public List listPrincipalDCGrants(String principalName, - PrincipalType principalType, - String dcName) { - List mDcs = listPrincipalMDCGrants(principalName, principalType, dcName); - if (mDcs.isEmpty()) { - return Collections.emptyList(); - } - List result = new ArrayList<>(); - for (int i = 0; i < mDcs.size(); i++) { - MDCPrivilege sDC = mDcs.get(i); - HiveObjectRef objectRef = new HiveObjectRef( - HiveObjectType.DATACONNECTOR, null, dcName, null, null); - HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, - sDC.getPrincipalName(), principalType, - new PrivilegeGrantInfo(sDC.getPrivilege(), sDC - .getCreateTime(), sDC.getGrantor(), PrincipalType - .valueOf(sDC.getGrantorType()), sDC.getGrantOption()), sDC.getAuthorizer()); - result.add(secObj); - } - return result; - } - - @Override - public List listPrincipalDCGrantsAll(String principalName, PrincipalType principalType) { - List results = Collections.emptyList(); - boolean success = false; - try { - openTransaction(); - results = convertDC(listPrincipalAllDCGrant(principalName, principalType)); - success = commitTransaction(); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - rollbackAndCleanup(success, null); - } - return results; - } - - @Override - public List listDCGrantsAll(String 
dcName) { - List results = Collections.emptyList(); - boolean success = false; - try { - openTransaction(); - results = listDCGrantsAll(dcName, null); - success = commitTransaction(); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - rollbackAndCleanup(success, null); - } - return results; - } - - private List listDCGrantsAll(String dcName, String authorizer) throws Exception { - return convertDC(listDataConnectorGrants(dcName, authorizer)); - } - - private List convertDC(List privs) { - List result = new ArrayList<>(); - for (MDCPrivilege priv : privs) { - String pname = priv.getPrincipalName(); - String authorizer = priv.getAuthorizer(); - PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); - String dataConnectorName = priv.getDataConnector().getName(); - - HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.DATACONNECTOR, null, - dataConnectorName, null, null); - PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), - priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); - - result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); - } - return result; - } - - private List listPrincipalAllDCGrant(String principalName, PrincipalType principalType) - throws Exception { - final List mSecurityDCList; - - LOG.debug("Executing listPrincipalAllDCGrant"); - - Preconditions.checkState(this.currentTransaction.isActive()); - - if (principalName != null && principalType != null) { - try (Query query = pm.newQuery(MDCPrivilege.class, "principalName == t1 && principalType == t2")) { - query.declareParameters("java.lang.String t1, java.lang.String t2"); - mSecurityDCList = (List) query.execute(principalName, principalType.toString()); - pm.retrieveAll(mSecurityDCList); - LOG.debug("Done retrieving all objects for listPrincipalAllDCGrant: {}", mSecurityDCList); - return Collections.unmodifiableList(new 
ArrayList<>(mSecurityDCList)); - } - } else { - try (Query query = pm.newQuery(MDCPrivilege.class)) { - mSecurityDCList = (List) query.execute(); - pm.retrieveAll(mSecurityDCList); - LOG.debug("Done retrieving all objects for listPrincipalAllDCGrant: {}", mSecurityDCList); - return Collections.unmodifiableList(new ArrayList<>(mSecurityDCList)); - } - } - } - - private List listAllTableGrants(String catName, String dbName, String tableName) { - boolean success = false; - Query query = null; - List mSecurityTabList = new ArrayList<>(); - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - try { - LOG.debug("Executing listAllTableGrants"); - - openTransaction(); - String queryStr = "table.tableName == t1 && table.database.name == t2" + - "&& table.database.catalogName == t3"; - query = pm.newQuery(MTablePrivilege.class, queryStr); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - List mPrivs = - (List) query.executeWithArray(tableName, dbName, catName); - LOG.debug("Done executing query for listAllTableGrants"); - pm.retrieveAll(mPrivs); - success = commitTransaction(); - - mSecurityTabList.addAll(mPrivs); - - LOG.debug("Done retrieving all objects for listAllTableGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityTabList; - } - - private List listTableAllPartitionGrants(String catName, String dbName, String tableName) { - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - boolean success = false; - Query query = null; - List mSecurityTabPartList = new ArrayList<>(); - try { - LOG.debug("Executing listTableAllPartitionGrants"); - - openTransaction(); - String queryStr = "partition.table.tableName == t1 && partition.table.database.name == t2 " + - "&& partition.table.database.catalogName == t3"; - query = pm.newQuery(MPartitionPrivilege.class, 
queryStr); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - List mPrivs = - (List) query.executeWithArray(tableName, dbName, catName); - pm.retrieveAll(mPrivs); - success = commitTransaction(); - - mSecurityTabPartList.addAll(mPrivs); - - LOG.debug("Done retrieving all objects for listTableAllPartitionGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityTabPartList; - } - - private List listTableAllColumnGrants( - String catName, String dbName, String tableName) { - return listTableAllColumnGrants(catName, dbName, tableName, null); - } - - private List listTableAllColumnGrants( - String catName, String dbName, String tableName, String authorizer) { - boolean success = false; - Query query = null; - List mTblColPrivilegeList = new ArrayList<>(); - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - try { - LOG.debug("Executing listTableAllColumnGrants"); - - openTransaction(); - List mPrivs = null; - if (authorizer != null) { - String queryStr = "table.tableName == t1 && table.database.name == t2 &&" + - "table.database.catalogName == t3 && authorizer == t4"; - query = pm.newQuery(MTableColumnPrivilege.class, queryStr); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + - "java.lang.String t4"); - mPrivs = (List) query.executeWithArray(tableName, dbName, catName, authorizer); - } else { - String queryStr = "table.tableName == t1 && table.database.name == t2 &&" + - "table.database.catalogName == t3"; - query = pm.newQuery(MTableColumnPrivilege.class, queryStr); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - mPrivs = (List) query.executeWithArray(tableName, dbName, catName); - } - LOG.debug("Query to obtain objects for listTableAllColumnGrants finished"); - pm.retrieveAll(mPrivs); - LOG.debug("RetrieveAll on all the objects for 
listTableAllColumnGrants finished"); - success = commitTransaction(); - LOG.debug("Transaction running query to obtain objects for listTableAllColumnGrants " + - "committed"); - - mTblColPrivilegeList.addAll(mPrivs); - - LOG.debug("Done retrieving " + mPrivs.size() + " objects for listTableAllColumnGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mTblColPrivilegeList; - } - - private List listTableAllPartitionColumnGrants( - String catName, String dbName, String tableName) { - boolean success = false; - Query query = null; - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - List mSecurityColList = new ArrayList<>(); - try { - LOG.debug("Executing listTableAllPartitionColumnGrants"); - - openTransaction(); - String queryStr = "partition.table.tableName == t1 && partition.table.database.name == t2 " + - "&& partition.table.database.catalogName == t3"; - query = pm.newQuery(MPartitionColumnPrivilege.class, queryStr); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - List mPrivs = - (List) query.executeWithArray(tableName, dbName, catName); - pm.retrieveAll(mPrivs); - success = commitTransaction(); - - mSecurityColList.addAll(mPrivs); - - LOG.debug("Done retrieving all objects for listTableAllPartitionColumnGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityColList; - } - - private void dropPartitionAllColumnGrantsNoTxn( - String catName, String dbName, String tableName, List partNames) { - Pair queryWithParams = makeQueryByPartitionNames(catName, - dbName, tableName, partNames, MPartitionColumnPrivilege.class, - "partition.table.tableName", "partition.table.database.name", "partition.partitionName", - "partition.table.database.catalogName"); - try (QueryWrapper wrapper = new QueryWrapper(queryWithParams.getLeft())) { - wrapper.deletePersistentAll(queryWithParams.getRight()); - } - } - - 
private List listDatabaseGrants(String catName, String dbName, String authorizer) throws Exception { - LOG.debug("Executing listDatabaseGrants"); - - Preconditions.checkState(currentTransaction.isActive()); - - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - - final Query query; - final String[] args; - - if (authorizer != null) { - query = pm.newQuery(MDBPrivilege.class, "database.name == t1 && database.catalogName == t2 && authorizer == t3"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - args = new String[] { dbName, catName, authorizer }; - } else { - query = pm.newQuery(MDBPrivilege.class, "database.name == t1 && database.catalogName == t2"); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - args = new String[] { dbName, catName }; - } - - try (QueryWrapper q = new QueryWrapper(query)) { - final List mSecurityDBList = (List) q.executeWithArray(args); - pm.retrieveAll(mSecurityDBList); - LOG.debug("Done retrieving all objects for listDatabaseGrants: {}", mSecurityDBList); - return Collections.unmodifiableList(new ArrayList<>(mSecurityDBList)); - } - } - - private List listDataConnectorGrants(String dcName, String authorizer) throws Exception { - LOG.debug("Executing listDataConnectorGrants"); - - Preconditions.checkState(currentTransaction.isActive()); - - dcName = normalizeIdentifier(dcName); - - final Query query; - String[] args = null; - final List mSecurityDCList; - - if (authorizer != null) { - query = pm.newQuery(MDCPrivilege.class, "dataConnector.name == t1 && authorizer == t2"); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - args = new String[] { dcName, authorizer }; - } else { - query = pm.newQuery(MDCPrivilege.class, "dataConnector.name == t1"); - query.declareParameters("java.lang.String t1"); - } - try (QueryWrapper wrapper = new QueryWrapper(query)) { - if (args != null) { - mSecurityDCList = (List) 
wrapper.executeWithArray(args); - } else { - mSecurityDCList = (List) wrapper.execute(dcName); - } - pm.retrieveAll(mSecurityDCList); - LOG.debug("Done retrieving all objects for listDataConnectorGrants: {}", mSecurityDCList); - return Collections.unmodifiableList(new ArrayList<>(mSecurityDCList)); - } - } - - private void dropPartitionGrantsNoTxn(String catName, String dbName, String tableName, - List partNames) { - Pair queryWithParams = makeQueryByPartitionNames(catName, - dbName, tableName, partNames,MPartitionPrivilege.class, "partition.table.tableName", - "partition.table.database.name", "partition.partitionName", - "partition.table.database.catalogName"); - try (QueryWrapper wrapper = new QueryWrapper(queryWithParams.getLeft())) { - wrapper.deletePersistentAll(queryWithParams.getRight()); - } - } - - private Pair makeQueryByPartitionNames( - String catName, String dbName, String tableName, List partNames, Class clazz, - String tbCol, String dbCol, String partCol, String catCol) { - StringBuilder queryStr = new StringBuilder(tbCol + " == t1 && " + dbCol + " == t2 && " + catCol + " == t3"); - StringBuilder paramStr = new StringBuilder("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - Object[] params = new Object[3 + partNames.size()]; - params[0] = normalizeIdentifier(tableName); - params[1] = normalizeIdentifier(dbName); - params[2] = normalizeIdentifier(catName); - int index = 0; - for (String partName : partNames) { - params[index + 3] = partName; - queryStr.append(((index == 0) ? 
" && (" : " || ") + partCol + " == p" + index); - paramStr.append(", java.lang.String p" + index); - ++index; - } - queryStr.append(")"); - Query query = pm.newQuery(clazz, queryStr.toString()); - query.declareParameters(paramStr.toString()); - return Pair.of(query, params); - } - - private List listAllMTableGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName) { - return listAllMTableGrants(principalName, principalType, catName, dbName, tableName, null); - } - - private List listAllMTableGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String authorizer) { - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - boolean success = false; - Query query = null; - List mSecurityTabPartList = new ArrayList<>(); - try { - openTransaction(); - LOG.debug("Executing listAllTableGrants"); - List mPrivs; - if (authorizer != null) { - query = pm.newQuery(MTablePrivilege.class, - "principalName == t1 && principalType == t2 && table.tableName == t3 &&" + - "table.database.name == t4 && table.database.catalogName == t5 && authorizer == t6"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3," + - "java.lang.String t4, java.lang.String t5, java.lang.String t6"); - mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), - tableName, dbName, catName, authorizer); - } else { - query = pm.newQuery(MTablePrivilege.class, - "principalName == t1 && principalType == t2 && table.tableName == t3 &&" + - "table.database.name == t4 && table.database.catalogName == t5"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3," + - "java.lang.String t4, java.lang.String t5"); - mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), - tableName, dbName, catName); - } - 
pm.retrieveAll(mPrivs); - success = commitTransaction(); - - mSecurityTabPartList.addAll(mPrivs); - - LOG.debug("Done retrieving all objects for listAllTableGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityTabPartList; - } - - @Override - public List listAllTableGrants(String principalName, - PrincipalType principalType, - String catName, - String dbName, - String tableName) { - List mTbls = - listAllMTableGrants(principalName, principalType, catName, dbName, tableName); - if (mTbls.isEmpty()) { - return Collections.emptyList(); - } - List result = new ArrayList<>(); - for (int i = 0; i < mTbls.size(); i++) { - MTablePrivilege sTbl = mTbls.get(i); - HiveObjectRef objectRef = new HiveObjectRef( - HiveObjectType.TABLE, dbName, tableName, null, null); - objectRef.setCatName(catName); - HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, - sTbl.getPrincipalName(), principalType, - new PrivilegeGrantInfo(sTbl.getPrivilege(), sTbl.getCreateTime(), sTbl - .getGrantor(), PrincipalType.valueOf(sTbl - .getGrantorType()), sTbl.getGrantOption()), sTbl.getAuthorizer()); - result.add(secObj); - } - return result; - } - - private List listPrincipalMPartitionGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String partName) { - return listPrincipalMPartitionGrants(principalName, principalType, catName, dbName, tableName, partName, null); - } - - private List listPrincipalMPartitionGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String partName, String authorizer) { - boolean success = false; - Query query = null; - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - catName = normalizeIdentifier(catName); - List mSecurityTabPartList = new ArrayList<>(); - try { - LOG.debug("Executing listPrincipalPartitionGrants"); - - openTransaction(); - List mPrivs; - if (authorizer != null) 
{ - query = pm.newQuery(MPartitionPrivilege.class, - "principalName == t1 && principalType == t2 && partition.table.tableName == t3 " - + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" - + "&& partition.partitionName == t6 && authorizer == t7"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4, " - + "java.lang.String t5, java.lang.String t6, java.lang.String t7"); - mPrivs = (List) query.executeWithArray(principalName, - principalType.toString(), tableName, dbName, catName, partName, authorizer); - } else { - query = pm.newQuery(MPartitionPrivilege.class, - "principalName == t1 && principalType == t2 && partition.table.tableName == t3 " - + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" - + "&& partition.partitionName == t6"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4, " - + "java.lang.String t5, java.lang.String t6"); - mPrivs = (List) query.executeWithArray(principalName, - principalType.toString(), tableName, dbName, catName, partName); - } - pm.retrieveAll(mPrivs); - success = commitTransaction(); - - mSecurityTabPartList.addAll(mPrivs); - - LOG.debug("Done retrieving all objects for listPrincipalPartitionGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityTabPartList; - } - - @Override - public List listPrincipalPartitionGrants(String principalName, - PrincipalType principalType, - String catName, - String dbName, - String tableName, - List partValues, - String partName) { - List mParts = listPrincipalMPartitionGrants(principalName, - principalType, catName, dbName, tableName, partName); - if (mParts.isEmpty()) { - return Collections.emptyList(); - } - List result = new ArrayList<>(); - for (int i = 0; i < mParts.size(); i++) { - MPartitionPrivilege sPart = mParts.get(i); - HiveObjectRef objectRef = new HiveObjectRef( - 
HiveObjectType.PARTITION, dbName, tableName, partValues, null); - objectRef.setCatName(catName); - HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, - sPart.getPrincipalName(), principalType, - new PrivilegeGrantInfo(sPart.getPrivilege(), sPart - .getCreateTime(), sPart.getGrantor(), PrincipalType - .valueOf(sPart.getGrantorType()), sPart - .getGrantOption()), sPart.getAuthorizer()); - - result.add(secObj); - } - return result; - } - - private List listPrincipalMTableColumnGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String columnName) { - return listPrincipalMTableColumnGrants(principalName, principalType, catName, dbName, tableName, - columnName, null); - } - - private List listPrincipalMTableColumnGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String columnName, String authorizer) { - boolean success = false; - Query query = null; - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - columnName = normalizeIdentifier(columnName); - List mSecurityColList = new ArrayList<>(); - try { - LOG.debug("Executing listPrincipalTableColumnGrants"); - - openTransaction(); - List mPrivs; - if (authorizer != null) { - String queryStr = - "principalName == t1 && principalType == t2 && " - + "table.tableName == t3 && table.database.name == t4 && " + - "table.database.catalogName == t5 && columnName == t6 && authorizer == t7"; - query = pm.newQuery(MTableColumnPrivilege.class, queryStr); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " - + "java.lang.String t4, java.lang.String t5, java.lang.String t6, java.lang.String t7"); - mPrivs = (List) query.executeWithArray(principalName, - principalType.toString(), tableName, dbName, catName, columnName, authorizer); - } else { - String queryStr = - "principalName == t1 && principalType == t2 && " - + "table.tableName 
== t3 && table.database.name == t4 && " + - "table.database.catalogName == t5 && columnName == t6 "; - query = pm.newQuery(MTableColumnPrivilege.class, queryStr); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " - + "java.lang.String t4, java.lang.String t5, java.lang.String t6"); - mPrivs = (List) query.executeWithArray(principalName, - principalType.toString(), tableName, dbName, catName, columnName); - } - pm.retrieveAll(mPrivs); - success = commitTransaction(); + // Here we build an aux structure that is used to verify that the foreign key that is declared + // is actually referencing a valid primary key or unique key. We also check that the types of + // the columns correspond. + if (existingTablePrimaryKeys.isEmpty() && existingTableUniqueConstraints.isEmpty()) { + throw new MetaException( + "Trying to define foreign key but there are no primary keys or unique keys for referenced table"); + } + final Set validPKsOrUnique = generateValidPKsOrUniqueSignatures(parentCols, + existingTablePrimaryKeys, existingTableUniqueConstraints); - mSecurityColList.addAll(mPrivs); + StringBuilder fkSignature = new StringBuilder(); + StringBuilder referencedKSignature = new StringBuilder(); + for (; i < foreignKeys.size(); i++) { + SQLForeignKey foreignKey = foreignKeys.get(i); + final String fkColumnName = normalizeIdentifier(foreignKey.getFkcolumn_name()); + int childIntegerIndex = getColumnIndexFromTableColumns(childCD.getCols(), fkColumnName); + if (childIntegerIndex == -1) { + if (childTable.getPartitionKeys() != null) { + childCD = null; + childIntegerIndex = getColumnIndexFromTableColumns(childTable.getPartitionKeys(), fkColumnName); + } + if (childIntegerIndex == -1) { + throw new InvalidObjectException("Child column not found: " + fkColumnName); + } + } - LOG.debug("Done retrieving all objects for listPrincipalTableColumnGrants"); - } finally { - rollbackAndCleanup(success, query); - } - return mSecurityColList; - } + final 
String pkColumnName = normalizeIdentifier(foreignKey.getPkcolumn_name()); + int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD.getCols(), pkColumnName); + if (parentIntegerIndex == -1) { + if (parentTable.getPartitionKeys() != null) { + parentCD = null; + parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), pkColumnName); + } + if (parentIntegerIndex == -1) { + throw new InvalidObjectException("Parent column not found: " + pkColumnName); + } + } - @Override - public List listPrincipalTableColumnGrants(String principalName, - PrincipalType principalType, - String catName, - String dbName, - String tableName, - String columnName) { - List mTableCols = - listPrincipalMTableColumnGrants(principalName, principalType, catName, dbName, tableName, columnName); - if (mTableCols.isEmpty()) { - return Collections.emptyList(); - } - List result = new ArrayList<>(); - for (int i = 0; i < mTableCols.size(); i++) { - MTableColumnPrivilege sCol = mTableCols.get(i); - HiveObjectRef objectRef = new HiveObjectRef( - HiveObjectType.COLUMN, dbName, tableName, null, sCol.getColumnName()); - objectRef.setCatName(catName); - HiveObjectPrivilege secObj = new HiveObjectPrivilege( - objectRef, sCol.getPrincipalName(), principalType, - new PrivilegeGrantInfo(sCol.getPrivilege(), sCol - .getCreateTime(), sCol.getGrantor(), PrincipalType - .valueOf(sCol.getGrantorType()), sCol - .getGrantOption()), sCol.getAuthorizer()); - result.add(secObj); - } - return result; - } + if (foreignKey.getFk_name() == null) { + // When there is no explicit foreign key name associated with the constraint and the key is composite, + // we expect the foreign keys to be send in order in the input list. + // Otherwise, the below code will break. + // If this is the first column of the FK constraint, generate the foreign key name + // NB: The below code can result in race condition where duplicate names can be generated (in theory). 
+ // However, this scenario can be ignored for practical purposes because of + // the uniqueness of the generated constraint name. + if (foreignKey.getKey_seq() == 1) { + currentConstraintName = generateConstraintName(parentTable, fkTableDB, fkTableName, pkTableDB, + pkTableName, pkColumnName, fkColumnName, "fk"); + } + } else { + currentConstraintName = normalizeIdentifier(foreignKey.getFk_name()); + if (constraintNameAlreadyExists(parentTable, currentConstraintName)) { + String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), + parentTable.getTableName(), currentConstraintName); + throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); + } + } + // Update Column, keys, table, database, catalog name + foreignKey.setFk_name(currentConstraintName); + foreignKey.setCatName(catName); + foreignKey.setFktable_db(fkTableDB); + foreignKey.setFktable_name(fkTableName); + foreignKey.setPktable_db(pkTableDB); + foreignKey.setPktable_name(pkTableName); + foreignKey.setFkcolumn_name(fkColumnName); + foreignKey.setPkcolumn_name(pkColumnName); - private List listPrincipalMPartitionColumnGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String partitionName, String columnName) { - return listPrincipalMPartitionColumnGrants(principalName, principalType, catName, dbName, - tableName, partitionName, columnName, null); - } + Integer updateRule = foreignKey.getUpdate_rule(); + Integer deleteRule = foreignKey.getDelete_rule(); + int enableValidateRely = (foreignKey.isEnable_cstr() ? 4 : 0) + + (foreignKey.isValidate_cstr() ? 2 : 0) + (foreignKey.isRely_cstr() ? 
1 : 0); - private List listPrincipalMPartitionColumnGrants( - String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String partitionName, String columnName, String authorizer) { - boolean success = false; - Query query = null; - tableName = normalizeIdentifier(tableName); - dbName = normalizeIdentifier(dbName); - columnName = normalizeIdentifier(columnName); - catName = normalizeIdentifier(catName); - List mSecurityColList = new ArrayList<>(); - try { - LOG.debug("Executing listPrincipalPartitionColumnGrants"); + MConstraint mpkfk = new MConstraint( + currentConstraintName, + foreignKey.getKey_seq(), + MConstraint.FOREIGN_KEY_CONSTRAINT, + deleteRule, + updateRule, + enableValidateRely, + parentTable, + childTable, + parentCD, + childCD, + childIntegerIndex, + parentIntegerIndex + ); + mpkfks.add(mpkfk); - openTransaction(); - List mPrivs; - if (authorizer != null) { - query = pm.newQuery( - MPartitionColumnPrivilege.class, - "principalName == t1 && principalType == t2 && partition.table.tableName == t3 " - + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" + - " && partition.partitionName == t6 && columnName == t7 && authorizer == t8"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " - + "java.lang.String t4, java.lang.String t5, java.lang.String t6, java.lang.String t7, " - + "java.lang.String t8"); - mPrivs = (List) query.executeWithArray(principalName, - principalType.toString(), tableName, dbName, catName, partitionName, columnName, authorizer); - } else { - query = pm.newQuery( - MPartitionColumnPrivilege.class, - "principalName == t1 && principalType == t2 && partition.table.tableName == t3 " - + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" + - " && partition.partitionName == t6 && columnName == t7"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, 
" - + "java.lang.String t4, java.lang.String t5, java.lang.String t6, java.lang.String t7"); - mPrivs = (List) query.executeWithArray(principalName, - principalType.toString(), tableName, dbName, catName, partitionName, columnName); - } - pm.retrieveAll(mPrivs); - success = commitTransaction(); + final String fkColType = getColumnFromTableColumns(childCols, fkColumnName).getType(); + fkSignature.append( + generateColNameTypeSignature(fkColumnName, fkColType)); + referencedKSignature.append( + generateColNameTypeSignature(pkColumnName, fkColType)); - mSecurityColList.addAll(mPrivs); + if (i + 1 < foreignKeys.size() && foreignKeys.get(i + 1).getKey_seq() == 1) { + // Next one is a new key, we bail out from the inner loop + break; + } + } + String referenced = referencedKSignature.toString(); + if (!validPKsOrUnique.contains(referenced)) { + throw new MetaException( + "Foreign key references " + referenced + " but no corresponding " + + "primary key or unique key exists. Possible keys: " + validPKsOrUnique); + } + if (sameTable && fkSignature.toString().equals(referenced)) { + throw new MetaException( + "Cannot be both foreign key and primary/unique key on same table: " + referenced); + } + fkSignature = new StringBuilder(); + referencedKSignature = new StringBuilder(); + } + pm.makePersistentAll(mpkfks); - LOG.debug("Done retrieving all objects for listPrincipalPartitionColumnGrants"); - } finally { - rollbackAndCleanup(success, query); } - return mSecurityColList; + return foreignKeys; } - @Override - public List listPrincipalPartitionColumnGrants(String principalName, - PrincipalType principalType, - String catName, - String dbName, - String tableName, - List partValues, - String partitionName, - String columnName) { - List mPartitionCols = - listPrincipalMPartitionColumnGrants(principalName, principalType, catName, dbName, tableName, - partitionName, columnName); - if (mPartitionCols.isEmpty()) { - return Collections.emptyList(); - } - List result = new 
ArrayList<>(); - for (int i = 0; i < mPartitionCols.size(); i++) { - MPartitionColumnPrivilege sCol = mPartitionCols.get(i); - HiveObjectRef objectRef = new HiveObjectRef( - HiveObjectType.COLUMN, dbName, tableName, partValues, sCol.getColumnName()); - objectRef.setCatName(catName); - HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, - sCol.getPrincipalName(), principalType, - new PrivilegeGrantInfo(sCol.getPrivilege(), sCol - .getCreateTime(), sCol.getGrantor(), PrincipalType - .valueOf(sCol.getGrantorType()), sCol.getGrantOption()), sCol.getAuthorizer()); - result.add(secObj); + private static Set generateValidPKsOrUniqueSignatures(List tableCols, + List refTablePrimaryKeys, List refTableUniqueConstraints) { + final Set validPKsOrUnique = new HashSet<>(); + if (!refTablePrimaryKeys.isEmpty()) { + refTablePrimaryKeys.sort((o1, o2) -> { + int keyNameComp = o1.getPk_name().compareTo(o2.getPk_name()); + if (keyNameComp == 0) { + return Integer.compare(o1.getKey_seq(), o2.getKey_seq()); + } + return keyNameComp; + }); + StringBuilder pkSignature = new StringBuilder(); + for (SQLPrimaryKey pk : refTablePrimaryKeys) { + pkSignature.append( + generateColNameTypeSignature( + pk.getColumn_name(), getColumnFromTableColumns(tableCols, pk.getColumn_name()).getType())); + } + validPKsOrUnique.add(pkSignature.toString()); } - return result; - } - - @Override - public List listPrincipalPartitionColumnGrantsAll( - String principalName, PrincipalType principalType) { - boolean success = false; - Query query = null; - try { - openTransaction(); - LOG.debug("Executing listPrincipalPartitionColumnGrantsAll"); - List mSecurityTabPartList; - if (principalName != null && principalType != null) { - query = - pm.newQuery(MPartitionColumnPrivilege.class, - "principalName == t1 && principalType == t2"); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - mSecurityTabPartList = - (List) query.executeWithArray(principalName, - principalType.toString()); - } 
else { - query = pm.newQuery(MPartitionColumnPrivilege.class); - mSecurityTabPartList = (List) query.execute(); + if (!refTableUniqueConstraints.isEmpty()) { + refTableUniqueConstraints.sort((o1, o2) -> { + int keyNameComp = o1.getUk_name().compareTo(o2.getUk_name()); + if (keyNameComp == 0) { + return Integer.compare(o1.getKey_seq(), o2.getKey_seq()); + } + return keyNameComp; + }); + StringBuilder ukSignature = new StringBuilder(); + for (int j = 0; j < refTableUniqueConstraints.size(); j++) { + SQLUniqueConstraint uk = refTableUniqueConstraints.get(j); + ukSignature.append( + generateColNameTypeSignature( + uk.getColumn_name(), getColumnFromTableColumns(tableCols, uk.getColumn_name()).getType())); + if (j + 1 < refTableUniqueConstraints.size()) { + if (!refTableUniqueConstraints.get(j + 1).getUk_name().equals( + refTableUniqueConstraints.get(j).getUk_name())) { + validPKsOrUnique.add(ukSignature.toString()); + ukSignature = new StringBuilder(); + } + } else { + validPKsOrUnique.add(ukSignature.toString()); + } } - LOG.debug("Done executing query for listPrincipalPartitionColumnGrantsAll"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertPartCols(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listPrincipalPartitionColumnGrantsAll"); - return result; - } finally { - rollbackAndCleanup(success, query); } + return validPKsOrUnique; } - @Override - public List listPartitionColumnGrantsAll( - String catName, String dbName, String tableName, String partitionName, String columnName) { - boolean success = false; - Query query = null; - try { - openTransaction(); - LOG.debug("Executing listPartitionColumnGrantsAll"); - query = - pm.newQuery(MPartitionColumnPrivilege.class, - "partition.table.tableName == t3 && partition.table.database.name == t4 && " - + "partition.table.database.name == t5 && " - + "partition.partitionName == t6 && columnName == t7"); - query.declareParameters("java.lang.String t3, 
java.lang.String t4, java.lang.String t5," + - "java.lang.String t6, java.lang.String t7"); - List mSecurityTabPartList = - (List) query.executeWithArray(tableName, dbName, catName, - partitionName, columnName); - LOG.debug("Done executing query for listPartitionColumnGrantsAll"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertPartCols(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listPartitionColumnGrantsAll"); - return result; - } finally { - rollbackAndCleanup(success, query); - } + private static String generateColNameTypeSignature(String colName, String colType) { + return colName + ":" + colType + ";"; } - private List convertPartCols(List privs) { - List result = new ArrayList<>(); - for (MPartitionColumnPrivilege priv : privs) { - String pname = priv.getPrincipalName(); - String authorizer = priv.getAuthorizer(); - PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); - - MPartition mpartition = priv.getPartition(); - MTable mtable = mpartition.getTable(); - MDatabase mdatabase = mtable.getDatabase(); - - HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.COLUMN, - mdatabase.getName(), mtable.getTableName(), mpartition.getValues(), priv.getColumnName()); - objectRef.setCatName(mdatabase.getCatalogName()); - PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), - priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); - - result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); - } - return result; + @Override + public List addPrimaryKeys(List pks) throws InvalidObjectException, + MetaException { + return addPrimaryKeys(pks, true); } - private List listPrincipalAllTableGrants(String principalName, PrincipalType principalType) - throws Exception { - LOG.debug("Executing listPrincipalAllTableGrants"); - - Preconditions.checkState(this.currentTransaction.isActive()); - 
- try (Query query = pm.newQuery(MTablePrivilege.class, "principalName == t1 && principalType == t2")) { - query.declareParameters("java.lang.String t1, java.lang.String t2"); - final List mSecurityTabPartList = - (List) query.execute(principalName, principalType.toString()); - - pm.retrieveAll(mSecurityTabPartList); + private List addPrimaryKeys(List pks, boolean retrieveCD) throws InvalidObjectException, + MetaException { + List mpks = new ArrayList<>(); + String constraintName = null; - LOG.debug("Done retrieving all objects for listPrincipalAllTableGrants"); + for (SQLPrimaryKey pk : pks) { + final String catName = normalizeIdentifier(pk.getCatName()); + final String tableDB = normalizeIdentifier(pk.getTable_db()); + final String tableName = normalizeIdentifier(pk.getTable_name()); + final String columnName = normalizeIdentifier(pk.getColumn_name()); - return Collections.unmodifiableList(new ArrayList<>(mSecurityTabPartList)); - } - } + // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. + // For instance, this is the case when we are creating the table. + AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); + MTable parentTable = nParentTable.mtbl; + if (parentTable == null) { + throw new InvalidObjectException("Parent table not found: " + tableName); + } - @Override - public List listPrincipalTableGrantsAll(String principalName, - PrincipalType principalType) { - boolean success = false; - Query query = null; - try { - openTransaction(); - LOG.debug("Executing listPrincipalAllTableGrants"); - List mSecurityTabPartList; - if (principalName != null && principalType != null) { - query = pm.newQuery(MTablePrivilege.class, "principalName == t1 && principalType == t2"); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - mSecurityTabPartList = - (List) query.execute(principalName, principalType.toString()); + MColumnDescriptor parentCD = retrieveCD ? 
nParentTable.mcd : parentTable.getSd().getCD(); + int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? null : parentCD.getCols(), columnName); + if (parentIntegerIndex == -1) { + if (parentTable.getPartitionKeys() != null) { + parentCD = null; + parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); + } + if (parentIntegerIndex == -1) { + throw new InvalidObjectException("Parent column not found: " + columnName); + } + } + if (getPrimaryKeyConstraintName(parentTable.getDatabase().getCatalogName(), + parentTable.getDatabase().getName(), parentTable.getTableName()) != null) { + throw new MetaException(" Primary key already exists for: " + + TableName.getQualified(catName, tableDB, tableName)); + } + if (pk.getPk_name() == null) { + if (pk.getKey_seq() == 1) { + constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "pk"); + } } else { - query = pm.newQuery(MTablePrivilege.class); - mSecurityTabPartList = (List) query.execute(); + constraintName = normalizeIdentifier(pk.getPk_name()); + if (constraintNameAlreadyExists(parentTable, constraintName)) { + String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), + parentTable.getTableName(), constraintName); + throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); + } } - LOG.debug("Done executing query for listPrincipalAllTableGrants"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertTable(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listPrincipalAllTableGrants"); - return result; - } finally { - rollbackAndCleanup(success, query); + + int enableValidateRely = (pk.isEnable_cstr() ? 4 : 0) + + (pk.isValidate_cstr() ? 2 : 0) + (pk.isRely_cstr() ? 
1 : 0); + MConstraint mpk = new MConstraint( + constraintName, + pk.getKey_seq(), + MConstraint.PRIMARY_KEY_CONSTRAINT, + null, + null, + enableValidateRely, + parentTable, + null, + parentCD, + null, + null, + parentIntegerIndex); + mpks.add(mpk); + + // Add normalized identifier back to result + pk.setCatName(catName); + pk.setTable_db(tableDB); + pk.setTable_name(tableName); + pk.setColumn_name(columnName); + pk.setPk_name(constraintName); } + pm.makePersistentAll(mpks); + return pks; } @Override - public List listTableGrantsAll(String catName, String dbName, String tableName) { - return listTableGrantsAll(catName, dbName, tableName, null); - } - - private List listTableGrantsAll(String catName, String dbName, String tableName, - String authorizer) { - boolean success = false; - Query query = null; - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); - try { - openTransaction(); - LOG.debug("Executing listTableGrantsAll"); - List mSecurityTabPartList = null; - if (authorizer != null) { - query = pm.newQuery(MTablePrivilege.class, - "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3" + - " && authorizer == t4"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + - "java.lang.String t4"); - mSecurityTabPartList = (List) query.executeWithArray(tableName, dbName, catName, authorizer); - } else { - query = pm.newQuery(MTablePrivilege.class, - "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); - mSecurityTabPartList = (List) query.executeWithArray(tableName, dbName, catName); - } - LOG.debug("Done executing query for listTableGrantsAll"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertTable(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for 
listPrincipalAllTableGrants"); - return result; - } finally { - rollbackAndCleanup(success, query); - } + public List addUniqueConstraints(List uks) + throws InvalidObjectException, MetaException { + return addUniqueConstraints(uks, true); } - private List convertTable(List privs) { - List result = new ArrayList<>(); - for (MTablePrivilege priv : privs) { - String pname = priv.getPrincipalName(); - String authorizer = priv.getAuthorizer(); - PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + private List addUniqueConstraints(List uks, boolean retrieveCD) + throws InvalidObjectException, MetaException { - String table = priv.getTable().getTableName(); - String database = priv.getTable().getDatabase().getName(); + List cstrs = new ArrayList<>(); + String constraintName = null; - HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.TABLE, database, table, - null, null); - objectRef.setCatName(priv.getTable().getDatabase().getCatalogName()); - PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), - priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + for (SQLUniqueConstraint uk : uks) { + final String catName = normalizeIdentifier(uk.getCatName()); + final String tableDB = normalizeIdentifier(uk.getTable_db()); + final String tableName = normalizeIdentifier(uk.getTable_name()); + final String columnName = normalizeIdentifier(uk.getColumn_name()); - result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); - } - return result; - } + // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. + // For instance, this is the case when we are creating the table. 
+ AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); + MTable parentTable = nParentTable.mtbl; + if (parentTable == null) { + throw new InvalidObjectException("Parent table not found: " + tableName); + } - private List listPrincipalAllPartitionGrants(String principalName, PrincipalType principalType) - throws Exception { - LOG.debug("Executing listPrincipalAllPartitionGrants"); + MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); + int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? null : parentCD.getCols(), columnName); + if (parentIntegerIndex == -1) { + if (parentTable.getPartitionKeys() != null) { + parentCD = null; + parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); + } + if (parentIntegerIndex == -1) { + throw new InvalidObjectException("Parent column not found: " + columnName); + } + } + if (uk.getUk_name() == null) { + if (uk.getKey_seq() == 1) { + constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "uk"); + } + } else { + constraintName = normalizeIdentifier(uk.getUk_name()); + if (constraintNameAlreadyExists(parentTable, constraintName)) { + String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), + parentTable.getTableName(), constraintName); + throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); + } + } - Preconditions.checkState(this.currentTransaction.isActive()); - try (Query query = pm.newQuery(MPartitionPrivilege.class, "principalName == t1 && principalType == t2")) { - query.declareParameters("java.lang.String t1, java.lang.String t2"); - final List mSecurityTabPartList = - (List) query.execute(principalName, principalType.toString()); + int enableValidateRely = (uk.isEnable_cstr() ? 4 : 0) + + (uk.isValidate_cstr() ? 2 : 0) + (uk.isRely_cstr() ? 
1 : 0); + MConstraint muk = new MConstraint( + constraintName, + uk.getKey_seq(), + MConstraint.UNIQUE_CONSTRAINT, + null, + null, + enableValidateRely, + parentTable, + null, + parentCD, + null, + null, + parentIntegerIndex); + cstrs.add(muk); - pm.retrieveAll(mSecurityTabPartList); - LOG.debug("Done retrieving all objects for listPrincipalAllPartitionGrants"); + // Add normalized identifier back to result + uk.setCatName(catName); + uk.setTable_db(tableDB); + uk.setTable_name(tableName); + uk.setColumn_name(columnName); + uk.setUk_name(constraintName); - return Collections.unmodifiableList(new ArrayList<>(mSecurityTabPartList)); } + pm.makePersistentAll(cstrs); + return uks; } @Override - public List listPrincipalPartitionGrantsAll(String principalName, - PrincipalType principalType) { - boolean success = false; - Query query = null; - try { - openTransaction(); - LOG.debug("Executing listPrincipalPartitionGrantsAll"); - List mSecurityTabPartList; - if (principalName != null && principalType != null) { - query = - pm.newQuery(MPartitionPrivilege.class, "principalName == t1 && principalType == t2"); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - mSecurityTabPartList = - (List) query.execute(principalName, principalType.toString()); - } else { - query = pm.newQuery(MPartitionPrivilege.class); - mSecurityTabPartList = (List) query.execute(); - } - LOG.debug("Done executing query for listPrincipalPartitionGrantsAll"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertPartition(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listPrincipalPartitionGrantsAll"); - return result; - } finally { - rollbackAndCleanup(success, query); - } + public List addNotNullConstraints(List nns) + throws InvalidObjectException, MetaException { + return addNotNullConstraints(nns, true); } @Override - public List listPartitionGrantsAll(String catName, String dbName, String tableName, - String 
partitionName) { - boolean success = false; - Query query = null; - try { - openTransaction(); - LOG.debug("Executing listPrincipalPartitionGrantsAll"); - query = - pm.newQuery(MPartitionPrivilege.class, - "partition.table.tableName == t3 && partition.table.database.name == t4 && " - + "partition.table.database.catalogName == t5 && partition.partitionName == t6"); - query.declareParameters("java.lang.String t3, java.lang.String t4, java.lang.String t5, " + - "java.lang.String t6"); - List mSecurityTabPartList = - (List) query.executeWithArray(tableName, dbName, catName, partitionName); - LOG.debug("Done executing query for listPrincipalPartitionGrantsAll"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertPartition(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listPrincipalPartitionGrantsAll"); - return result; - } finally { - rollbackAndCleanup(success, query); - } + public List addDefaultConstraints(List nns) + throws InvalidObjectException, MetaException { + return addDefaultConstraints(nns, true); } - private List convertPartition(List privs) { - List result = new ArrayList<>(); - for (MPartitionPrivilege priv : privs) { - String pname = priv.getPrincipalName(); - String authorizer = priv.getAuthorizer(); - PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); - - MPartition mpartition = priv.getPartition(); - MTable mtable = mpartition.getTable(); - MDatabase mdatabase = mtable.getDatabase(); - - HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.PARTITION, - mdatabase.getName(), mtable.getTableName(), mpartition.getValues(), null); - objectRef.setCatName(mdatabase.getCatalogName()); - PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), - priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); - - result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); - } - return 
result; + @Override + public List addCheckConstraints(List nns) + throws InvalidObjectException, MetaException { + return addCheckConstraints(nns, true); } - private List listPrincipalAllTableColumnGrants(String principalName, - PrincipalType principalType) throws Exception { - - LOG.debug("Executing listPrincipalAllTableColumnGrants"); - - Preconditions.checkState(this.currentTransaction.isActive()); - - try (Query query = pm.newQuery(MTableColumnPrivilege.class, "principalName == t1 && principalType == t2")) { - query.declareParameters("java.lang.String t1, java.lang.String t2"); - final List mSecurityColumnList = - (List) query.execute(principalName, principalType.toString()); + private List addCheckConstraints(List ccs, boolean retrieveCD) + throws InvalidObjectException, MetaException { + List cstrs = new ArrayList<>(); - pm.retrieveAll(mSecurityColumnList); - LOG.debug("Done retrieving all objects for listPrincipalAllTableColumnGrants"); + for (SQLCheckConstraint cc: ccs) { + final String catName = normalizeIdentifier(cc.getCatName()); + final String tableDB = normalizeIdentifier(cc.getTable_db()); + final String tableName = normalizeIdentifier(cc.getTable_name()); + final String columnName = cc.getColumn_name() == null? 
null + : normalizeIdentifier(cc.getColumn_name()); + final String ccName = cc.getDc_name(); + boolean isEnable = cc.isEnable_cstr(); + boolean isValidate = cc.isValidate_cstr(); + boolean isRely = cc.isRely_cstr(); + String constraintValue = cc.getCheck_expression(); + MConstraint muk = addConstraint(catName, tableDB, tableName, columnName, ccName, isEnable, isRely, isValidate, + MConstraint.CHECK_CONSTRAINT, constraintValue, retrieveCD); + cstrs.add(muk); - return Collections.unmodifiableList(new ArrayList<>(mSecurityColumnList)); + // Add normalized identifier back to result + cc.setCatName(catName); + cc.setTable_db(tableDB); + cc.setTable_name(tableName); + cc.setColumn_name(columnName); + cc.setDc_name(muk.getConstraintName()); } + pm.makePersistentAll(cstrs); + return ccs; } - @Override - public List listPrincipalTableColumnGrantsAll(String principalName, - PrincipalType principalType) { - boolean success = false; - Query query = null; - try { - openTransaction(); - LOG.debug("Executing listPrincipalTableColumnGrantsAll"); - - List mSecurityTabPartList; - if (principalName != null && principalType != null) { - query = - pm.newQuery(MTableColumnPrivilege.class, "principalName == t1 && principalType == t2"); - query.declareParameters("java.lang.String t1, java.lang.String t2"); - mSecurityTabPartList = - (List) query.execute(principalName, principalType.toString()); - } else { - query = pm.newQuery(MTableColumnPrivilege.class); - mSecurityTabPartList = (List) query.execute(); - } - LOG.debug("Done executing query for listPrincipalTableColumnGrantsAll"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertTableCols(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listPrincipalTableColumnGrantsAll"); - return result; - } finally { - rollbackAndCleanup(success, query); + private MConstraint addConstraint(String catName, String tableDB, String tableName, String columnName, String ccName, + boolean 
isEnable, boolean isRely, boolean isValidate, int constraintType, + String constraintValue, boolean retrieveCD) + throws InvalidObjectException, MetaException { + String constraintName = null; + // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. + // For instance, this is the case when we are creating the table. + AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); + MTable parentTable = nParentTable.mtbl; + if (parentTable == null) { + throw new InvalidObjectException("Parent table not found: " + tableName); } - } - @Override - public List listTableColumnGrantsAll(String catName, String dbName, String tableName, - String columnName) { - boolean success = false; - Query query = null; - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); - try { - openTransaction(); - LOG.debug("Executing listPrincipalTableColumnGrantsAll"); - query = - pm.newQuery(MTableColumnPrivilege.class, - "table.tableName == t3 && table.database.name == t4 && " + - "table.database.catalogName == t5 && columnName == t6"); - query.declareParameters("java.lang.String t3, java.lang.String t4, java.lang.String t5, " + - "java.lang.String t6"); - List mSecurityTabPartList = - (List) query.executeWithArray(tableName, dbName, - catName, columnName); - LOG.debug("Done executing query for listPrincipalTableColumnGrantsAll"); - pm.retrieveAll(mSecurityTabPartList); - List result = convertTableCols(mSecurityTabPartList); - success = commitTransaction(); - LOG.debug("Done retrieving all objects for listPrincipalTableColumnGrantsAll"); - return result; - } finally { - rollbackAndCleanup(success, query); + MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); + int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? 
null : parentCD.getCols(), columnName); + if (parentIntegerIndex == -1) { + if (parentTable.getPartitionKeys() != null) { + parentCD = null; + parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); + } } - } - - private List convertTableCols(List privs) { - List result = new ArrayList<>(); - for (MTableColumnPrivilege priv : privs) { - String pname = priv.getPrincipalName(); - String authorizer = priv.getAuthorizer(); - PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); - - MTable mtable = priv.getTable(); - MDatabase mdatabase = mtable.getDatabase(); - - HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.COLUMN, - mdatabase.getName(), mtable.getTableName(), null, priv.getColumnName()); - objectRef.setCatName(mdatabase.getCatalogName()); - PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), - priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); - - result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + if (ccName == null) { + constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "dc"); + } else { + constraintName = normalizeIdentifier(ccName); + if (constraintNameAlreadyExists(parentTable, constraintName)) { + String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), + parentTable.getTableName(), constraintName); + throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); + } } - return result; - } - private List listPrincipalAllPartitionColumnGrants(String principalName, - PrincipalType principalType) throws Exception { - LOG.debug("Executing listPrincipalAllTableColumnGrants"); + int enableValidateRely = (isEnable ? 4 : 0) + + (isValidate ? 2 : 0) + (isRely ? 
1 : 0); + MConstraint muk = new MConstraint( + constraintName, + 1, + constraintType, // Not null constraint should reference a single column + null, + null, + enableValidateRely, + parentTable, + null, + parentCD, + null, + null, + parentIntegerIndex, + constraintValue); - Preconditions.checkState(this.currentTransaction.isActive()); + return muk; + } - try (Query query = pm.newQuery(MPartitionColumnPrivilege.class, "principalName == t1 && principalType == t2")) { - query.declareParameters("java.lang.String t1, java.lang.String t2"); - final List mSecurityColumnList = - (List) query.execute(principalName, principalType.toString()); + private List addDefaultConstraints(List dcs, boolean retrieveCD) + throws InvalidObjectException, MetaException { - pm.retrieveAll(mSecurityColumnList); - LOG.debug("Done retrieving all objects for listPrincipalAllTableColumnGrants"); + List cstrs = new ArrayList<>(); + for (SQLDefaultConstraint dc : dcs) { + final String catName = normalizeIdentifier(dc.getCatName()); + final String tableDB = normalizeIdentifier(dc.getTable_db()); + final String tableName = normalizeIdentifier(dc.getTable_name()); + final String columnName = normalizeIdentifier(dc.getColumn_name()); + final String dcName = dc.getDc_name(); + boolean isEnable = dc.isEnable_cstr(); + boolean isValidate = dc.isValidate_cstr(); + boolean isRely = dc.isRely_cstr(); + String constraintValue = dc.getDefault_value(); + MConstraint muk = addConstraint(catName, tableDB, tableName, columnName, dcName, isEnable, isRely, isValidate, + MConstraint.DEFAULT_CONSTRAINT, constraintValue, retrieveCD); + cstrs.add(muk); - return Collections.unmodifiableList(new ArrayList<>(mSecurityColumnList)); + // Add normalized identifier back to result + dc.setCatName(catName); + dc.setTable_db(tableDB); + dc.setTable_name(tableName); + dc.setColumn_name(columnName); + dc.setDc_name(muk.getConstraintName()); } + pm.makePersistentAll(cstrs); + return dcs; } - @Override - public boolean 
isPartitionMarkedForEvent(String catName, String dbName, String tblName, - Map partName, PartitionEventType evtType) throws UnknownTableException, - MetaException, InvalidPartitionException, UnknownPartitionException { - boolean success = false; - Query query = null; - - try { - LOG.debug("Begin Executing isPartitionMarkedForEvent"); - - openTransaction(); - query = pm.newQuery(MPartitionEvent.class, - "dbName == t1 && tblName == t2 && partName == t3 && eventType == t4 && catalogName == t5"); - query - .declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, int t4," + - "java.lang.String t5"); - Table tbl = getTable(catName, dbName, tblName, null); // Make sure dbName and tblName are valid. - if (null == tbl) { - throw new UnknownTableException("Table: " + tblName + " is not found."); - } - Collection partEvents = - (Collection) query.executeWithArray(dbName, tblName, - getPartitionStr(tbl, partName), evtType.getValue(), catName); - pm.retrieveAll(partEvents); - success = commitTransaction(); + private List addNotNullConstraints(List nns, boolean retrieveCD) + throws InvalidObjectException, MetaException { - LOG.debug("Done executing isPartitionMarkedForEvent"); - return partEvents != null && !partEvents.isEmpty(); - } finally { - rollbackAndCleanup(success, query); - } - } + List cstrs = new ArrayList<>(); + String constraintName; - @Override - public Table markPartitionForEvent(String catName, String dbName, String tblName, Map partName, - PartitionEventType evtType) throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { + for (SQLNotNullConstraint nn : nns) { + final String catName = normalizeIdentifier(nn.getCatName()); + final String tableDB = normalizeIdentifier(nn.getTable_db()); + final String tableName = normalizeIdentifier(nn.getTable_name()); + final String columnName = normalizeIdentifier(nn.getColumn_name()); - LOG.debug("Begin executing markPartitionForEvent"); - boolean success = 
false; - Table tbl = null; - try{ - openTransaction(); - tbl = getTable(catName, dbName, tblName, null); // Make sure dbName and tblName are valid. - if(null == tbl) { - throw new UnknownTableException("Table: "+ tblName + " is not found."); - } - pm.makePersistent(new MPartitionEvent(catName, dbName,tblName,getPartitionStr(tbl, partName), evtType.getValue())); - success = commitTransaction(); - LOG.debug("Done executing markPartitionForEvent"); - } finally { - rollbackAndCleanup(success, null); - } - return tbl; - } + // If retrieveCD is false, we do not need to do a deep retrieval of the Table Column Descriptor. + // For instance, this is the case when we are creating the table. + AttachedMTableInfo nParentTable = getMTable(catName, tableDB, tableName, retrieveCD); + MTable parentTable = nParentTable.mtbl; + if (parentTable == null) { + throw new InvalidObjectException("Parent table not found: " + tableName); + } - private String getPartitionStr(Table tbl, Map partName) throws InvalidPartitionException{ - if(tbl.getPartitionKeysSize() != partName.size()){ - throw new InvalidPartitionException("Number of partition columns in table: "+ tbl.getPartitionKeysSize() + - " doesn't match with number of supplied partition values: "+partName.size()); - } - final List storedVals = new ArrayList<>(tbl.getPartitionKeysSize()); - for(FieldSchema partKey : tbl.getPartitionKeys()){ - String partVal = partName.get(partKey.getName()); - if(null == partVal) { - throw new InvalidPartitionException("No value found for partition column: "+partKey.getName()); + MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD(); + int parentIntegerIndex = getColumnIndexFromTableColumns(parentCD == null ? 
null : parentCD.getCols(), columnName); + if (parentIntegerIndex == -1) { + if (parentTable.getPartitionKeys() != null) { + parentCD = null; + parentIntegerIndex = getColumnIndexFromTableColumns(parentTable.getPartitionKeys(), columnName); + } + if (parentIntegerIndex == -1) { + throw new InvalidObjectException("Parent column not found: " + columnName); + } + } + if (nn.getNn_name() == null) { + constraintName = generateConstraintName(parentTable, tableDB, tableName, columnName, "nn"); + } else { + constraintName = normalizeIdentifier(nn.getNn_name()); + if (constraintNameAlreadyExists(parentTable, constraintName)) { + String fqConstraintName = String.format("%s.%s.%s", parentTable.getDatabase().getName(), + parentTable.getTableName(), constraintName); + throw new InvalidObjectException("Constraint name already exists: " + fqConstraintName); + } } - storedVals.add(partVal); + + int enableValidateRely = (nn.isEnable_cstr() ? 4 : 0) + + (nn.isValidate_cstr() ? 2 : 0) + (nn.isRely_cstr() ? 
1 : 0); + MConstraint muk = new MConstraint( + constraintName, + 1, + MConstraint.NOT_NULL_CONSTRAINT, // Not null constraint should reference a single column + null, + null, + enableValidateRely, + parentTable, + null, + parentCD, + null, + null, + parentIntegerIndex); + cstrs.add(muk); + // Add normalized identifier back to result + nn.setCatName(catName); + nn.setTable_db(tableDB); + nn.setTable_name(tableName); + nn.setColumn_name(columnName); + nn.setNn_name(constraintName); } - return join(storedVals,','); + pm.makePersistentAll(cstrs); + return nns; } private void writeMTableColumnStatistics(Table table, MTableColumnStatistics mStatsObj, @@ -9231,7 +3449,7 @@ public Map updateTableColumnStatistics(ColumnStatistics colStats long sleepInterval = MetastoreConf.getTimeVar(conf, ConfVars.METASTORE_S4U_NOWAIT_RETRY_SLEEP_INTERVAL, TimeUnit.MILLISECONDS); Map result = new RetryingExecutor<>(maxRetries, () -> { - Ref exceptionRef = new Ref<>(); + AtomicReference exceptionRef = new AtomicReference<>(); String savePoint = "uts_" + ThreadLocalRandom.current().nextInt(10000) + "_" + System.nanoTime(); setTransactionSavePoint(savePoint); executePlainSQL( @@ -9239,13 +3457,13 @@ public Map updateTableColumnStatistics(ColumnStatistics colStats true, exception -> { rollbackTransactionToSavePoint(savePoint); - exceptionRef.t = exception; + exceptionRef.set(exception); }); - if (exceptionRef.t != null) { - throw new RetryingExecutor.RetryException(exceptionRef.t); + if (exceptionRef.get() != null) { + throw new RetryingExecutor.RetryException(exceptionRef.get()); } pm.refresh(mTable); - Table table = convertToTable(mTable); + Table table = convertToTable(mTable, conf); List colNames = new ArrayList<>(); for (ColumnStatisticsObj statsObj : statsObjs) { colNames.add(statsObj.getColName()); @@ -9315,8 +3533,8 @@ public Map updatePartitionColumnStatistics(Table table, MTable m String catName = statsDesc.isSetCatName() ? 
statsDesc.getCatName() : getDefaultCatalog(conf); try { openTransaction(); - MPartition mPartition = getMPartition( - catName, statsDesc.getDbName(), statsDesc.getTableName(), partVals, mTable); + MPartition mPartition = + ensureGetMPartition(new TableName(catName, statsDesc.getDbName(), statsDesc.getTableName()), partVals); if (mPartition == null) { throw new NoSuchObjectException("Partition for which stats is gathered doesn't exist."); } @@ -9329,7 +3547,7 @@ public Map updatePartitionColumnStatistics(Table table, MTable m long sleepInterval = MetastoreConf.getTimeVar(conf, ConfVars.METASTORE_S4U_NOWAIT_RETRY_SLEEP_INTERVAL, TimeUnit.MILLISECONDS); Map result = new RetryingExecutor<>(maxRetries, () -> { - Ref exceptionRef = new Ref<>(); + AtomicReference exceptionRef = new AtomicReference<>(); String savePoint = "ups_" + ThreadLocalRandom.current().nextInt(10000) + "_" + System.nanoTime(); setTransactionSavePoint(savePoint); executePlainSQL(sqlGenerator.addForUpdateNoWait( @@ -9337,14 +3555,14 @@ public Map updatePartitionColumnStatistics(Table table, MTable m true, exception -> { rollbackTransactionToSavePoint(savePoint); - exceptionRef.t = exception; + exceptionRef.set(exception); }); - if (exceptionRef.t != null) { - throw new RetryingExecutor.RetryException(exceptionRef.t); + if (exceptionRef.get() != null) { + throw new RetryingExecutor.RetryException(exceptionRef.get()); } pm.refresh(mPartition); Partition partition = convertToPart(catName, statsDesc.getDbName(), statsDesc.getTableName(), - mPartition, TxnUtils.isAcidTable(table)); + mPartition, TxnUtils.isAcidTable(table), conf); Map oldStats = Maps.newHashMap(); List stats = getMPartitionColumnStatistics(table, Lists.newArrayList(statsDesc.getPartName()), colNames, colStats.getEngine()); @@ -9371,7 +3589,7 @@ public Map updatePartitionColumnStatistics(Table table, MTable m if (errorMsg != null) { throw new MetaException(errorMsg); } - if (!isCurrentStatsValidForTheQuery(mPartition, validWriteIds, true)) { 
+ if (!isCurrentStatsValidForTheQuery(mPartition.getParameters(), mPartition.getWriteId(), validWriteIds, true)) { // Make sure we set the flag to invalid regardless of the current value. StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE); LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition: {}, {} ", @@ -9697,10 +3915,10 @@ public List getPartitionColumnStatistics( } else { // TODO: this could be improved to get partitions in bulk for (ColumnStatistics cs : allStats) { - MPartition mpart = getMPartition(catName, dbName, tableName, - Warehouse.getPartValuesFromPartName(cs.getStatsDesc().getPartName()), null); + MPartition mpart = ensureGetMPartition(new TableName(catName, dbName, tableName), + Warehouse.getPartValuesFromPartName(cs.getStatsDesc().getPartName())); if (mpart == null - || !isCurrentStatsValidForTheQuery(mpart, writeIdList, false)) { + || !isCurrentStatsValidForTheQuery(mpart.getParameters(), mpart.getWriteId(), writeIdList, false)) { if (mpart != null) { LOG.debug("The current metastore transactional partition column statistics for {}.{}.{} " + "(write ID {}) are not valid for current query ({} {})", dbName, tableName, @@ -9790,7 +4008,7 @@ public AggrStats get_aggr_stats_for(String catName, String dbName, String tblNam // checking isolation-level-compliance of each partition column stats. 
for (Partition part : parts) { - if (!isCurrentStatsValidForTheQuery(part, part.getWriteId(), writeIdList, false)) { + if (!isCurrentStatsValidForTheQuery(part.getParameters(), part.getWriteId(), writeIdList, false)) { String partName = Warehouse.makePartName(table.getPartitionKeys(), part.getValues()); LOG.debug("The current metastore transactional partition column " + "statistics for {}.{}.{} is not valid for the current query", @@ -9920,16 +4138,6 @@ private List getMPartitionColumnStatistics(Table tab } } - private void dropPartitionColumnStatisticsNoTxn( - String catName, String dbName, String tableName, List partNames) { - Pair queryWithParams = makeQueryByPartitionNames( - catName, dbName, tableName, partNames, MPartitionColumnStatistics.class, - "partition.table.tableName", "partition.table.database.name", "partition.partitionName", "partition.table.database.catalogName"); - try (QueryWrapper wrapper = new QueryWrapper(queryWithParams.getLeft())) { - wrapper.deletePersistentAll(queryWithParams.getRight()); - } - } - @Override public void deleteAllPartitionColumnStatistics(TableName tn, String writeIdList) { @@ -10089,7 +4297,7 @@ public List run(List input) throws Exception { Batchable.runBatched(batchSize, partNames, new Batchable() { @Override public List run(List input) throws MetaException { - Pair> queryWithParams = getPartQueryWithParams(catalog, database, tableName, + Pair> queryWithParams = getPartQueryWithParams(pm, catalog, database, tableName, input); try (QueryWrapper qw = new QueryWrapper(queryWithParams.getLeft())) { qw.setResultClass(MPartition.class); @@ -10556,14 +4764,6 @@ public void setMetaStoreSchemaVersion(String schemaVersion, String comment) thro } } - @Override - public boolean doesPartitionExist(String catName, String dbName, String tableName, - List partKeys, List partVals) - throws MetaException { - String name = Warehouse.makePartName(partKeys, partVals); - return this.getMPartition(catName, dbName, tableName, name) != null; - } 
- private void debugLog(final String message) { if (LOG.isDebugEnabled()) { if (LOG.isTraceEnabled()) { @@ -10610,7 +4810,7 @@ private MFunction convertToMFunction(Function func) throws InvalidObjectExceptio MDatabase mdb = null; String catName = func.isSetCatName() ? func.getCatName() : getDefaultCatalog(conf); try { - mdb = getMDatabase(catName, func.getDbName()); + mdb = ensureGetMDatabase(catName, func.getDbName()); } catch (NoSuchObjectException e) { LOG.error("Database does not exist", e); throw new InvalidObjectException("Database " + func.getDbName() + " doesn't exist."); @@ -10847,7 +5047,7 @@ public void createOrUpdateStoredProcedure(StoredProcedure proc) throws NoSuchObj Query query = null; String catName = normalizeIdentifier(proc.getCatName()); String dbName = normalizeIdentifier(proc.getDbName()); - MDatabase db = getMDatabase(catName, dbName); + MDatabase db = ensureGetMDatabase(catName, dbName); try { openTransaction(); query = storedProcQuery(); @@ -10967,7 +5167,7 @@ public void addPackage(AddPackageRequest request) throws NoSuchObjectException, Query query = null; String catName = normalizeIdentifier(request.getCatName()); String dbName = normalizeIdentifier(request.getDbName()); - MDatabase db = getMDatabase(catName, dbName); + MDatabase db = ensureGetMDatabase(catName, dbName); try { openTransaction(); query = findPackageQuery(); @@ -11053,130 +5253,6 @@ private MPackage findMPackage(String catName, String db, String packageName) { return pkg; } - @Override - public NotificationEventResponse getNextNotification(NotificationEventRequest rqst) { - boolean commited = false; - Query query = null; - - NotificationEventResponse result = new NotificationEventResponse(); - result.setEvents(new ArrayList<>()); - try { - openTransaction(); - long lastEvent = rqst.getLastEvent(); - List parameterVals = new ArrayList<>(); - parameterVals.add(lastEvent); - // filterBuilder parameter is used for construction of conditional clause in the select query - 
StringBuilder filterBuilder = new StringBuilder("eventId > para" + parameterVals.size()); - // parameterBuilder parameter is used for specify what types of parameters will go into the filterBuilder - StringBuilder parameterBuilder = new StringBuilder("java.lang.Long para" + parameterVals.size()); - /* A fully constructed query would like: - -> filterBuilder: eventId > para0 && catalogName == para1 && dbName == para2 && (tableName == para3 - || tableName == para4) && eventType != para5 - -> parameterBuilder: java.lang.Long para0, java.lang.String para1, java.lang.String para2 - , java.lang.String para3, java.lang.String para4, java.lang.String para5 - */ - if (rqst.isSetCatName()) { - parameterVals.add(normalizeIdentifier(rqst.getCatName())); - parameterBuilder.append(", java.lang.String para" + parameterVals.size()); - filterBuilder.append(" && catalogName == para" + parameterVals.size()); - } - if (rqst.isSetDbName()) { - parameterVals.add(normalizeIdentifier(rqst.getDbName())); - parameterBuilder.append(", java.lang.String para" + parameterVals.size()); - filterBuilder.append(" && dbName == para" + parameterVals.size()); - } - if (rqst.isSetTableNames() && !rqst.getTableNames().isEmpty()) { - filterBuilder.append(" && ("); - for (String tableName : rqst.getTableNames()) { - parameterVals.add(normalizeIdentifier(tableName)); - parameterBuilder.append(", java.lang.String para" + parameterVals.size()); - filterBuilder.append("tableName == para" + parameterVals.size()+ " || "); - } - filterBuilder.setLength(filterBuilder.length() - 4); // remove the last " || " - filterBuilder.append(") "); - } - if (rqst.isSetEventTypeList()) { - filterBuilder.append(" && ("); - for (String eventType : rqst.getEventTypeList()) { - parameterVals.add(eventType); - parameterBuilder.append(", java.lang.String para" + parameterVals.size()); - filterBuilder.append("eventType == para" + parameterVals.size() + " || "); - } - filterBuilder.setLength(filterBuilder.length() - 4); // remove the 
last " || " - filterBuilder.append(") "); - } - if (rqst.isSetEventTypeSkipList()) { - for (String eventType : rqst.getEventTypeSkipList()) { - parameterVals.add(eventType); - parameterBuilder.append(", java.lang.String para" + parameterVals.size()); - filterBuilder.append(" && eventType != para" + parameterVals.size()); - } - } - query = pm.newQuery(MNotificationLog.class, filterBuilder.toString()); - query.declareParameters(parameterBuilder.toString()); - query.setOrdering("eventId ascending"); - int maxEventResponse = MetastoreConf.getIntVar(conf, ConfVars.METASTORE_MAX_EVENT_RESPONSE); - int maxEvents = (rqst.getMaxEvents() < maxEventResponse && rqst.getMaxEvents() > 0) ? rqst.getMaxEvents() : maxEventResponse; - query.setRange(0, maxEvents); - Collection events = - (Collection) query.executeWithArray(parameterVals.toArray(new Object[0])); - commited = commitTransaction(); - if (events == null) { - return result; - } - Iterator i = events.iterator(); - while (i.hasNext()) { - result.addToEvents(translateDbToThrift(i.next())); - } - return result; - } finally { - rollbackAndCleanup(commited, query); - } - } - - @Override - public void cleanWriteNotificationEvents(int olderThan) { - cleanOlderEvents(olderThan, MTxnWriteNotificationLog.class, "TxnWriteNotificationLog"); - } - - @Override - public List getAllWriteEventInfo(long txnId, String dbName, String tableName) throws MetaException { - List writeEventInfoList = null; - boolean commited = false; - Query query = null; - try { - openTransaction(); - List parameterVals = new ArrayList<>(); - StringBuilder filterBuilder = new StringBuilder(" txnId == " + Long.toString(txnId)); - if (dbName != null && !"*".equals(dbName)) { // * means get all database, so no need to add filter - appendSimpleCondition(filterBuilder, "database", new String[]{dbName}, parameterVals); - } - if (tableName != null && !"*".equals(tableName)) { - appendSimpleCondition(filterBuilder, "table", new String[]{tableName}, parameterVals); - } - 
query = pm.newQuery(MTxnWriteNotificationLog.class, filterBuilder.toString()); - query.setOrdering("database,table ascending"); - List mplans = (List)query.executeWithArray( - parameterVals.toArray(new String[0])); - pm.retrieveAll(mplans); - commited = commitTransaction(); - if (mplans != null && mplans.size() > 0) { - writeEventInfoList = Lists.newArrayList(); - for (MTxnWriteNotificationLog mplan : mplans) { - WriteEventInfo writeEventInfo = new WriteEventInfo(mplan.getWriteId(), mplan.getDatabase(), - mplan.getTable(), mplan.getFiles()); - writeEventInfo.setPartition(mplan.getPartition()); - writeEventInfo.setPartitionObj(mplan.getPartObject()); - writeEventInfo.setTableObj(mplan.getTableObject()); - writeEventInfoList.add(writeEventInfo); - } - } - } finally { - rollbackAndCleanup(commited, query); - } - return writeEventInfoList; - } - private void executePlainSQL(String sql, boolean atLeastOneRecord, Consumer exceptionConsumer) @@ -11209,287 +5285,6 @@ private void executePlainSQL(String sql, } } - private void lockNotificationSequenceForUpdate() throws MetaException { - int maxRetries = - MetastoreConf.getIntVar(conf, ConfVars.NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES); - long sleepInterval = MetastoreConf.getTimeVar(conf, - ConfVars.NOTIFICATION_SEQUENCE_LOCK_RETRY_SLEEP_INTERVAL, TimeUnit.MILLISECONDS); - if (sqlGenerator.getDbProduct().isDERBY() && directSql != null) { - // Derby doesn't allow FOR UPDATE to lock the row being selected (See https://db.apache - // .org/derby/docs/10.1/ref/rrefsqlj31783.html) . So lock the whole table. Since there's - // only one row in the table, this shouldn't cause any performance degradation. 
- new RetryingExecutor(maxRetries, () -> { - directSql.lockDbTable("NOTIFICATION_SEQUENCE"); - return null; - }).commandName("lockNotificationSequenceForUpdate").sleepInterval(sleepInterval).run(); - } else { - String selectQuery = "select \"NEXT_EVENT_ID\" from \"NOTIFICATION_SEQUENCE\""; - String lockingQuery = sqlGenerator.addForUpdateClause(selectQuery); - new RetryingExecutor(maxRetries, () -> { - executePlainSQL(lockingQuery, false, null); - return null; - }).commandName("lockNotificationSequenceForUpdate").sleepInterval(sleepInterval).run(); - } - } - - @Override - public void addNotificationEvent(NotificationEvent entry) throws MetaException { - boolean commited = false; - Query query = null; - try { - pm.flush(); - openTransaction(); - lockNotificationSequenceForUpdate(); - query = pm.newQuery(MNotificationNextId.class); - Collection ids = (Collection) query.execute(); - MNotificationNextId mNotificationNextId = null; - boolean needToPersistId; - if (CollectionUtils.isEmpty(ids)) { - mNotificationNextId = new MNotificationNextId(1L); - needToPersistId = true; - } else { - mNotificationNextId = ids.iterator().next(); - needToPersistId = false; - } - entry.setEventId(mNotificationNextId.getNextEventId()); - mNotificationNextId.incrementEventId(); - if (needToPersistId) { - pm.makePersistent(mNotificationNextId); - } - pm.makePersistent(translateThriftToDb(entry)); - commited = commitTransaction(); - } catch (MetaException e) { - LOG.error("Couldn't get lock for update", e); - throw e; - } finally { - rollbackAndCleanup(commited, query); - } - } - - @Override - public void cleanNotificationEvents(int olderThan) { - cleanOlderEvents(olderThan, MNotificationLog.class, "NotificationLog"); - } - - private void cleanOlderEvents(int olderThan, Class table, String tableName) { - final int eventBatchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.EVENT_CLEAN_MAX_EVENTS); - - final long ageSec = olderThan; - final Instant now = Instant.now(); - - final int 
tooOld = Math.toIntExact(now.getEpochSecond() - ageSec); - - final Optional batchSize = (eventBatchSize > 0) ? Optional.of(eventBatchSize) : Optional.empty(); - - final long start = System.nanoTime(); - int deleteCount = doCleanNotificationEvents(tooOld, batchSize, table, tableName); - - if (deleteCount == 0) { - LOG.info("No {} events found to be cleaned with eventTime < {}", tableName, tooOld); - } else { - int batchCount = 0; - do { - batchCount = doCleanNotificationEvents(tooOld, batchSize, table, tableName); - deleteCount += batchCount; - } while (batchCount > 0); - } - - final long finish = System.nanoTime(); - - LOG.info("Deleted {} {} events older than epoch:{} in {}ms", deleteCount, tableName, tooOld, - TimeUnit.NANOSECONDS.toMillis(finish - start)); - } - - private int doCleanNotificationEvents(final int ageSec, final Optional batchSize, Class tableClass, String tableName) { - final Transaction tx = pm.currentTransaction(); - int eventsCount = 0; - - try { - String key = null; - tx.begin(); - - try (Query query = pm.newQuery(tableClass, "eventTime <= tooOld")) { - query.declareParameters("java.lang.Integer tooOld"); - if (MNotificationLog.class.equals(tableClass)) { - key = "eventId"; - } else if (MTxnWriteNotificationLog.class.equals(tableClass)) { - key = "txnId"; - } - query.setOrdering(key + " ascending"); - if (batchSize.isPresent()) { - query.setRange(0, batchSize.get()); - } - - List events = (List) query.execute(ageSec); - if (CollectionUtils.isNotEmpty(events)) { - eventsCount = events.size(); - if (LOG.isDebugEnabled()) { - int minEventTime, maxEventTime; - long minId, maxId; - T firstNotification = events.get(0); - T lastNotification = events.get(eventsCount - 1); - if (MNotificationLog.class.equals(tableClass)) { - minEventTime = ((MNotificationLog)firstNotification).getEventTime(); - minId = ((MNotificationLog)firstNotification).getEventId(); - maxEventTime = ((MNotificationLog)lastNotification).getEventTime(); - maxId = 
((MNotificationLog)lastNotification).getEventId(); - } else if (MTxnWriteNotificationLog.class.equals(tableClass)) { - minEventTime = ((MTxnWriteNotificationLog)firstNotification).getEventTime(); - minId = ((MTxnWriteNotificationLog)firstNotification).getTxnId(); - maxEventTime = ((MTxnWriteNotificationLog)lastNotification).getEventTime(); - maxId = ((MTxnWriteNotificationLog)lastNotification).getTxnId(); - } else { - throw new RuntimeException("Cleaning of older " + tableName + " events failed. " + - "Reason: Unknown table encountered " + tableClass.getName()); - } - - LOG.debug( - "Remove {} batch of {} events with eventTime < {}, min {}: {}, max {}: {}, min eventTime {}, max eventTime {}", - tableName, eventsCount, ageSec, key, minId, key, maxId, minEventTime, maxEventTime); - } - - pm.deletePersistentAll(events); - } - } - - tx.commit(); - } catch (Exception e) { - LOG.error("Unable to delete batch of " + tableName + " events", e); - eventsCount = 0; - } finally { - if (tx.isActive()) { - tx.rollback(); - } - } - - return eventsCount; - } - - @Override - public CurrentNotificationEventId getCurrentNotificationEventId() { - boolean commited = false; - Query query = null; - try { - openTransaction(); - query = pm.newQuery(MNotificationNextId.class); - Collection ids = (Collection) query.execute(); - long id = 0; - if (CollectionUtils.isNotEmpty(ids)) { - id = ids.iterator().next().getNextEventId() - 1; - } - commited = commitTransaction(); - return new CurrentNotificationEventId(id); - } finally { - rollbackAndCleanup(commited, query); - } - } - - @Override - public NotificationEventsCountResponse getNotificationEventsCount(NotificationEventsCountRequest rqst) { - Long result = 0L; - boolean commited = false; - Query query = null; - try { - openTransaction(); - long fromEventId = rqst.getFromEventId(); - String inputDbName = rqst.getDbName(); - String catName = rqst.isSetCatName() ? 
rqst.getCatName() : getDefaultCatalog(conf); - long toEventId; - String paramSpecs; - List paramVals = new ArrayList<>(); - - // We store a catalog name in lower case in metastore and also use the same way everywhere in - // hive. - assert catName.equals(catName.toLowerCase()); - - // Build the query to count events, part by part - String queryStr = "select count(eventId) from " + MNotificationLog.class.getName(); - // count fromEventId onwards events - queryStr = queryStr + " where eventId > fromEventId"; - paramSpecs = "java.lang.Long fromEventId"; - paramVals.add(Long.valueOf(fromEventId)); - - // Input database name can be a database name or a *. In the first case we add a filter - // condition on dbName column, but not in the second case, since a * means all the - // databases. In case we support more elaborate database name patterns in future, we will - // have to apply a method similar to getNextNotification() method of MetaStoreClient. - if (!inputDbName.equals("*")) { - // dbName could be NULL in case of transaction related events, which also need to be - // counted. - queryStr = queryStr + " && (dbName == inputDbName || dbName == null)"; - paramSpecs = paramSpecs + ", java.lang.String inputDbName"; - // We store a database name in lower case in metastore. - paramVals.add(inputDbName.toLowerCase()); - } - - // catName could be NULL in case of transaction related events, which also need to be - // counted. 
- queryStr = queryStr + " && (catalogName == catName || catalogName == null)"; - paramSpecs = paramSpecs +", java.lang.String catName"; - paramVals.add(catName); - - // count events upto toEventId if specified - if (rqst.isSetToEventId()) { - toEventId = rqst.getToEventId(); - queryStr = queryStr + " && eventId <= toEventId"; - paramSpecs = paramSpecs + ", java.lang.Long toEventId"; - paramVals.add(Long.valueOf(toEventId)); - } - // Specify list of table names in the query string and parameter types - if (rqst.isSetTableNames() && !rqst.getTableNames().isEmpty()) { - queryStr = queryStr + " && ("; - for (String tableName : rqst.getTableNames()) { - paramVals.add(tableName.toLowerCase()); - queryStr = queryStr + "tableName == tableName" + paramVals.size() + " || "; - paramSpecs = paramSpecs + ", java.lang.String tableName" + paramVals.size(); - } - queryStr = queryStr.substring(0, queryStr.length() - 4); // remove the last " || " - queryStr += ")"; - } - - query = pm.newQuery(queryStr); - query.declareParameters(paramSpecs); - result = (Long) query.executeWithArray(paramVals.toArray()); - commited = commitTransaction(); - - // Cap the event count by limit if specified. - long eventCount = result.longValue(); - if (rqst.isSetLimit() && eventCount > rqst.getLimit()) { - eventCount = rqst.getLimit(); - } - - return new NotificationEventsCountResponse(eventCount); - } finally { - rollbackAndCleanup(commited, query); - } - } - - private MNotificationLog translateThriftToDb(NotificationEvent entry) { - MNotificationLog dbEntry = new MNotificationLog(); - dbEntry.setEventId(entry.getEventId()); - dbEntry.setEventTime(entry.getEventTime()); - dbEntry.setEventType(entry.getEventType()); - dbEntry.setCatalogName(entry.isSetCatName() ? 
entry.getCatName() : getDefaultCatalog(conf)); - dbEntry.setDbName(entry.getDbName()); - dbEntry.setTableName(entry.getTableName()); - dbEntry.setMessage(entry.getMessage()); - dbEntry.setMessageFormat(entry.getMessageFormat()); - return dbEntry; - } - - private NotificationEvent translateDbToThrift(MNotificationLog dbEvent) { - NotificationEvent event = new NotificationEvent(); - event.setEventId(dbEvent.getEventId()); - event.setEventTime(dbEvent.getEventTime()); - event.setEventType(dbEvent.getEventType()); - event.setCatName(dbEvent.getCatalogName()); - event.setDbName(dbEvent.getDbName()); - event.setTableName(dbEvent.getTableName()); - event.setMessage((dbEvent.getMessage())); - event.setMessageFormat(dbEvent.getMessageFormat()); - return event; - } - @Override public List getPrimaryKeys(PrimaryKeysRequest request) throws MetaException { try { @@ -12371,7 +6166,7 @@ public SerDeInfo getSerDeInfo(String serDeName) throws NoSuchObjectException, Me if (mSerDeInfo == null) { throw new NoSuchObjectException("No SerDe named " + serDeName); } - SerDeInfo serde = convertToSerDeInfo(mSerDeInfo, false); + SerDeInfo serde = convertToSerDeInfo(mSerDeInfo, conf, false); committed = commitTransaction(); return serde; } finally { @@ -12409,7 +6204,7 @@ public void addSerde(SerDeInfo serde) throws AlreadyExistsException, MetaExcepti private MISchema convertToMISchema(ISchema schema) throws NoSuchObjectException { return new MISchema(schema.getSchemaType().getValue(), normalizeIdentifier(schema.getName()), - getMDatabase(schema.getCatName(), schema.getDbName()), + ensureGetMDatabase(schema.getCatName(), schema.getDbName()), schema.getCompatibility().getValue(), schema.getValidationLevel().getValue(), schema.isCanEvolve(), @@ -12479,7 +6274,7 @@ private SchemaVersion convertToSchemaVersion(MSchemaVersion mSchemaVersion) thro schemaVersion.setName(mSchemaVersion.getName()); } if (mSchemaVersion.getSerDe() != null) { - 
schemaVersion.setSerDe(convertToSerDeInfo(mSchemaVersion.getSerDe(), false)); + schemaVersion.setSerDe(convertToSerDeInfo(mSchemaVersion.getSerDe(), conf, false)); } return schemaVersion; } @@ -13698,24 +7493,8 @@ private boolean isCurrentStatsValidForTheQuery(MTable tbl, String queryValidWrit * the conjunction of the following two are true: * ~ COLUMN_STATE_ACCURATE(CSA) state is true * ~ Isolation-level (snapshot) compliant with the query - * @param part MPartition of the stats entity * @param queryValidWriteIdList valid writeId list of the query - * @Precondition "part" should be retrieved from the PARTITIONS table. */ - private boolean isCurrentStatsValidForTheQuery(MPartition part, - String queryValidWriteIdList, boolean isCompleteStatsWriter) - throws MetaException { - return isCurrentStatsValidForTheQuery(part.getParameters(), part.getWriteId(), - queryValidWriteIdList, isCompleteStatsWriter); - } - - private boolean isCurrentStatsValidForTheQuery(Partition part, long partWriteId, - String queryValidWriteIdList, boolean isCompleteStatsWriter) - throws MetaException { - return isCurrentStatsValidForTheQuery(part.getParameters(), partWriteId, - queryValidWriteIdList, isCompleteStatsWriter); - } - // TODO: move to somewhere else public static boolean isCurrentStatsValidForTheQuery( Map statsParams, long statsWriteId, String queryValidWriteIdList, diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java index b76082a52ae0..fbece9c199a7 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java @@ -24,7 +24,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; 
import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -38,7 +37,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; /** @@ -58,9 +56,6 @@ public class PartitionManagementTask implements MetastoreTaskThread { public static final String DISCOVER_PARTITIONS_TBLPROPERTY = "discover.partitions"; public static final String PARTITION_RETENTION_PERIOD_TBLPROPERTY = "partition.retention.period"; private static final Lock lock = new ReentrantLock(); - // these are just for testing - private static final AtomicInteger completedAttempts = new AtomicInteger(); - private static final AtomicInteger skippedAttempts = new AtomicInteger(); private Configuration conf; @@ -136,10 +131,8 @@ public void run() { } lock.unlock(); } - completedAttempts.incrementAndGet(); } else { - int skipped = skippedAttempts.incrementAndGet(); - LOG.info("Lock is held by some other partition discovery task. Skipping this attempt..#{}", skipped); + LOG.info("Lock is held by some other partition discovery task. 
Skipping this attempt."); } } @@ -200,13 +193,4 @@ public void run() { } } - @VisibleForTesting - public static int getSkippedAttempts() { - return skippedAttempts.get(); - } - - @VisibleForTesting - public static int getCompletedAttempts() { - return completedAttempts.get(); - } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index 9ac7f921e077..4fae846e5455 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -23,6 +23,8 @@ import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; @@ -116,11 +118,22 @@ import org.apache.hadoop.hive.metastore.api.WMValidateResourcePlanResponse; import org.apache.hadoop.hive.metastore.api.WriteEventInfo; import org.apache.hadoop.hive.metastore.client.builder.GetPartitionsArgs; +import org.apache.hadoop.hive.metastore.model.MDatabase; +import org.apache.hadoop.hive.metastore.model.MPartition; import org.apache.hadoop.hive.metastore.model.MTable; import org.apache.hadoop.hive.metastore.properties.PropertyStore; +import org.apache.hadoop.hive.metastore.metastore.iface.NotificationStore; +import org.apache.hadoop.hive.metastore.metastore.iface.PrivilegeStore; +import org.apache.hadoop.hive.metastore.metastore.iface.TableStore; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.thrift.TException; +/** + * NOTE: Please don't add new methods into this class if we have the corresponding 
iface in metastore package, + * use "rawstore.unwrap(iface).newMethod(...)" to call the new method instead. + * In the future, this RawStore will only act as a bridge over different ifaces defined in metastore package. + */ public interface RawStore extends Configurable { /*** * Annotation to skip retries @@ -320,8 +333,10 @@ boolean alterDataConnector(String dcName, DataConnector connector) boolean dropType(String typeName); - void createTable(Table tbl) throws InvalidObjectException, - MetaException; + default void createTable(Table tbl) throws InvalidObjectException, + MetaException { + unwrap(TableStore.class).createTable(tbl); + } /** * Drop a table. @@ -334,8 +349,10 @@ void createTable(Table tbl) throws InvalidObjectException, * @throws InvalidObjectException Don't think this is ever actually thrown * @throws InvalidInputException Don't think this is ever actually thrown */ - boolean dropTable(String catalogName, String dbName, String tableName) - throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException; + default boolean dropTable(String catalogName, String dbName, String tableName) + throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { + return unwrap(TableStore.class).dropTable(new TableName(catalogName, dbName, tableName)); + } /** * Drop all partitions from the table, and return the partition's location that not a child of baseLocationToNotShow, @@ -347,8 +364,10 @@ boolean dropTable(String catalogName, String dbName, String tableName) * @throws MetaException something went wrong, usually in the RDBMS or storage * @throws InvalidInputException unable to drop all partitions due to the invalid input */ - List dropAllPartitionsAndGetLocations(TableName table, String baseLocationToNotShow, AtomicReference message) - throws MetaException, InvalidInputException, NoSuchObjectException, InvalidObjectException; + default List dropAllPartitionsAndGetLocations(TableName table, String 
baseLocationToNotShow, AtomicReference message) + throws MetaException, InvalidInputException, NoSuchObjectException, InvalidObjectException { + return unwrap(TableStore.class).dropAllPartitionsAndGetLocations(table, baseLocationToNotShow, message); + } /** * Get a table object. @@ -359,7 +378,9 @@ List dropAllPartitionsAndGetLocations(TableName table, String baseLocati * consistently returned null or consistently threw NoSuchObjectException). * @throws MetaException something went wrong in the RDBMS */ - Table getTable(String catalogName, String dbName, String tableName) throws MetaException; + default Table getTable(String catalogName, String dbName, String tableName) throws MetaException { + return getTable(catalogName, dbName, tableName, null, -1); + } /** * Get a table object. @@ -371,8 +392,10 @@ List dropAllPartitionsAndGetLocations(TableName table, String baseLocati * consistently returned null or consistently threw NoSuchObjectException). * @throws MetaException something went wrong in the RDBMS */ - Table getTable(String catalogName, String dbName, String tableName, - String writeIdList) throws MetaException; + default Table getTable(String catalogName, String dbName, String tableName, + String writeIdList) throws MetaException { + return getTable(catalogName, dbName, tableName, writeIdList, -1); + } /** * Get a table object. @@ -384,8 +407,10 @@ Table getTable(String catalogName, String dbName, String tableName, * consistently returned null or consistently threw NoSuchObjectException). * @throws MetaException something went wrong in the RDBMS */ - Table getTable(String catalogName, String dbName, String tableName, - String writeIdList, long tableId) throws MetaException; + default Table getTable(String catalogName, String dbName, String tableName, + String writeIdList, long tableId) throws MetaException { + return unwrap(TableStore.class).getTable(new TableName(catalogName, dbName, tableName), writeIdList, tableId); + } /** * Add a partition. 
@@ -394,8 +419,13 @@ Table getTable(String catalogName, String dbName, String tableName, * @throws InvalidObjectException the provided partition object is not valid. * @throws MetaException error writing to the RDBMS. */ - boolean addPartition(Partition part) - throws InvalidObjectException, MetaException; + default boolean addPartition(Partition part) + throws InvalidObjectException, MetaException { + String catName = part.getCatName() == null ? + MetaStoreUtils.getDefaultCatalog(getConf()) : part.getCatName(); + return unwrap(TableStore.class) + .addPartitions(new TableName(catName, part.getDbName(), part.getTableName()), Arrays.asList(part)); + } /** * Add a list of partitions to a table. @@ -408,8 +438,10 @@ boolean addPartition(Partition part) * @throws MetaException the partitions don't belong to the indicated table or error writing to * the RDBMS. */ - boolean addPartitions(String catName, String dbName, String tblName, List parts) - throws InvalidObjectException, MetaException; + default boolean addPartitions(String catName, String dbName, String tblName, List parts) + throws InvalidObjectException, MetaException { + return unwrap(TableStore.class).addPartitions(new TableName(catName, dbName, tblName), parts); + } /** * Get a partition. @@ -421,8 +453,10 @@ boolean addPartitions(String catName, String dbName, String tblName, List part_vals) throws MetaException, NoSuchObjectException; + default Partition getPartition(String catName, String dbName, String tableName, + List part_vals) throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getPartition(new TableName(catName, dbName, tableName), part_vals, null); + } /** * Get a partition. * @param catName catalog name. @@ -434,10 +468,12 @@ Partition getPartition(String catName, String dbName, String tableName, * @throws MetaException error reading from RDBMS. * @throws NoSuchObjectException no partition matching this specification exists. 
*/ - Partition getPartition(String catName, String dbName, String tableName, + default Partition getPartition(String catName, String dbName, String tableName, List part_vals, String writeIdList) - throws MetaException, NoSuchObjectException; + throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getPartition(new TableName(catName, dbName, tableName), part_vals, writeIdList); + } /** * Check whether a partition exists. @@ -450,9 +486,12 @@ Partition getPartition(String catName, String dbName, String tableName, * @throws MetaException failure reading RDBMS * @throws NoSuchObjectException this is never thrown. */ - boolean doesPartitionExist(String catName, String dbName, String tableName, + @Deprecated + default boolean doesPartitionExist(String catName, String dbName, String tableName, List partKeys, List part_vals) - throws MetaException, NoSuchObjectException; + throws MetaException, NoSuchObjectException { + throw new UnsupportedOperationException(); + } /** * Drop a partition. @@ -466,8 +505,10 @@ boolean doesPartitionExist(String catName, String dbName, String tableName, * @throws InvalidObjectException error dropping the statistics for the partition * @throws InvalidInputException error dropping the statistics for the partition */ - boolean dropPartition(String catName, String dbName, String tableName, String partName) - throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException; + default boolean dropPartition(String catName, String dbName, String tableName, String partName) + throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { + return unwrap(TableStore.class).dropPartitions(new TableName(catName, dbName, tableName), Arrays.asList(partName)); + } /** * Get some or all partitions for a table. @@ -479,8 +520,10 @@ boolean dropPartition(String catName, String dbName, String tableName, String pa * @throws MetaException error access the RDBMS. 
* @throws NoSuchObjectException no such table exists */ - List getPartitions(String catName, String dbName, String tableName, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException; + default List getPartitions(String catName, String dbName, String tableName, + GetPartitionsArgs args) throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getPartitions(new TableName(catName, dbName, tableName), args); + } /** * Get the location for every partition of a given table. If a partition location is a child of @@ -494,8 +537,10 @@ List getPartitions(String catName, String dbName, String tableName, * @param max The maximum number of partition locations returned, or -1 for all * @return The map of the partitionName, location pairs */ - Map getPartitionLocations(String catName, String dbName, String tblName, - String baseLocationToNotShow, int max); + default Map getPartitionLocations(String catName, String dbName, String tblName, + String baseLocationToNotShow, int max) { + return unwrap(TableStore.class).getPartitionLocations(new TableName(catName, dbName, tblName), baseLocationToNotShow, max); + } /** * Alter a table. @@ -508,9 +553,11 @@ Map getPartitionLocations(String catName, String dbName, String * @throws InvalidObjectException The new table object is invalid. * @throws MetaException something went wrong, usually in the RDBMS or storage. */ - Table alterTable(String catName, String dbname, String name, Table newTable, + default Table alterTable(String catName, String dbname, String name, Table newTable, String queryValidWriteIds) - throws InvalidObjectException, MetaException; + throws InvalidObjectException, MetaException { + return unwrap(TableStore.class).alterTable(new TableName(catName, dbname, name), newTable, queryValidWriteIds); + } /** * Update creation metadata for a materialized view. 
@@ -520,8 +567,10 @@ Table alterTable(String catName, String dbname, String name, Table newTable, * @param cm new creation metadata * @throws MetaException error accessing the RDBMS. */ - void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm) - throws MetaException; + default void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm) + throws MetaException { + unwrap(TableStore.class).updateCreationMetadata(new TableName(catName, dbname, tablename), cm); + } /** * Get table names that match a pattern. @@ -531,8 +580,10 @@ void updateCreationMetadata(String catName, String dbname, String tablename, Cre * @return list of table names, if any * @throws MetaException failure in querying the RDBMS */ - List getTables(String catName, String dbName, String pattern) - throws MetaException; + default List getTables(String catName, String dbName, String pattern) + throws MetaException { + return unwrap(TableStore.class).getTables(catName, dbName, pattern, null, -1); + } /** * Get table names that match a pattern. @@ -544,16 +595,20 @@ List getTables(String catName, String dbName, String pattern) * @return list of table names, if any * @throws MetaException failure in querying the RDBMS */ - List getTables(String catName, String dbName, String pattern, TableType tableType, int limit) - throws MetaException; + default List getTables(String catName, String dbName, String pattern, TableType tableType, int limit) + throws MetaException { + return unwrap(TableStore.class).getTables(catName, dbName, pattern, tableType, limit); + } /** * Retrieve all materialized views. * @return all materialized views in a catalog * @throws MetaException error querying the RDBMS */ - List
getAllMaterializedViewObjectsForRewriting(String catName) - throws MetaException; + default List
getAllMaterializedViewObjectsForRewriting(String catName) + throws MetaException { + return unwrap(TableStore.class).getAllMaterializedViewObjectsForRewriting(catName); + } /** * Get list of materialized views in a database. @@ -563,8 +618,10 @@ List
getAllMaterializedViewObjectsForRewriting(String catName) * @throws MetaException error querying the RDBMS * @throws NoSuchObjectException no such database */ - List getMaterializedViewsForRewriting(String catName, String dbName) - throws MetaException, NoSuchObjectException; + default List getMaterializedViewsForRewriting(String catName, String dbName) + throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getMaterializedViewsForRewriting(catName, dbName); + } /** @@ -575,8 +632,10 @@ List getMaterializedViewsForRewriting(String catName, String dbName) * @return list of matching table meta information. * @throws MetaException failure in querying the RDBMS. */ - List getTableMeta(String catName, String dbNames, String tableNames, - List tableTypes) throws MetaException; + default List getTableMeta(String catName, String dbNames, String tableNames, + List tableTypes) throws MetaException { + return unwrap(TableStore.class).getTableMeta(catName, dbNames, tableNames, tableTypes); + } /** * @param catName catalog name @@ -589,8 +648,10 @@ List getTableMeta(String catName, String dbNames, String tableNames, * If there are duplicate names, only one instance of the table will be returned * @throws MetaException failure in querying the RDBMS. */ - List
getTableObjectsByName(String catName, String dbname, List tableNames) - throws MetaException, UnknownDBException; + default List
getTableObjectsByName(String catName, String dbname, List tableNames) + throws MetaException, UnknownDBException { + return unwrap(TableStore.class).getTableObjectsByName(catName, dbname, tableNames, null, null); + } /** * Multi-table table-parameter update. @@ -612,8 +673,10 @@ List
getTableObjectsByName(String catName, String dbname, List ta * If there are duplicate names, only one instance of the table will be returned * @throws MetaException failure in querying the RDBMS. */ - List
getTableObjectsByName(String catName, String dbname, List tableNames, - GetProjectionsSpec projectionSpec, String tablePattern) throws MetaException, UnknownDBException; + default List
getTableObjectsByName(String catName, String dbname, List tableNames, + GetProjectionsSpec projectionSpec, String tablePattern) throws MetaException, UnknownDBException { + return unwrap(TableStore.class).getTableObjectsByName(catName, dbname, tableNames, projectionSpec, tablePattern); + } /** * Get all tables in a database. @@ -622,7 +685,9 @@ List
getTableObjectsByName(String catName, String dbname, List ta * @return list of table names * @throws MetaException failure in querying the RDBMS. */ - List getAllTables(String catName, String dbName) throws MetaException; + default List getAllTables(String catName, String dbName) throws MetaException { + return unwrap(TableStore.class).getTables(catName, dbName, null, null, -1); + } /** * Gets a list of tables based on a filter string and filter type. @@ -637,8 +702,10 @@ List
getTableObjectsByName(String catName, String dbname, List ta * @throws MetaException * @throws UnknownDBException */ - List listTableNamesByFilter(String catName, String dbName, String filter, - short max_tables) throws MetaException, UnknownDBException; + default List listTableNamesByFilter(String catName, String dbName, String filter, + short max_tables) throws MetaException, UnknownDBException { + return unwrap(TableStore.class).listTableNamesByFilter(catName, dbName, filter, max_tables); + } /** * Get a partial or complete list of names for partitions of a table. @@ -649,8 +716,17 @@ List listTableNamesByFilter(String catName, String dbName, String filter * @return list of partition names. * @throws MetaException there was an error accessing the RDBMS */ - List listPartitionNames(String catName, String db_name, - String tbl_name, short max_parts) throws MetaException; + default List listPartitionNames(String catName, String db_name, + String tbl_name, short max_parts) throws MetaException { + try { + return unwrap(TableStore.class).listPartitionNames(new TableName(catName, db_name, tbl_name), + null, null, null, max_parts); + } catch (NoSuchObjectException nse) { + // In case of NoSuchObjectException, this method returns an empty list to + // take care of the old clients. + return Collections.emptyList(); + } + } /** * Get a partial or complete list of names for partitions of a table. @@ -664,9 +740,12 @@ List listPartitionNames(String catName, String db_name, * @return list of partition names. 
* @throws MetaException there was an error accessing the RDBMS */ - List listPartitionNames(String catName, String dbName, String tblName, + default List listPartitionNames(String catName, String dbName, String tblName, String defaultPartName, byte[] exprBytes, String order, - int maxParts) throws MetaException, NoSuchObjectException; + int maxParts) throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).listPartitionNames(new TableName(catName, dbName, tblName), + defaultPartName, exprBytes, order, maxParts); + } /** * Get partition names with a filter. This is a portion of the SQL where clause. @@ -678,8 +757,10 @@ List listPartitionNames(String catName, String dbName, String tblName, * @throws MetaException Error accessing the RDBMS or processing the filter. * @throws NoSuchObjectException no such table. */ - List listPartitionNamesByFilter(String catName, String dbName, String tblName, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException; + default List listPartitionNamesByFilter(String catName, String dbName, String tblName, + GetPartitionsArgs args) throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).listPartitionNamesByFilter(new TableName(catName, dbName, tblName), args); + } /** * Get a list of partition values as one big struct. 
@@ -695,9 +776,12 @@ List listPartitionNamesByFilter(String catName, String dbName, String tb * @return struct with all of the partition value information * @throws MetaException error access the RDBMS */ - PartitionValuesResponse listPartitionValues(String catName, String db_name, String tbl_name, + default PartitionValuesResponse listPartitionValues(String catName, String db_name, String tbl_name, List cols, boolean applyDistinct, String filter, boolean ascending, - List order, long maxParts) throws MetaException; + List order, long maxParts) throws MetaException { + return unwrap(TableStore.class).listPartitionValues(new TableName(catName, db_name, tbl_name), + cols, applyDistinct, filter, ascending, order, maxParts); + } /** * Alter a partition. @@ -711,9 +795,11 @@ PartitionValuesResponse listPartitionValues(String catName, String db_name, Stri * @throws InvalidObjectException No such partition. * @throws MetaException error accessing the RDBMS. */ - Partition alterPartition(String catName, String db_name, String tbl_name, List part_vals, + default Partition alterPartition(String catName, String db_name, String tbl_name, List part_vals, Partition new_part, String queryValidWriteIds) - throws InvalidObjectException, MetaException; + throws InvalidObjectException, MetaException { + return unwrap(TableStore.class).alterPartition(new TableName(catName, db_name, tbl_name), part_vals, new_part, queryValidWriteIds); + } /** * Alter a set of partitions. 
@@ -731,10 +817,13 @@ Partition alterPartition(String catName, String db_name, String tbl_name, List alterPartitions(String catName, String db_name, String tbl_name, + default List alterPartitions(String catName, String db_name, String tbl_name, List> part_vals_list, List new_parts, long writeId, String queryValidWriteIds) - throws InvalidObjectException, MetaException; + throws InvalidObjectException, MetaException { + return unwrap(TableStore.class).alterPartitions(new TableName(catName, db_name, tbl_name), + part_vals_list, new_parts, writeId, queryValidWriteIds); + } /** * Get partitions with a filter. This is a portion of the SQL where clause. @@ -746,9 +835,11 @@ List alterPartitions(String catName, String db_name, String tbl_name, * @throws MetaException Error accessing the RDBMS or processing the filter. * @throws NoSuchObjectException no such table. */ - List getPartitionsByFilter( + default List getPartitionsByFilter( String catName, String dbName, String tblName, GetPartitionsArgs args) - throws MetaException, NoSuchObjectException; + throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getPartitionsByFilter(new TableName(catName, dbName, tblName), args); + } /** * Generic Partition request API, providing different kinds of filtering and controlling output. @@ -778,9 +869,11 @@ List getPartitionsByFilter( * @throws MetaException in case of errors * @throws NoSuchObjectException when table isn't found */ - List getPartitionSpecsByFilterAndProjection(Table table, + default List getPartitionSpecsByFilterAndProjection(Table table, GetProjectionsSpec projectionSpec, GetPartitionsFilterSpec filterSpec) - throws MetaException, NoSuchObjectException; + throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getPartitionSpecsByFilterAndProjection(table, projectionSpec, filterSpec); + } /** * Get partitions using an already parsed expression. 
@@ -791,9 +884,11 @@ List getPartitionSpecsByFilterAndProjection(Table table, * @return true if the result contains unknown partitions. * @throws TException error executing the expression */ - boolean getPartitionsByExpr(String catName, String dbName, String tblName, + default boolean getPartitionsByExpr(String catName, String dbName, String tblName, List result, GetPartitionsArgs args) - throws TException; + throws TException { + return unwrap(TableStore.class).getPartitionsByExpr(new TableName(catName, dbName, tblName), result, args); + } /** * Get the number of partitions that match a provided SQL filter. @@ -805,8 +900,10 @@ boolean getPartitionsByExpr(String catName, String dbName, String tblName, * @throws MetaException error accessing the RDBMS or executing the filter * @throws NoSuchObjectException no such table */ - int getNumPartitionsByFilter(String catName, String dbName, String tblName, String filter) - throws MetaException, NoSuchObjectException; + default int getNumPartitionsByFilter(String catName, String dbName, String tblName, String filter) + throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getNumPartitionsByFilter(new TableName(catName, dbName, tblName), filter); + } /** * Get the number of partitions that match a given partial specification. @@ -819,8 +916,10 @@ int getNumPartitionsByFilter(String catName, String dbName, String tblName, Stri * @throws MetaException error accessing the RDBMS or working with the specification. * @throws NoSuchObjectException no such table. */ - int getNumPartitionsByPs(String catName, String dbName, String tblName, List partVals) - throws MetaException, NoSuchObjectException; + default int getNumPartitionsByPs(String catName, String dbName, String tblName, List partVals) + throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getNumPartitionsByPs(new TableName(catName, dbName, tblName), partVals); + } /** * Get partitions by name. 
@@ -849,27 +948,45 @@ default List getPartitionsByNames(String catName, String dbName, Stri * @throws MetaException error accessing the RDBMS. * @throws NoSuchObjectException No such table. */ - List getPartitionsByNames(String catName, String dbName, String tblName, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException; + default List getPartitionsByNames(String catName, String dbName, String tblName, + GetPartitionsArgs args) throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).getPartitionsByNames(new TableName(catName, dbName, tblName), args); + } - Table markPartitionForEvent(String catName, String dbName, String tblName, Map partVals, PartitionEventType evtType) throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException; + default Table markPartitionForEvent(String catName, String dbName, String tblName, Map partVals, PartitionEventType evtType) + throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { + return unwrap(TableStore.class).markPartitionForEvent(new TableName(catName, dbName, tblName), partVals, evtType); + } - boolean isPartitionMarkedForEvent(String catName, String dbName, String tblName, Map partName, PartitionEventType evtType) throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException; + default boolean isPartitionMarkedForEvent(String catName, String dbName, String tblName, Map partName, PartitionEventType evtType) + throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { + return unwrap(TableStore.class).isPartitionMarkedForEvent(new TableName(catName, dbName, tblName), partName, evtType); + } - boolean addRole(String rowName, String ownerName) - throws InvalidObjectException, MetaException, NoSuchObjectException; + default boolean addRole(String rowName, String ownerName) + throws InvalidObjectException, MetaException, 
NoSuchObjectException { + return unwrap(PrivilegeStore.class).addRole(rowName, ownerName); + } - boolean removeRole(String roleName) throws MetaException, NoSuchObjectException; + default boolean removeRole(String roleName) throws MetaException, NoSuchObjectException { + return unwrap(PrivilegeStore.class).removeRole(roleName); + } - boolean grantRole(Role role, String userName, PrincipalType principalType, + default boolean grantRole(Role role, String userName, PrincipalType principalType, String grantor, PrincipalType grantorType, boolean grantOption) - throws MetaException, NoSuchObjectException, InvalidObjectException; + throws MetaException, NoSuchObjectException, InvalidObjectException { + return unwrap(PrivilegeStore.class).grantRole(role, userName, principalType, grantor, grantorType, grantOption); + } - boolean revokeRole(Role role, String userName, PrincipalType principalType, - boolean grantOption) throws MetaException, NoSuchObjectException; + default boolean revokeRole(Role role, String userName, PrincipalType principalType, + boolean grantOption) throws MetaException, NoSuchObjectException { + return unwrap(PrivilegeStore.class).revokeRole(role, userName, principalType, grantOption); + } - PrincipalPrivilegeSet getUserPrivilegeSet(String userName, - List groupNames) throws InvalidObjectException, MetaException; + default PrincipalPrivilegeSet getUserPrivilegeSet(String userName, + List groupNames) throws InvalidObjectException, MetaException { + return unwrap(PrivilegeStore.class).getUserPrivilegeSet(userName, groupNames); + } /** * Get privileges for a database for a user. 
@@ -881,8 +998,10 @@ PrincipalPrivilegeSet getUserPrivilegeSet(String userName, * @throws InvalidObjectException no such database * @throws MetaException error accessing the RDBMS */ - PrincipalPrivilegeSet getDBPrivilegeSet (String catName, String dbName, String userName, - List groupNames) throws InvalidObjectException, MetaException; + default PrincipalPrivilegeSet getDBPrivilegeSet (String catName, String dbName, String userName, + List groupNames) throws InvalidObjectException, MetaException { + return unwrap(PrivilegeStore.class).getDBPrivilegeSet(catName, dbName, userName, groupNames); + } /** * Get privileges for a connector for a user. @@ -894,8 +1013,10 @@ PrincipalPrivilegeSet getDBPrivilegeSet (String catName, String dbName, String u * @throws InvalidObjectException no such database * @throws MetaException error accessing the RDBMS */ - PrincipalPrivilegeSet getConnectorPrivilegeSet (String catName, String connectorName, String userName, - List groupNames) throws InvalidObjectException, MetaException; + default PrincipalPrivilegeSet getConnectorPrivilegeSet (String catName, String connectorName, String userName, + List groupNames) throws InvalidObjectException, MetaException { + return unwrap(PrivilegeStore.class).getConnectorPrivilegeSet(catName, connectorName, userName, groupNames); + } /** * Get privileges for a table for a user. 
@@ -908,8 +1029,10 @@ PrincipalPrivilegeSet getConnectorPrivilegeSet (String catName, String connector * @throws InvalidObjectException no such table * @throws MetaException error accessing the RDBMS */ - PrincipalPrivilegeSet getTablePrivilegeSet (String catName, String dbName, String tableName, - String userName, List groupNames) throws InvalidObjectException, MetaException; + default PrincipalPrivilegeSet getTablePrivilegeSet (String catName, String dbName, String tableName, + String userName, List groupNames) throws InvalidObjectException, MetaException { + return unwrap(PrivilegeStore.class).getTablePrivilegeSet(new TableName(catName, dbName, tableName), userName, groupNames); + } /** * Get privileges for a partition for a user. @@ -923,8 +1046,10 @@ PrincipalPrivilegeSet getTablePrivilegeSet (String catName, String dbName, Strin * @throws InvalidObjectException no such partition * @throws MetaException error accessing the RDBMS */ - PrincipalPrivilegeSet getPartitionPrivilegeSet (String catName, String dbName, String tableName, - String partition, String userName, List groupNames) throws InvalidObjectException, MetaException; + default PrincipalPrivilegeSet getPartitionPrivilegeSet (String catName, String dbName, String tableName, + String partition, String userName, List groupNames) throws InvalidObjectException, MetaException { + return unwrap(PrivilegeStore.class).getPartitionPrivilegeSet(new TableName(catName, dbName, tableName), partition, userName, groupNames); + } /** * Get privileges for a column in a table or partition for a user. 
@@ -939,11 +1064,16 @@ PrincipalPrivilegeSet getPartitionPrivilegeSet (String catName, String dbName, S * @throws InvalidObjectException no such table, partition, or column * @throws MetaException error accessing the RDBMS */ - PrincipalPrivilegeSet getColumnPrivilegeSet (String catName, String dbName, String tableName, String partitionName, - String columnName, String userName, List groupNames) throws InvalidObjectException, MetaException; + default PrincipalPrivilegeSet getColumnPrivilegeSet (String catName, String dbName, String tableName, String partitionName, + String columnName, String userName, List groupNames) throws InvalidObjectException, MetaException { + return unwrap(PrivilegeStore.class).getColumnPrivilegeSet(new TableName(catName, dbName, tableName), + partitionName, columnName, userName, groupNames); + } - List listPrincipalGlobalGrants(String principalName, - PrincipalType principalType); + default List listPrincipalGlobalGrants(String principalName, + PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listPrincipalGlobalGrants(principalName, principalType); + } /** * For a given principal name and type, list the DB Grants @@ -953,8 +1083,10 @@ List listPrincipalGlobalGrants(String principalName, * @param dbName database name * @return list of privileges for that principal on the specified database. */ - List listPrincipalDBGrants(String principalName, - PrincipalType principalType, String catName, String dbName); + default List listPrincipalDBGrants(String principalName, + PrincipalType principalType, String catName, String dbName) { + return unwrap(PrivilegeStore.class).listPrincipalDBGrants(principalName, principalType, catName, dbName); + } /** * For a given principal name and type, list the DC Grants @@ -963,8 +1095,10 @@ List listPrincipalDBGrants(String principalName, * @param dcName data connector name * @return list of privileges for that principal on the specified data connector. 
*/ - List listPrincipalDCGrants(String principalName, - PrincipalType principalType, String dcName); + default List listPrincipalDCGrants(String principalName, + PrincipalType principalType, String dcName) { + return unwrap(PrivilegeStore.class).listPrincipalDCGrants(principalName, principalType, dcName); + } /** * For a given principal name and type, list the Table Grants @@ -975,9 +1109,11 @@ List listPrincipalDCGrants(String principalName, * @param tableName table name * @return list of privileges for that principal on the specified database. */ - List listAllTableGrants( + default List listAllTableGrants( String principalName, PrincipalType principalType, String catName, String dbName, - String tableName); + String tableName) { + return unwrap(PrivilegeStore.class).listAllTableGrants(principalName, principalType, new TableName(catName, dbName, tableName)); + } /** * For a given principal name and type, list the Table Grants @@ -989,9 +1125,12 @@ List listAllTableGrants( * @param partName partition name (not value) * @return list of privileges for that principal on the specified database. */ - List listPrincipalPartitionGrants( + default List listPrincipalPartitionGrants( String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, List partValues, String partName); + String tableName, List partValues, String partName) { + return unwrap(PrivilegeStore.class).listPrincipalPartitionGrants(principalName, principalType, + new TableName(catName, dbName, tableName), partValues, partName); + } /** * For a given principal name and type, list the Table Grants @@ -1003,9 +1142,12 @@ List listPrincipalPartitionGrants( * @param columnName column name * @return list of privileges for that principal on the specified database. 
*/ - List listPrincipalTableColumnGrants( + default List listPrincipalTableColumnGrants( String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, String columnName); + String tableName, String columnName) { + return unwrap(PrivilegeStore.class).listPrincipalTableColumnGrants(principalName, principalType, + new TableName(catName, dbName, tableName), columnName); + } /** * For a given principal name and type, list the Table Grants @@ -1018,29 +1160,46 @@ List listPrincipalTableColumnGrants( * @param columnName column name * @return list of privileges for that principal on the specified database. */ - List listPrincipalPartitionColumnGrants( + default List listPrincipalPartitionColumnGrants( String principalName, PrincipalType principalType, String catName, String dbName, - String tableName, List partValues, String partName, String columnName); + String tableName, List partValues, String partName, String columnName) { + return unwrap(PrivilegeStore.class).listPrincipalPartitionColumnGrants(principalName, principalType, + new TableName(catName, dbName, tableName), partValues, partName, columnName); + } - boolean grantPrivileges (PrivilegeBag privileges) - throws InvalidObjectException, MetaException, NoSuchObjectException; + default boolean grantPrivileges (PrivilegeBag privileges) + throws InvalidObjectException, MetaException, NoSuchObjectException { + return unwrap(PrivilegeStore.class).grantPrivileges(privileges); + } - boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) - throws InvalidObjectException, MetaException, NoSuchObjectException; + default boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) + throws InvalidObjectException, MetaException, NoSuchObjectException { + return unwrap(PrivilegeStore.class).revokePrivileges(privileges, grantOption); + } - boolean refreshPrivileges(HiveObjectRef objToRefresh, String authorizer, PrivilegeBag grantPrivileges) - throws 
InvalidObjectException, MetaException, NoSuchObjectException; + default boolean refreshPrivileges(HiveObjectRef objToRefresh, String authorizer, PrivilegeBag grantPrivileges) + throws InvalidObjectException, MetaException, NoSuchObjectException { + return unwrap(PrivilegeStore.class).refreshPrivileges(objToRefresh, authorizer, grantPrivileges); + } - org.apache.hadoop.hive.metastore.api.Role getRole( - String roleName) throws NoSuchObjectException; + default org.apache.hadoop.hive.metastore.api.Role getRole( + String roleName) throws NoSuchObjectException { + return unwrap(PrivilegeStore.class).getRole(roleName); + } - List listRoleNames(); + default List listRoleNames() { + return unwrap(PrivilegeStore.class).listRoleNames(); + } - List listRoles(String principalName, - PrincipalType principalType); + default List listRoles(String principalName, + PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listRoles(principalName, principalType); + } - List listRolesWithGrants(String principalName, - PrincipalType principalType); + default List listRolesWithGrants(String principalName, + PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listRolesWithGrants(principalName, principalType); + } /** @@ -1048,7 +1207,9 @@ List listRolesWithGrants(String principalName, * @param roleName * @return */ - List listRoleMembers(String roleName); + default List listRoleMembers(String roleName) { + return unwrap(PrivilegeStore.class).listRoleMembers(roleName); + } /** * Fetch a partition along with privilege information for a particular user. 
@@ -1063,9 +1224,12 @@ List listRolesWithGrants(String principalName, * @throws NoSuchObjectException no such partition exists * @throws InvalidObjectException error fetching privilege information */ - Partition getPartitionWithAuth(String catName, String dbName, String tblName, + default Partition getPartitionWithAuth(String catName, String dbName, String tblName, List partVals, String user_name, List group_names) - throws MetaException, NoSuchObjectException, InvalidObjectException; + throws MetaException, NoSuchObjectException, InvalidObjectException { + return unwrap(TableStore.class) + .getPartitionWithAuth(new TableName(catName, dbName, tblName), partVals, user_name, group_names); + } /** * Lists partition names that match a given partial specification @@ -1083,9 +1247,11 @@ Partition getPartitionWithAuth(String catName, String dbName, String tblName, * @throws MetaException error accessing RDBMS * @throws NoSuchObjectException No such table exists */ - List listPartitionNamesPs(String catName, String db_name, String tbl_name, + default List listPartitionNamesPs(String catName, String db_name, String tbl_name, List part_vals, short max_parts) - throws MetaException, NoSuchObjectException; + throws MetaException, NoSuchObjectException { + return unwrap(TableStore.class).listPartitionNamesPs(new TableName(catName, db_name, tbl_name), part_vals, max_parts); + } /** * Lists partitions that match a given partial specification and sets their auth privileges. 
@@ -1101,8 +1267,10 @@ List listPartitionNamesPs(String catName, String db_name, String tbl_nam * @throws NoSuchObjectException No such table exists * @throws InvalidObjectException error access privilege information */ - List listPartitionsPsWithAuth(String catName, String db_name, String tbl_name, - GetPartitionsArgs args) throws MetaException, InvalidObjectException, NoSuchObjectException; + default List listPartitionsPsWithAuth(String catName, String db_name, String tbl_name, + GetPartitionsArgs args) throws MetaException, InvalidObjectException, NoSuchObjectException { + return unwrap(TableStore.class).listPartitionsPsWithAuth(new TableName(catName, db_name, tbl_name), args); + } /** Persists the given column statistics object to the metastore * @param colStats object to persist @@ -1306,8 +1474,10 @@ void updateMasterKey(Integer seqNo, String key) * @throws MetaException error access RDBMS or storage. * @throws NoSuchObjectException One or more of the partitions does not exist. */ - void dropPartitions(String catName, String dbName, String tblName, List partNames) - throws MetaException, NoSuchObjectException; + default void dropPartitions(String catName, String dbName, String tblName, List partNames) + throws MetaException, NoSuchObjectException { + unwrap(TableStore.class).dropPartitions(new TableName(catName, dbName, tblName), partNames); + } /** * List all DB grants for a given principal. @@ -1315,8 +1485,10 @@ void dropPartitions(String catName, String dbName, String tblName, List * @param principalType type * @return all DB grants for this principal */ - List listPrincipalDBGrantsAll( - String principalName, PrincipalType principalType); + default List listPrincipalDBGrantsAll( + String principalName, PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listPrincipalDBGrantsAll(principalName, principalType); + } /** * List all DC grants for a given principal. 
@@ -1324,8 +1496,10 @@ List listPrincipalDBGrantsAll( * @param principalType type * @return all DC grants for this principal */ - List listPrincipalDCGrantsAll( - String principalName, PrincipalType principalType); + default List listPrincipalDCGrantsAll( + String principalName, PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listPrincipalDCGrantsAll(principalName, principalType); + } /** * List all Table grants for a given principal @@ -1333,8 +1507,10 @@ List listPrincipalDCGrantsAll( * @param principalType type * @return all Table grants for this principal */ - List listPrincipalTableGrantsAll( - String principalName, PrincipalType principalType); + default List listPrincipalTableGrantsAll( + String principalName, PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listPrincipalTableGrantsAll(principalName, principalType); + } /** * List all Partition grants for a given principal @@ -1342,8 +1518,10 @@ List listPrincipalTableGrantsAll( * @param principalType type * @return all Partition grants for this principal */ - List listPrincipalPartitionGrantsAll( - String principalName, PrincipalType principalType); + default List listPrincipalPartitionGrantsAll( + String principalName, PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listPrincipalPartitionGrantsAll(principalName, principalType); + } /** * List all Table column grants for a given principal @@ -1351,8 +1529,10 @@ List listPrincipalPartitionGrantsAll( * @param principalType type * @return all Table column grants for this principal */ - List listPrincipalTableColumnGrantsAll( - String principalName, PrincipalType principalType); + default List listPrincipalTableColumnGrantsAll( + String principalName, PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listPrincipalTableColumnGrantsAll(principalName, principalType); + } /** * List all Partition column grants for a given principal @@ -1360,10 +1540,14 @@ List 
listPrincipalTableColumnGrantsAll( * @param principalType type * @return all Partition column grants for this principal */ - List listPrincipalPartitionColumnGrantsAll( - String principalName, PrincipalType principalType); + default List listPrincipalPartitionColumnGrantsAll( + String principalName, PrincipalType principalType) { + return unwrap(PrivilegeStore.class).listPrincipalPartitionColumnGrantsAll(principalName, principalType); + } - List listGlobalGrantsAll(); + default List listGlobalGrantsAll() { + return unwrap(PrivilegeStore.class).listGlobalGrantsAll(); + } /** * Find all the privileges for a given database. @@ -1371,14 +1555,18 @@ List listPrincipalPartitionColumnGrantsAll( * @param dbName database name * @return list of all privileges. */ - List listDBGrantsAll(String catName, String dbName); + default List listDBGrantsAll(String catName, String dbName) { + return unwrap(PrivilegeStore.class).listDBGrantsAll(catName, dbName); + } /** * Find all the privileges for a given data connector. * @param dcName data connector name * @return list of all privileges. */ - List listDCGrantsAll(String dcName); + default List listDCGrantsAll(String dcName) { + return unwrap(PrivilegeStore.class).listDCGrantsAll(dcName); + } /** * Find all of the privileges for a given column in a given partition. 
@@ -1389,8 +1577,11 @@ List listPrincipalPartitionColumnGrantsAll( * @param columnName column name * @return all privileges on this column in this partition */ - List listPartitionColumnGrantsAll( - String catName, String dbName, String tableName, String partitionName, String columnName); + default List listPartitionColumnGrantsAll( + String catName, String dbName, String tableName, String partitionName, String columnName) { + return unwrap(PrivilegeStore.class).listPartitionColumnGrantsAll(new TableName(catName, dbName, tableName), + partitionName, columnName); + } /** * Find all of the privileges for a given table @@ -1399,7 +1590,9 @@ List listPartitionColumnGrantsAll( * @param tableName table name * @return all privileges on this table */ - List listTableGrantsAll(String catName, String dbName, String tableName); + default List listTableGrantsAll(String catName, String dbName, String tableName) { + return unwrap(PrivilegeStore.class).listTableGrantsAll(new TableName(catName, dbName, tableName)); + } /** * Find all of the privileges for a given partition. @@ -1409,8 +1602,10 @@ List listPartitionColumnGrantsAll( * @param partitionName partition name (not value) * @return all privileges on this partition */ - List listPartitionGrantsAll( - String catName, String dbName, String tableName, String partitionName); + default List listPartitionGrantsAll( + String catName, String dbName, String tableName, String partitionName) { + return unwrap(PrivilegeStore.class).listPartitionGrantsAll(new TableName(catName, dbName, tableName), partitionName); + } /** * Find all of the privileges for a given column in a given table. 
@@ -1420,8 +1615,10 @@ List listPartitionGrantsAll( * @param columnName column name * @return all privileges on this column in this table */ - List listTableColumnGrantsAll( - String catName, String dbName, String tableName, String columnName); + default List listTableColumnGrantsAll( + String catName, String dbName, String tableName, String columnName) { + return unwrap(PrivilegeStore.class).listTableColumnGrantsAll(new TableName(catName, dbName, tableName), columnName); + } /** * Register a user-defined function based on the function specification passed in. @@ -1532,22 +1729,27 @@ List getPartitionColStatsForDatabase(String catName, * @param rqst Request containing information on the last processed notification. * @return list of notifications, sorted by eventId */ - NotificationEventResponse getNextNotification(NotificationEventRequest rqst); - + default NotificationEventResponse getNextNotification(NotificationEventRequest rqst) { + return unwrap(NotificationStore.class).getNextNotification(rqst); + } /** * Add a notification entry. This should only be called from inside the metastore * @param event the notification to add * @throws MetaException error accessing RDBMS */ - void addNotificationEvent(NotificationEvent event) throws MetaException; + default void addNotificationEvent(NotificationEvent event) throws MetaException { + unwrap(NotificationStore.class).addNotificationEvent(event); + } /** * Remove older notification events. * * @param olderThan Remove any events older or equal to a given number of seconds */ - void cleanNotificationEvents(int olderThan); + default void cleanNotificationEvents(int olderThan) { + unwrap(NotificationStore.class).cleanNotificationEvents(olderThan); + } /** * Get the last issued notification event id. This is intended for use by the export command @@ -1555,14 +1757,18 @@ List getPartitionColStatsForDatabase(String catName, * and determine which notification events happened before or after the export. 
* @return */ - CurrentNotificationEventId getCurrentNotificationEventId(); + default CurrentNotificationEventId getCurrentNotificationEventId() { + return unwrap(NotificationStore.class).getCurrentNotificationEventId(); + } /** * Get the number of events corresponding to given database with fromEventId. * This is intended for use by the repl commands to track the progress of incremental dump. * @return */ - NotificationEventsCountResponse getNotificationEventsCount(NotificationEventsCountRequest rqst); + default NotificationEventsCountResponse getNotificationEventsCount(NotificationEventsCountRequest rqst) { + return unwrap(NotificationStore.class).getNotificationEventsCount(rqst); + } /* * Flush any catalog objects held by the metastore implementation. Note that this does not @@ -1626,19 +1832,25 @@ default FileMetadataHandler getFileMetadataHandler(FileMetadataExprType type) { * Gets total number of tables. */ @InterfaceStability.Evolving - int getTableCount() throws MetaException; + default int getTableCount() throws MetaException { + return unwrap(TableStore.class).getObjectCount("tableName", MTable.class.getName()); + } /** * Gets total number of partitions. */ @InterfaceStability.Evolving - int getPartitionCount() throws MetaException; + default int getPartitionCount() throws MetaException { + return unwrap(TableStore.class).getObjectCount("partitionName", MPartition.class.getName()); + } /** * Gets total number of databases. */ @InterfaceStability.Evolving - int getDatabaseCount() throws MetaException; + default int getDatabaseCount() throws MetaException { + return unwrap(TableStore.class).getObjectCount("name", MDatabase.class.getName()); + } /** * SQLPrimaryKey represents a single primary key column. @@ -2020,7 +2232,9 @@ Map> getPartitionColsWithStats(String catName, String dbNam * Remove older notification events. 
* @param olderThan Remove any events older or equal to a given number of seconds */ - void cleanWriteNotificationEvents(int olderThan); + default void cleanWriteNotificationEvents(int olderThan) { + unwrap(NotificationStore.class).cleanWriteNotificationEvents(olderThan); + } /** * Get all write events for a specific transaction . @@ -2028,7 +2242,9 @@ Map> getPartitionColsWithStats(String catName, String dbNam * @param dbName the name of db for which dump is being taken * @param tableName the name of the table for which the dump is being taken */ - List getAllWriteEventInfo(long txnId, String dbName, String tableName) throws MetaException; + default List getAllWriteEventInfo(long txnId, String dbName, String tableName) throws MetaException { + return unwrap(NotificationStore.class).getAllWriteEventInfo(txnId, dbName, tableName); + } /** * Checking if table is part of a materialized view. @@ -2037,7 +2253,9 @@ Map> getPartitionColsWithStats(String catName, String dbNam * @param tblName table name * @return list of materialized views that uses the table */ - List isPartOfMaterializedView(String catName, String dbName, String tblName); + default List isPartOfMaterializedView(String catName, String dbName, String tblName) { + return unwrap(TableStore.class).isPartOfMaterializedView(new TableName(catName, dbName, tblName)); + } /** * Returns details about a scheduled query by name. 
@@ -2103,7 +2321,15 @@ Map> updatePartitionColumnStatisticsInBatch( Package findPackage(GetPackageRequest request); List listPackages(ListPackageRequest request); void dropPackage(DropPackageRequest request); - public MTable ensureGetMTable(String catName, String dbName, String tblName) throws NoSuchObjectException; + default MTable ensureGetMTable(String catName, String dbName, String tblName) throws NoSuchObjectException { + return unwrap(TableStore.class).ensureGetMTable(new TableName(catName, dbName, tblName)); + } + + MDatabase ensureGetMDatabase(String catName, String dbName) throws NoSuchObjectException; + + default MPartition ensureGetMPartition(TableName tableName, List partVals) throws MetaException { + return unwrap(TableStore.class).ensureGetMPartition(tableName, partVals); + } /** Persistent Property Management. */ default PropertyStore getPropertyStore() { @@ -2117,7 +2343,16 @@ default PropertyStore getPropertyStore() { */ default long updateParameterWithExpectedValue(Table table, String key, String expectedValue, String newValue) throws MetaException, NoSuchObjectException { - throw new UnsupportedOperationException("This Store doesn't support updating table parameter with expected value"); + return unwrap(TableStore.class).updateParameterWithExpectedValue(table, key, expectedValue, newValue); } + /** + * Returns an object that implements the given interface to allow the operation + * on the specific metadata. 
+ * + * @param iface A Class defining an interface that the result must implement + * @return an object that implements the interface + * @throws RuntimeException If the context cannot be unwrapped to the provided class + */ + T unwrap(Class iface); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 04468bd2139c..7622efd155ca 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -64,6 +64,7 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.messaging.*; +import org.apache.hadoop.hive.metastore.model.MDatabase; import org.apache.hadoop.hive.metastore.model.MTable; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils; @@ -1426,22 +1427,6 @@ public Table getTable(String catName, String dbName, String tblName, String vali return part; } - @Override public boolean doesPartitionExist(String catName, String dbName, String tblName, List partKeys, - List partVals) throws MetaException, NoSuchObjectException { - catName = normalizeIdentifier(catName); - dbName = StringUtils.normalizeIdentifier(dbName); - tblName = StringUtils.normalizeIdentifier(tblName); - if (!shouldCacheTable(catName, dbName, tblName) || (canUseEvents && rawStore.isActiveTransaction())) { - return rawStore.doesPartitionExist(catName, dbName, tblName, partKeys, partVals); - } - Table tbl = sharedCache.getTableFromCache(catName, dbName, tblName); - if (tbl == null) { - // The table containing the partition is not yet loaded in cache - return 
rawStore.doesPartitionExist(catName, dbName, tblName, partKeys, partVals); - } - return sharedCache.existPartitionFromCache(catName, dbName, tblName, partVals); - } - @Override public boolean dropPartition(String catName, String dbName, String tblName, String partName) throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { boolean succ = rawStore.dropPartition(catName, dbName, tblName, partName); @@ -1494,11 +1479,6 @@ public Table getTable(String catName, String dbName, String tblName, String vali return parts; } - @Override public Map getPartitionLocations(String catName, String dbName, String tblName, - String baseLocationToNotShow, int max) { - return rawStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max); - } - @Override public Table alterTable(String catName, String dbName, String tblName, Table newTable, String validWriteIds) throws InvalidObjectException, MetaException { newTable = rawStore.alterTable(catName, dbName, tblName, newTable, validWriteIds); @@ -1531,39 +1511,11 @@ public Table getTable(String catName, String dbName, String tblName, String vali return newTable; } - @Override public void updateTableParams(List updates) throws MetaException, NoSuchObjectException { + @Override + public void updateTableParams(List updates) throws MetaException, NoSuchObjectException { rawStore.updateTableParams(updates); } - @Override public void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm) - throws MetaException { - rawStore.updateCreationMetadata(catName, dbname, tablename, cm); - } - - @Override public List getTables(String catName, String dbName, String pattern) throws MetaException { - return rawStore.getTables(catName, dbName, pattern); - } - - @Override public List getTables(String catName, String dbName, String pattern, TableType tableType, int limit) - throws MetaException { - return rawStore.getTables(catName, dbName, pattern, tableType, 
limit); - } - - @Override public List
getAllMaterializedViewObjectsForRewriting(String catName) throws MetaException { - // TODO functionCache - return rawStore.getAllMaterializedViewObjectsForRewriting(catName); - } - - @Override public List getMaterializedViewsForRewriting(String catName, String dbName) - throws MetaException, NoSuchObjectException { - return rawStore.getMaterializedViewsForRewriting(catName, dbName); - } - - @Override public List getTableMeta(String catName, String dbNames, String tableNames, - List tableTypes) throws MetaException { - return rawStore.getTableMeta(catName, dbNames, tableNames, tableTypes); - } - @Override public List
getTableObjectsByName(String catName, String dbName, List tblNames) throws MetaException, UnknownDBException { if (canUseEvents && rawStore.isActiveTransaction()) { @@ -1601,23 +1553,6 @@ public Table getTable(String catName, String dbName, String tblName, String vali return tables; } - @Override - public List
getTableObjectsByName(String catName, String db, List tbl_names, - GetProjectionsSpec projectionsSpec, String tablePattern) throws MetaException, UnknownDBException { - return rawStore.getTableObjectsByName(catName, db, tbl_names, projectionsSpec, tablePattern); - } - - @Override public List getAllTables(String catName, String dbName) throws MetaException { - return rawStore.getAllTables(catName, dbName); - } - - @Override - // TODO: implement using SharedCache - public List listTableNamesByFilter(String catName, String dbName, String filter, short maxTables) - throws MetaException, UnknownDBException { - return rawStore.listTableNamesByFilter(catName, dbName, filter, maxTables); - } - @Override public List listPartitionNames(String catName, String dbName, String tblName, short maxParts) throws MetaException { catName = StringUtils.normalizeIdentifier(catName); @@ -1641,24 +1576,6 @@ public List listTableNamesByFilter(String catName, String dbName, String return partitionNames; } - @Override - public List listPartitionNames(String catName, String dbName, String tblName, String defaultPartName, - byte[] exprBytes, String order, int maxParts) throws MetaException, NoSuchObjectException { - throw new UnsupportedOperationException(); - } - - @Override - public List listPartitionNamesByFilter(String catName, String dbName, String tblName, - GetPartitionsArgs args) throws MetaException, NoSuchObjectException { - throw new UnsupportedOperationException(); - } - - @Override public PartitionValuesResponse listPartitionValues(String catName, String dbName, String tblName, - List cols, boolean applyDistinct, String filter, boolean ascending, List order, - long maxParts) throws MetaException { - throw new UnsupportedOperationException(); - } - @Override public Partition alterPartition(String catName, String dbName, String tblName, List partVals, Partition newPart, String validWriteIds) throws InvalidObjectException, MetaException { newPart = rawStore.alterPartition(catName, 
dbName, tblName, partVals, newPart, validWriteIds); @@ -1708,22 +1625,6 @@ private boolean getPartitionNamesPrunedByExprNoTxn(Table table, byte[] expr, Str return expressionProxy.filterPartitionsByExpr(table.getPartitionKeys(), expr, defaultPartName, result); } - @Override - // TODO: implement using SharedCache - public List getPartitionsByFilter(String catName, String dbName, String tblName, GetPartitionsArgs args) - throws MetaException, NoSuchObjectException { - return rawStore.getPartitionsByFilter(catName, dbName, tblName, args); - } - - @Override - /** - * getPartitionSpecsByFilterAndProjection interface is currently non-cacheable. - */ public List getPartitionSpecsByFilterAndProjection(Table table, - GetProjectionsSpec projectionSpec, GetPartitionsFilterSpec filterSpec) - throws MetaException, NoSuchObjectException { - return rawStore.getPartitionSpecsByFilterAndProjection(table, projectionSpec, filterSpec); - } - @Override public boolean getPartitionsByExpr(String catName, String dbName, String tblName, List result, GetPartitionsArgs args) throws TException { catName = StringUtils.normalizeIdentifier(catName); @@ -1749,11 +1650,6 @@ public List getPartitionsByFilter(String catName, String dbName, Stri return hasUnknownPartitions; } - @Override public int getNumPartitionsByFilter(String catName, String dbName, String tblName, String filter) - throws MetaException, NoSuchObjectException { - return rawStore.getNumPartitionsByFilter(catName, dbName, tblName, filter); - } - @VisibleForTesting public static List partNameToVals(String name) { if (name == null) { return null; @@ -1789,145 +1685,6 @@ public List getPartitionsByFilter(String catName, String dbName, Stri return partitions; } - @Override public Table markPartitionForEvent(String catName, String dbName, String tblName, - Map partVals, PartitionEventType evtType) - throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { - return 
rawStore.markPartitionForEvent(catName, dbName, tblName, partVals, evtType); - } - - @Override public boolean isPartitionMarkedForEvent(String catName, String dbName, String tblName, - Map partName, PartitionEventType evtType) - throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { - return rawStore.isPartitionMarkedForEvent(catName, dbName, tblName, partName, evtType); - } - - @Override public boolean addRole(String rowName, String ownerName) - throws InvalidObjectException, MetaException, NoSuchObjectException { - return rawStore.addRole(rowName, ownerName); - } - - @Override public boolean removeRole(String roleName) throws MetaException, NoSuchObjectException { - return rawStore.removeRole(roleName); - } - - @Override public boolean grantRole(Role role, String userName, PrincipalType principalType, String grantor, - PrincipalType grantorType, boolean grantOption) - throws MetaException, NoSuchObjectException, InvalidObjectException { - return rawStore.grantRole(role, userName, principalType, grantor, grantorType, grantOption); - } - - @Override public boolean revokeRole(Role role, String userName, PrincipalType principalType, boolean grantOption) - throws MetaException, NoSuchObjectException { - return rawStore.revokeRole(role, userName, principalType, grantOption); - } - - @Override public PrincipalPrivilegeSet getUserPrivilegeSet(String userName, List groupNames) - throws InvalidObjectException, MetaException { - return rawStore.getUserPrivilegeSet(userName, groupNames); - } - - @Override public PrincipalPrivilegeSet getDBPrivilegeSet(String catName, String dbName, String userName, - List groupNames) throws InvalidObjectException, MetaException { - return rawStore.getDBPrivilegeSet(catName, dbName, userName, groupNames); - } - - @Override public PrincipalPrivilegeSet getConnectorPrivilegeSet(String catName, String connectorName, String userName, - List groupNames) throws InvalidObjectException, MetaException { - 
return rawStore.getConnectorPrivilegeSet(catName, connectorName, userName, groupNames); - } - - @Override public PrincipalPrivilegeSet getTablePrivilegeSet(String catName, String dbName, String tableName, - String userName, List groupNames) throws InvalidObjectException, MetaException { - return rawStore.getTablePrivilegeSet(catName, dbName, tableName, userName, groupNames); - } - - @Override public PrincipalPrivilegeSet getPartitionPrivilegeSet(String catName, String dbName, String tableName, - String partition, String userName, List groupNames) throws InvalidObjectException, MetaException { - return rawStore.getPartitionPrivilegeSet(catName, dbName, tableName, partition, userName, groupNames); - } - - @Override public PrincipalPrivilegeSet getColumnPrivilegeSet(String catName, String dbName, String tableName, - String partitionName, String columnName, String userName, List groupNames) - throws InvalidObjectException, MetaException { - return rawStore.getColumnPrivilegeSet(catName, dbName, tableName, partitionName, columnName, userName, groupNames); - } - - @Override public List listPrincipalGlobalGrants(String principalName, - PrincipalType principalType) { - return rawStore.listPrincipalGlobalGrants(principalName, principalType); - } - - @Override public List listPrincipalDBGrants(String principalName, PrincipalType principalType, - String catName, String dbName) { - return rawStore.listPrincipalDBGrants(principalName, principalType, catName, dbName); - } - - @Override public List listPrincipalDCGrants(String principalName, PrincipalType principalType, - String dcName) { - return rawStore.listPrincipalDCGrants(principalName, principalType, dcName); - } - - @Override public List listAllTableGrants(String principalName, PrincipalType principalType, - String catName, String dbName, String tableName) { - return rawStore.listAllTableGrants(principalName, principalType, catName, dbName, tableName); - } - - @Override public List listPrincipalPartitionGrants(String 
principalName, - PrincipalType principalType, String catName, String dbName, String tableName, List partValues, - String partName) { - return rawStore - .listPrincipalPartitionGrants(principalName, principalType, catName, dbName, tableName, partValues, partName); - } - - @Override public List listPrincipalTableColumnGrants(String principalName, - PrincipalType principalType, String catName, String dbName, String tableName, String columnName) { - return rawStore - .listPrincipalTableColumnGrants(principalName, principalType, catName, dbName, tableName, columnName); - } - - @Override public List listPrincipalPartitionColumnGrants(String principalName, - PrincipalType principalType, String catName, String dbName, String tableName, List partValues, - String partName, String columnName) { - return rawStore - .listPrincipalPartitionColumnGrants(principalName, principalType, catName, dbName, tableName, partValues, - partName, columnName); - } - - @Override public boolean grantPrivileges(PrivilegeBag privileges) - throws InvalidObjectException, MetaException, NoSuchObjectException { - return rawStore.grantPrivileges(privileges); - } - - @Override public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) - throws InvalidObjectException, MetaException, NoSuchObjectException { - return rawStore.revokePrivileges(privileges, grantOption); - } - - @Override public boolean refreshPrivileges(HiveObjectRef objToRefresh, String authorizer, - PrivilegeBag grantPrivileges) throws InvalidObjectException, MetaException, NoSuchObjectException { - return rawStore.refreshPrivileges(objToRefresh, authorizer, grantPrivileges); - } - - @Override public Role getRole(String roleName) throws NoSuchObjectException { - return rawStore.getRole(roleName); - } - - @Override public List listRoleNames() { - return rawStore.listRoleNames(); - } - - @Override public List listRoles(String principalName, PrincipalType principalType) { - return rawStore.listRoles(principalName, 
principalType); - } - - @Override public List listRolesWithGrants(String principalName, PrincipalType principalType) { - return rawStore.listRolesWithGrants(principalName, principalType); - } - - @Override public List listRoleMembers(String roleName) { - return rawStore.listRoleMembers(roleName); - } - @Override public Partition getPartitionWithAuth(String catName, String dbName, String tblName, List partVals, String userName, List groupNames) throws MetaException, NoSuchObjectException, InvalidObjectException { catName = StringUtils.normalizeIdentifier(catName); @@ -1979,12 +1736,6 @@ public List getPartitionsByFilter(String catName, String dbName, Stri return partitionNames; } - @Override - public int getNumPartitionsByPs(String catName, String dbName, String tblName, List partSpecs) - throws MetaException, NoSuchObjectException { - return rawStore.getNumPartitionsByPs(catName, dbName, tblName, partSpecs); - } - @Override public List listPartitionsPsWithAuth(String catName, String dbName, String tblName, GetPartitionsArgs args) throws MetaException, InvalidObjectException, NoSuchObjectException { catName = StringUtils.normalizeIdentifier(catName); @@ -2532,67 +2283,6 @@ long getPartsFound() { rawStore.setMetaStoreSchemaVersion(version, comment); } - @Override public List listPrincipalDBGrantsAll(String principalName, - PrincipalType principalType) { - return rawStore.listPrincipalDBGrantsAll(principalName, principalType); - } - - @Override public List listPrincipalDCGrantsAll(String principalName, - PrincipalType principalType) { - return rawStore.listPrincipalDCGrantsAll(principalName, principalType); - } - - @Override public List listPrincipalTableGrantsAll(String principalName, - PrincipalType principalType) { - return rawStore.listPrincipalTableGrantsAll(principalName, principalType); - } - - @Override public List listPrincipalPartitionGrantsAll(String principalName, - PrincipalType principalType) { - return 
rawStore.listPrincipalPartitionGrantsAll(principalName, principalType); - } - - @Override public List listPrincipalTableColumnGrantsAll(String principalName, - PrincipalType principalType) { - return rawStore.listPrincipalTableColumnGrantsAll(principalName, principalType); - } - - @Override public List listPrincipalPartitionColumnGrantsAll(String principalName, - PrincipalType principalType) { - return rawStore.listPrincipalPartitionColumnGrantsAll(principalName, principalType); - } - - @Override public List listGlobalGrantsAll() { - return rawStore.listGlobalGrantsAll(); - } - - @Override public List listDBGrantsAll(String catName, String dbName) { - return rawStore.listDBGrantsAll(catName, dbName); - } - - @Override public List listDCGrantsAll(String dcName) { - return rawStore.listDCGrantsAll(dcName); - } - - @Override public List listPartitionColumnGrantsAll(String catName, String dbName, - String tableName, String partitionName, String columnName) { - return rawStore.listPartitionColumnGrantsAll(catName, dbName, tableName, partitionName, columnName); - } - - @Override public List listTableGrantsAll(String catName, String dbName, String tableName) { - return rawStore.listTableGrantsAll(catName, dbName, tableName); - } - - @Override public List listPartitionGrantsAll(String catName, String dbName, String tableName, - String partitionName) { - return rawStore.listPartitionGrantsAll(catName, dbName, tableName, partitionName); - } - - @Override public List listTableColumnGrantsAll(String catName, String dbName, String tableName, - String columnName) { - return rawStore.listTableColumnGrantsAll(catName, dbName, tableName, columnName); - } - @Override public void createFunction(Function func) throws InvalidObjectException, MetaException { // TODO functionCache rawStore.createFunction(func); @@ -2625,38 +2315,6 @@ long getPartsFound() { return rawStore.getFunctionsRequest(catName, dbName, pattern, isReturnNames); } - @Override public NotificationEventResponse 
getNextNotification(NotificationEventRequest rqst) { - return rawStore.getNextNotification(rqst); - } - - @Override public void addNotificationEvent(NotificationEvent event) throws MetaException { - rawStore.addNotificationEvent(event); - } - - @Override public void cleanNotificationEvents(int olderThan) { - rawStore.cleanNotificationEvents(olderThan); - } - - @Override public CurrentNotificationEventId getCurrentNotificationEventId() { - return rawStore.getCurrentNotificationEventId(); - } - - @Override public NotificationEventsCountResponse getNotificationEventsCount(NotificationEventsCountRequest rqst) { - return rawStore.getNotificationEventsCount(rqst); - } - - @Override public int getTableCount() throws MetaException { - return rawStore.getTableCount(); - } - - @Override public int getPartitionCount() throws MetaException { - return rawStore.getPartitionCount(); - } - - @Override public int getDatabaseCount() throws MetaException { - return rawStore.getDatabaseCount(); - } - @Override public List getPrimaryKeys(PrimaryKeysRequest request) throws MetaException { String catName = StringUtils.normalizeIdentifier(request.getCatName()); @@ -3045,14 +2703,6 @@ public long getCacheUpdateCount() { return sharedCache.getUpdateCount(); } - @Override public void cleanWriteNotificationEvents(int olderThan) { - rawStore.cleanWriteNotificationEvents(olderThan); - } - - @Override public List getAllWriteEventInfo(long txnId, String dbName, String tableName) - throws MetaException { - return rawStore.getAllWriteEventInfo(txnId, dbName, tableName); - } static boolean isNotInBlackList(String catName, String dbName, String tblName) { String str = TableName.getQualified(catName, dbName, tblName); @@ -3248,13 +2898,18 @@ public void dropPackage(DropPackageRequest request) { rawStore.dropPackage(request); } - @Override - public MTable ensureGetMTable(String catName, String dbName, String tblName) throws NoSuchObjectException { - return rawStore.ensureGetMTable(catName, dbName, 
tblName); - } - private boolean shouldGetConstraintFromRawStore(String catName, String dbName, String tblName) { return !shouldCacheTable(catName, dbName, tblName) || (canUseEvents && rawStore.isActiveTransaction()) || !sharedCache.isTableConstraintValid(catName, dbName, tblName); } + + @Override + public MDatabase ensureGetMDatabase(String catName, String dbName) throws NoSuchObjectException { + return rawStore.ensureGetMDatabase(catName, dbName); + } + + @Override + public T unwrap(Class iface) { + return rawStore.unwrap(iface); + } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java index 4a7f831d8d04..94926c01564b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java @@ -77,6 +77,7 @@ import static org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.extractSqlInt; import static org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.extractSqlLong; import static org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.getModelIdentity; +import static org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.makeParams; import static org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.getPartValsFromName; /** @@ -98,10 +99,6 @@ class DirectSqlUpdatePart extends DirectSqlBase { sqlGenerator = new SQLGenerator(dbType, conf); } - static String quoteString(String input) { - return "'" + input + "'"; - } - private void populateInsertUpdateMap(Map statsPartInfoMap, Map updateMap, MapinsertMap, @@ -412,35 +409,39 @@ private Map> updatePartitionParamTable(Connection db private Map getPartitionInfo(Connection dbConn, 
long tblId, Map partColStatsMap) - throws SQLException, MetaException { - List queries = new ArrayList<>(); - StringBuilder prefix = new StringBuilder(); - StringBuilder suffix = new StringBuilder(); + throws MetaException { Map partitionInfoMap = new HashMap<>(); + List partNames = new ArrayList<>(partColStatsMap.keySet()); + if (partNames.isEmpty()) { + return partitionInfoMap; + } - List partKeys = partColStatsMap.keySet().stream().map( - e -> quoteString(e)).collect(Collectors.toList() - ); - - prefix.append("select \"PART_ID\", \"WRITE_ID\", \"PART_NAME\" from \"PARTITIONS\" where "); - suffix.append(" and \"TBL_ID\" = " + tblId); - TxnUtils.buildQueryWithINClauseStrings(conf, queries, prefix, suffix, - partKeys, "\"PART_NAME\"", true, false); - - try (Statement statement = dbConn.createStatement()) { - for (String query : queries) { + Batchable.runBatched(maxBatchSize, partNames, new Batchable() { + @Override + public List run(List input) throws Exception { + String placeholders = makeParams(input.size()); + String query = "select \"PART_ID\", \"WRITE_ID\", \"PART_NAME\" from \"PARTITIONS\" where " + + "\"PART_NAME\" in (" + placeholders + ") and \"TBL_ID\" = ?"; // Select for update makes sure that the partitions are not modified while the stats are getting updated. 
query = sqlGenerator.addForUpdateClause(query); LOG.debug("Execute query: " + query); - try (ResultSet rs = statement.executeQuery(query)) { - while (rs.next()) { - PartitionInfo partitionInfo = new PartitionInfo(rs.getLong(1), - rs.getLong(2), rs.getString(3)); - partitionInfoMap.put(partitionInfo, partColStatsMap.get(rs.getString(3))); + try (PreparedStatement ps = dbConn.prepareStatement(query)) { + int paramIndex = 1; + for (String partName : input) { + ps.setString(paramIndex++, partName); + } + ps.setLong(paramIndex, tblId); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + String partName = rs.getString(3); + PartitionInfo partitionInfo = new PartitionInfo(rs.getLong(1), rs.getLong(2), partName); + partitionInfoMap.put(partitionInfo, partColStatsMap.get(partName)); + } } } + return Collections.emptyList(); } - } + }); return partitionInfoMap; } @@ -473,6 +474,10 @@ public Map> updatePartitionColumnStatistics(Map partitionInfoMap = getPartitionInfo(dbConn, tbl.getId(), partColStatsMap); + if (partitionInfoMap.isEmpty()) { + return Collections.emptyMap(); + } + result = updatePartitionParamTable(dbConn, partitionInfoMap, validWriteIds, writeId, TxnUtils.isAcidTable(tbl)); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java index 3a317104ad8e..713f3ff4e338 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java @@ -738,7 +738,7 @@ public List getPartitionsViaPartNames(final String catName, final Str return Batchable.runBatched(batchSize, partNames, new Batchable() { @Override public List run(List input) throws MetaException { - return 
getPartitionsByNames(catName, dbName, tblName, partNames, false, args); + return getPartitionsByNames(catName, dbName, tblName, input, false, args); } }); } @@ -1028,9 +1028,7 @@ private List getPartitionsByNames(String catName, String dbName, throws MetaException { // Get most of the fields for the partNames provided. // Assume db and table names are the same for all partition, as provided in arguments. - String quotedPartNames = partNameList.stream() - .map(DirectSqlUpdatePart::quoteString) - .collect(Collectors.joining(",")); + String partNameParams = makeParams(partNameList.size()); String queryText = "select " + PARTITIONS + ".\"PART_ID\"," + SDS + ".\"SD_ID\"," + SDS + ".\"CD_ID\"," @@ -1043,11 +1041,18 @@ private List getPartitionsByNames(String catName, String dbName, + " left outer join " + SERDES + " on " + SDS + ".\"SERDE_ID\" = " + SERDES + ".\"SERDE_ID\" " + " inner join " + TBLS + " on " + TBLS + ".\"TBL_ID\" = " + PARTITIONS + ".\"TBL_ID\" " + " inner join " + DBS + " on " + DBS + ".\"DB_ID\" = " + TBLS + ".\"DB_ID\" " - + " where \"PART_NAME\" in (" + quotedPartNames + ") " + + " where " + PARTITIONS + ".\"PART_NAME\" in (" + partNameParams + ") " + " and " + TBLS + ".\"TBL_NAME\" = ? and " + DBS + ".\"NAME\" = ? and " + DBS + ".\"CTLG_NAME\" = ? 
order by \"PART_NAME\" asc"; - Object[] params = new Object[]{tblName, dbName, catName}; + Object[] params = new Object[partNameList.size() + 3]; + int i = 0; + for (String partName : partNameList) { + params[i++] = partName; + } + params[i++] = tblName; + params[i++] = dbName; + params[i] = catName; return getPartitionsByQuery(catName, dbName, tblName, queryText, params, isAcidTable, args); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetastoreDirectSqlUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetastoreDirectSqlUtils.java index a3c4523dc872..489a61a08596 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetastoreDirectSqlUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetastoreDirectSqlUtils.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.metastore.PersistenceManagerProxy; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.datanucleus.ExecutionContext; import org.datanucleus.api.jdo.JDOPersistenceManager; @@ -619,7 +620,14 @@ else if (value instanceof byte[]) { static Long getModelIdentity(PersistenceManager pm, Class modelClass) throws MetaException { - ExecutionContext ec = ((JDOPersistenceManager) pm).getExecutionContext(); + ExecutionContext ec; + if (pm instanceof JDOPersistenceManager jp) { + ec = jp.getExecutionContext(); + } else if (pm instanceof PersistenceManagerProxy.ExecutionContextReference ecr) { + ec = ecr.getExecutionContext(); + } else { + throw new MetaException("Unknown " + pm); + } AbstractClassMetaData cmd = ec.getMetaDataManager().getMetaDataForClass(modelClass, 
ec.getClassLoaderResolver()); switch (cmd.getIdentityType()) { case DATASTORE : diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/CompactorTasks.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/CompactorTasks.java index 956a5e5371f9..efc9673c5ab8 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/CompactorTasks.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/CompactorTasks.java @@ -65,8 +65,13 @@ public List getCompactorThreads() throws Exception { compactors.add(initiator); } if (MetastoreConf.getBoolVar(configuration, MetastoreConf.ConfVars.COMPACTOR_CLEANER_ON)) { - MetaStoreThread cleaner = instantiateThread("org.apache.hadoop.hive.ql.txn.compactor.Cleaner"); - compactors.add(cleaner); + if (MetastoreConf.getBoolVar(configuration, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID)) { + MetaStoreThread cleaner = instantiateThread("org.apache.hadoop.hive.ql.txn.compactor.Cleaner"); + compactors.add(cleaner); + } else { + HiveMetaStore.LOG.warn("Compactor Cleaner is turned On. 
But, automatic compaction cleaner will not run " + + "when the {} property is set to false.", MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID.getHiveName()); + } } return compactors; } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java index af3bd2b0ac06..0306d47313b8 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/leader/HouseKeepingTasks.java @@ -20,7 +20,6 @@ import com.cronutils.utils.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.MetastoreTaskThread; @@ -107,20 +106,19 @@ public void takeLeadership(LeaderElection election) throws Exception { } else { tasks = new ArrayList<>(getRemoteOnlyTasks()); } - int poolSize = Math.min(MetastoreConf.getIntVar(configuration, - MetastoreConf.ConfVars.THREAD_POOL_SIZE), tasks.size()); - metastoreTaskThreadPool = Executors.newScheduledThreadPool(poolSize, threadFactory); - for (MetastoreTaskThread task : tasks) { + tasks.forEach(task -> { task.setConf(configuration); task.enforceMutex(election.enforceMutex()); - long freq = task.runFrequency(TimeUnit.MILLISECONDS); - if (freq > 0) { + if (task.runFrequency(TimeUnit.MILLISECONDS) > 0) { runningTasks.add(task); - metastoreTaskThreadPool.scheduleAtFixedRate(task, freq, freq, TimeUnit.MILLISECONDS); } - } - + }); + int poolSize = Math.min(MetastoreConf.getIntVar(configuration, + MetastoreConf.ConfVars.THREAD_POOL_SIZE), runningTasks.size()); + metastoreTaskThreadPool = Executors.newScheduledThreadPool(poolSize, threadFactory); 
runningTasks.forEach(task -> { + long freq = task.runFrequency(TimeUnit.MILLISECONDS); + metastoreTaskThreadPool.scheduleAtFixedRate(task, freq, freq, TimeUnit.MILLISECONDS); HiveMetaStore.LOG.info("Scheduling for " + task.getClass().getCanonicalName() + " service."); }); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/GetHelper.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/GetHelper.java new file mode 100644 index 000000000000..296e4654f879 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/GetHelper.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.metastore; + +import com.codahale.metrics.Counter; +import com.google.common.annotations.VisibleForTesting; + +import javax.jdo.JDOException; +import javax.jdo.PersistenceManager; +import java.util.List; + +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.metastore.DatabaseProduct; +import org.apache.hadoop.hive.metastore.ExceptionHandler; +import org.apache.hadoop.hive.metastore.directsql.MetaStoreDirectSql; +import org.apache.hadoop.hive.metastore.RawStore; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.metrics.Metrics; +import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; +import org.apache.hadoop.hive.metastore.metastore.iface.TableStore; +import org.datanucleus.api.jdo.JDOTransaction; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Helper class for fetching a result within a transaction, trying direct SQL first with a JDO fallback, plus perf logging. */ +@VisibleForTesting +public abstract class GetHelper { + private static final Logger LOG = LoggerFactory.getLogger(GetHelper.class); + private static final Counter directSqlErrors = Metrics.getRegistry() != null ? 
+ Metrics.getOrCreateCounter(MetricsConstants.DIRECTSQL_ERRORS) : new Counter(); + private final boolean isInTxn, doTrace, allowJdo; + private boolean doUseDirectSql; + private long start; + private Table table; + protected final RawStore baseStore; + protected final PersistenceManager pm; + private MetaStoreDirectSql directSql; + protected final List partitionFields; + protected final A argument; + private boolean success = false; + protected T results = null; + + public GetHelper(RawStoreAware rsa, A args) throws MetaException { + this(rsa, args, null); + } + + public GetHelper(RawStoreAware rsa, + A args, List fields) throws MetaException { + this.baseStore = rsa.getBaseStore(); + this.partitionFields = fields; + this.argument = args; + this.doTrace = LOG.isDebugEnabled(); + this.isInTxn = baseStore.isActiveTransaction(); + this.pm = rsa.getPersistentManager(); + this.allowJdo = canUseJdoQuery(); + + boolean isConfigEnabled = MetastoreConf.getBoolVar(baseStore.getConf(), + MetastoreConf.ConfVars.TRY_DIRECT_SQL); + if (isConfigEnabled) { + directSql = new MetaStoreDirectSql(pm, baseStore.getConf(), ""); + } + + if (!allowJdo && isConfigEnabled && !directSql.isCompatibleDatastore()) { + throw new MetaException("SQL is not operational"); // test path; SQL is enabled and broken. + } + this.doUseDirectSql = isConfigEnabled && directSql.isCompatibleDatastore(); + } + + protected boolean canUseDirectSql() throws MetaException { + return true; // By default, assume we can use directSQL - that's kind of the point. 
+ } + + protected boolean canUseJdoQuery() throws MetaException { + return true; + } + + protected abstract String describeResult(); + protected abstract T getSqlResult() throws MetaException; + protected abstract T getJdoResult() + throws MetaException, NoSuchObjectException, InvalidObjectException, + InvalidInputException; + + public T run(boolean initTable) throws MetaException, NoSuchObjectException { + try { + start(initTable); + String savePoint = isInTxn && allowJdo ? "rollback_" + System.nanoTime() : null; + if (doUseDirectSql) { + try { + directSql.prepareTxn(); + setTransactionSavePoint(savePoint); + this.results = getSqlResult(); + LOG.debug("Using direct SQL optimization."); + } catch (Exception ex) { + handleDirectSqlError(ex, savePoint); + } + } + // Note that this will be invoked in 2 cases: + // 1) DirectSQL was disabled to start with; + // 2) DirectSQL threw and was disabled in handleDirectSqlError. + if (!doUseDirectSql && canUseJdoQuery()) { + this.results = getJdoResult(); + LOG.debug("Not using direct SQL optimization."); + } + return commit(); + } catch (NoSuchObjectException | MetaException ex) { + throw ex; + } catch (Exception ex) { + LOG.error("", ex); + throw new MetaException(ex.getMessage()); + } finally { + close(); + } + } + + private void start(boolean initTable) throws MetaException, NoSuchObjectException { + start = doTrace ? 
System.nanoTime() : 0; + baseStore.openTransaction(); + if (initTable && (argument != null)) { + TableStore store = baseStore.unwrap(TableStore.class); + table = store.getTable((TableName) argument, null, -1); + if (table == null) { + throw new NoSuchObjectException( + "Specified catalog.database.table does not exist : " + argument); + } + } + doUseDirectSql = doUseDirectSql && canUseDirectSql(); + } + + private void handleDirectSqlError(Exception ex, String savePoint) throws MetaException, NoSuchObjectException { + String message = null; + try { + message = generateShorterMessage(ex); + } catch (Throwable t) { + message = ex.toString() + "; error building a better message: " + t.getMessage(); + } + LOG.warn(message); // Don't log the exception, people just get confused. + LOG.debug("Full DirectSQL callstack for debugging (not an error)", ex); + + if (!allowJdo || !DatabaseProduct.isRecoverableException(ex)) { + throw ExceptionHandler.newMetaException(ex); + } + + if (!isInTxn) { + JDOException rollbackEx = null; + try { + baseStore.rollbackTransaction(); + } catch (JDOException jex) { + rollbackEx = jex; + } + if (rollbackEx != null) { + // Datanucleus propagates some pointless exceptions and rolls back in the finally. + if (baseStore.isActiveTransaction()) { + throw rollbackEx; // Throw if the tx wasn't rolled back. + } + LOG.info("Ignoring exception, rollback succeeded: " + rollbackEx.getMessage()); + } + + start = doTrace ? System.nanoTime() : 0; + baseStore.openTransaction(); + if (table != null) { + TableStore store = baseStore.unwrap(TableStore.class); + table = store.getTable((TableName) argument, null, -1); + if (table == null) { + throw new NoSuchObjectException( + "Specified catalog.database.table does not exist : " + argument); + } + } + } else { + rollbackTransactionToSavePoint(savePoint); + start = doTrace ? 
System.nanoTime() : 0; + } + + directSqlErrors.inc(); + doUseDirectSql = false; + } + + private void setTransactionSavePoint(String savePoint) { + if (savePoint != null) { + ((JDOTransaction) pm.currentTransaction()).setSavepoint(savePoint); + } + } + + private void rollbackTransactionToSavePoint(String savePoint) { + if (savePoint != null) { + ((JDOTransaction) pm.currentTransaction()).rollbackToSavepoint(savePoint); + } + } + + private String generateShorterMessage(Exception ex) { + StringBuilder message = new StringBuilder( + "Falling back to ORM path due to direct SQL failure (this is not an error): "); + Throwable t = ex; + StackTraceElement[] prevStack = null; + while (t != null) { + message.append(t.getMessage()); + StackTraceElement[] stack = t.getStackTrace(); + int uniqueFrames = stack.length - 1; + if (prevStack != null) { + int n = prevStack.length - 1; + while (uniqueFrames >= 0 && n >= 0 && stack[uniqueFrames].equals(prevStack[n])) { + uniqueFrames--; n--; + } + } + for (int i = 0; i <= uniqueFrames; ++i) { + StackTraceElement ste = stack[i]; + message.append(" at ").append(ste); + if (ste.getMethodName().contains("getSqlResult") + && (ste.getFileName() == null || ste.getFileName().contains("ObjectStore"))) { + break; + } + } + prevStack = stack; + t = t.getCause(); + if (t != null) { + message.append(";\n Caused by: "); + } + } + return message.toString(); + } + + private T commit() { + success = baseStore.commitTransaction(); + if (doTrace) { + double time = ((System.nanoTime() - start) / 1000000.0); + String result = describeResult(); + String retrieveType = doUseDirectSql ? 
"SQL" : "ORM"; + + LOG.debug("{} retrieved using {} in {}ms", result, retrieveType, time); + } + return results; + } + + private void close() { + if (!success) { + baseStore.rollbackTransaction(); + } + } + + public Table getTable() { + return table; + } + + public MetaStoreDirectSql getDirectSql() { + return directSql; + } + + public List getPartitionFields() { + return partitionFields; + } + + public static long getDirectSqlErrors() { + return directSqlErrors.getCount(); + } +} \ No newline at end of file diff --git a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/GetListHelper.java similarity index 50% rename from packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java rename to standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/GetListHelper.java index a113c50efbff..fdd9e769cbed 100644 --- a/packaging/src/kubernetes/src/java/org/apache/hive/kubernetes/operator/dependent/condition/LlapEnabledCondition.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/GetListHelper.java @@ -16,26 +16,24 @@ * limitations under the License. */ -package org.apache.hive.kubernetes.operator.dependent.condition; +package org.apache.hadoop.hive.metastore.metastore; -import io.fabric8.kubernetes.api.model.HasMetadata; -import io.javaoperatorsdk.operator.api.reconciler.Context; -import io.javaoperatorsdk.operator.api.reconciler.dependent.DependentResource; -import io.javaoperatorsdk.operator.processing.dependent.workflow.Condition; -import org.apache.hive.kubernetes.operator.model.HiveCluster; +import java.util.List; -/** - * Activation condition for LLAP dependent resources. - * Returns true only when spec.llap.enabled is true. 
- */ -public class LlapEnabledCondition - implements Condition { +import org.apache.hadoop.hive.metastore.api.MetaException; + +public abstract class GetListHelper extends GetHelper> { + public GetListHelper(RawStoreAware rsa, A args) throws MetaException { + super(rsa, args, null); + } + + public GetListHelper(RawStoreAware rsa, + A args, List fields) throws MetaException { + super(rsa, args, fields); + } @Override - public boolean isMet( - DependentResource dependentResource, - HiveCluster primary, - Context context) { - return primary.getSpec().llap().isEnabled(); + protected String describeResult() { + return results.size() + " entries"; } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/MetaDescriptor.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/MetaDescriptor.java new file mode 100644 index 000000000000..74607e89c5b0 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/MetaDescriptor.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.metastore; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +@InterfaceStability.Evolving +@InterfaceAudience.LimitedPrivate("Hive developer") +@Retention(RetentionPolicy.RUNTIME) +public @interface MetaDescriptor { + String alias(); + Class defaultImpl(); + @Retention(RetentionPolicy.RUNTIME) + @Target(ElementType.METHOD) + @interface NoTransaction {} +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/PersistenceManagerProxy.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/PersistenceManagerProxy.java new file mode 100644 index 000000000000..84fbf513a5cc --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/PersistenceManagerProxy.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.metastore; + +import javax.jdo.PersistenceManager; +import javax.jdo.Query; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.lang.reflect.UndeclaredThrowableException; +import java.util.List; +import java.util.Objects; + +import org.datanucleus.ExecutionContext; + +public class PersistenceManagerProxy implements InvocationHandler { + private final PersistenceManager target; + private final MethodHandle getExecutionContext; + private final List openedQueries; + + private PersistenceManagerProxy(PersistenceManager pm, List trackOpenedQueries) { + this.target = Objects.requireNonNull(pm); + this.openedQueries = Objects.requireNonNull(trackOpenedQueries); + MethodHandles.Lookup lookup = MethodHandles.lookup(); + try { + java.lang.invoke.MethodType type = java.lang.invoke.MethodType.methodType(ExecutionContext.class); + this.getExecutionContext = lookup.findVirtual(target.getClass(), "getExecutionContext", type); + } catch (Exception e) { + throw new RuntimeException("Method getExecutionContext not found", e); + } + } + + public static PersistenceManager getProxy(PersistenceManager pm, List trackOpenedQueries) { + return (PersistenceManager) Proxy.newProxyInstance(pm.getClass().getClassLoader(), + new Class[] {PersistenceManager.class, ExecutionContextReference.class}, + new PersistenceManagerProxy(pm, trackOpenedQueries)); + } + + @Override + public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { + try { + // Redirect if the interface method "getExecutionContext" is called + if (method.getName().equals("getExecutionContext")) { + MethodHandle boundGetExecutionContext = getExecutionContext.bindTo(target); + return args == null ? 
boundGetExecutionContext.invokeWithArguments() : + boundGetExecutionContext.invokeWithArguments(args); + } else if (method.getName().equals("newQuery")) { + Object result = method.invoke(target, args); + openedQueries.add((Query) result); + return result; + } + // Otherwise, proceed with the standard call + return method.invoke(target, args); + } catch (InvocationTargetException | UndeclaredThrowableException e) { + throw e.getCause(); + } + } + + // PersistenceManager doesn't provide a way to get the ExecutionContext + // once the JDOPersistenceManager is wrapped in a proxy; the context is needed + // to set a savepoint or to generate the primary key. + public interface ExecutionContextReference { + /** + * @return ExecutionContext the current JDOPersistenceManager holds + */ + ExecutionContext getExecutionContext(); + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/RawStoreAware.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/RawStoreAware.java new file mode 100644 index 000000000000..1d16764811cb --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/RawStoreAware.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.metastore; + +import javax.jdo.PersistenceManager; + +import java.util.Objects; + +import org.apache.hadoop.hive.metastore.RawStore; + +public abstract class RawStoreAware { + protected RawStore baseStore; + protected PersistenceManager pm; + + public void setBaseStore(RawStore store) { + this.baseStore = Objects.requireNonNull(store); + } + + public void setPersistentManager(PersistenceManager manager) { + this.pm = Objects.requireNonNull(manager); + } + + public RawStore getBaseStore() { + return baseStore; + } + + public PersistenceManager getPersistentManager() { + return pm; + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/TransactionHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/TransactionHandler.java new file mode 100644 index 000000000000..afde12e22314 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/TransactionHandler.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.metastore; + +import javax.jdo.Query; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.lang.reflect.UndeclaredThrowableException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.metastore.RawStore; + +/** + * Invocation handler that forwards each proxied call to {@code simpl} and, unless the invoked + * method is annotated with {@code MetaDescriptor.NoTransaction}, wraps the call in a transaction + * opened on {@code rs}. After every call the queries held in {@code openQueries} are closed and + * the list is cleared. + */ +public record TransactionHandler (RawStore rs, T simpl, List openQueries) + implements InvocationHandler { + + /** + * Creates a dynamic proxy for {@code iface} and its directly declared super-interfaces whose + * calls are dispatched to {@code handler}. + * + * @param iface the interface to proxy; its class loader is used to define the proxy + * @param handler the handler receiving every call made on the proxy + * @return the proxy instance, cast to the interface type + */ + @SuppressWarnings("unchecked") + public static T getProxy(Class iface, TransactionHandler handler) { + List interfaces = new ArrayList<>(); + interfaces.add(iface); + interfaces.addAll(Arrays.asList(iface.getInterfaces())); + return (T) Proxy.newProxyInstance(iface.getClassLoader(), + interfaces.toArray(new Class[0]), handler); + } + + /** + * Invokes {@code method} on {@code simpl}. When the method is not annotated with + * {@code MetaDescriptor.NoTransaction}, a transaction is opened before the call, committed after + * a normal return, and rolled back if the call failed or the commit did not report success. + * + * @param proxy the proxy instance the call was made on (unused) + * @param method the interface method being invoked + * @param args the call arguments + * @return whatever the target method returns + * @throws Throwable the cause of the reflective failure, i.e. the exception raised by the target + */ + @Override + public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { + boolean openTxn = method.getAnnotation(MetaDescriptor.NoTransaction.class) == null; + boolean success = false; + if (openTxn) { + rs.openTransaction(); + } + try { + Object result = method.invoke(simpl, args); + if (openTxn) { + success = rs.commitTransaction(); + } + return result; + } catch (InvocationTargetException | UndeclaredThrowableException e) { + /* Surface the underlying exception instead of the reflection wrapper. */ + throw e.getCause(); + } finally { + try { + if (openTxn && !success) { + rs.rollbackTransaction(); + } + } finally { + /* Release the queries even when the rollback itself throws, so they do not leak. */ + for (Query q : openQueries) { + q.closeAll(); + } + openQueries.clear(); + } + } + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/NotificationStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/NotificationStore.java new file mode 100644 index 000000000000..a1d1a1db680d --- /dev/null +++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/NotificationStore.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.metastore.iface; + +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NotificationEvent; +import org.apache.hadoop.hive.metastore.api.NotificationEventRequest; +import org.apache.hadoop.hive.metastore.api.NotificationEventResponse; +import org.apache.hadoop.hive.metastore.api.NotificationEventsCountRequest; +import org.apache.hadoop.hive.metastore.api.NotificationEventsCountResponse; +import org.apache.hadoop.hive.metastore.api.WriteEventInfo; +import org.apache.hadoop.hive.metastore.metastore.MetaDescriptor; +import org.apache.hadoop.hive.metastore.metastore.impl.NotificationStoreImpl; + +/** + * Store interface for notification events and write event information. + * Declared with the {@code MetaDescriptor} alias "notification"; {@code NotificationStoreImpl} + * is the default implementation. + */ +@MetaDescriptor(alias = "notification", defaultImpl = NotificationStoreImpl.class) +public interface NotificationStore { + /** + * Get the next notification event. + * @param rqst Request containing information on the last processed notification.
+ * @return list of notifications, sorted by eventId + */ + NotificationEventResponse getNextNotification(NotificationEventRequest rqst); + + + /** + * Add a notification entry. This should only be called from inside the metastore. + * @param event the notification to add + * @throws MetaException error accessing RDBMS + */ + void addNotificationEvent(NotificationEvent event) throws MetaException; + + /** + * Remove older notification events. + * + * @param olderThan Remove any events older or equal to a given number of seconds + */ + void cleanNotificationEvents(int olderThan); + + /** + * Get the last issued notification event id. This is intended for use by the export command + * so that users can determine the state of the system at the point of the export, + * and determine which notification events happened before or after the export. + * @return the last issued notification event id + */ + CurrentNotificationEventId getCurrentNotificationEventId(); + + /** + * Get the number of events corresponding to given database with fromEventId. + * This is intended for use by the repl commands to track the progress of incremental dump. + * @param rqst the events count request + * @return response holding the number of matching events + */ + NotificationEventsCountResponse getNotificationEventsCount(NotificationEventsCountRequest rqst); + + /** + * Remove older notification events. + * @param olderThan Remove any events older or equal to a given number of seconds + */ + void cleanWriteNotificationEvents(int olderThan); + + /** + * Get all write events for a specific transaction.
+ * @param txnId get all the events done by this transaction + * @param dbName the name of db for which dump is being taken + * @param tableName the name of the table for which the dump is being taken + * @return the write event information recorded for the transaction + */ + List getAllWriteEventInfo(long txnId, String dbName, String tableName) throws MetaException; +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/PrivilegeStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/PrivilegeStore.java new file mode 100644 index 000000000000..128288b7b74a --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/PrivilegeStore.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.metastore.metastore.iface; + +import java.util.List; + +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.PrivilegeBag; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant; +import org.apache.hadoop.hive.metastore.metastore.MetaDescriptor; +import org.apache.hadoop.hive.metastore.metastore.impl.PrivilegeStoreImpl; +import org.apache.hadoop.hive.metastore.model.MDBPrivilege; +import org.apache.hadoop.hive.metastore.model.MDCPrivilege; + +/** + * Store interface for roles and for privileges (grants) on metastore objects. + * Declared with the {@code MetaDescriptor} alias "privilege"; {@code PrivilegeStoreImpl} + * is the default implementation. + */ +@MetaDescriptor(alias = "privilege", defaultImpl = PrivilegeStoreImpl.class) +public interface PrivilegeStore { + boolean addRole(String roleName, String ownerName) + throws InvalidObjectException, MetaException, NoSuchObjectException; + + boolean removeRole(String roleName) throws MetaException, NoSuchObjectException; + + boolean grantRole(Role role, String userName, PrincipalType principalType, + String grantor, PrincipalType grantorType, boolean grantOption) + throws MetaException, NoSuchObjectException, InvalidObjectException; + + boolean revokeRole(Role role, String userName, PrincipalType principalType, + boolean grantOption) throws MetaException, NoSuchObjectException; + + PrincipalPrivilegeSet getUserPrivilegeSet(String userName, + List groupNames) throws InvalidObjectException, MetaException; + + /** + * Get privileges for a database for a user.
+ * @param catName catalog name + * @param dbName database name + * @param userName user name + * @param groupNames list of groups the user is in + * @return privileges for that user on the indicated database + * @throws InvalidObjectException no such database + * @throws MetaException error accessing the RDBMS + */ + PrincipalPrivilegeSet getDBPrivilegeSet (String catName, String dbName, String userName, + List groupNames) throws InvalidObjectException, MetaException; + + /** + * Get privileges for a connector for a user. + * @param catName catalog name + * @param connectorName connector name + * @param userName user name + * @param groupNames list of groups the user is in + * @return privileges for that user on the indicated connector + * @throws InvalidObjectException no such database (NOTE(review): probably meant "no such connector"; confirm against the implementation) + * @throws MetaException error accessing the RDBMS + */ + PrincipalPrivilegeSet getConnectorPrivilegeSet (String catName, String connectorName, String userName, + List groupNames) throws InvalidObjectException, MetaException; + + /** + * Get privileges for a table for a user. + * @param tableName table name + * @param userName user name + * @param groupNames list of groups the user is in + * @return privileges for that user on the indicated table + * @throws InvalidObjectException no such table + * @throws MetaException error accessing the RDBMS + */ + PrincipalPrivilegeSet getTablePrivilegeSet (TableName tableName, + String userName, List groupNames) throws InvalidObjectException, MetaException; + + /** + * Get privileges for a partition for a user.
+ * @param tableName table name + * @param partition partition name + * @param userName user name + * @param groupNames list of groups the user is in + * @return privileges for that user on the indicated partition + * @throws InvalidObjectException no such partition + * @throws MetaException error accessing the RDBMS + */ + PrincipalPrivilegeSet getPartitionPrivilegeSet (TableName tableName, + String partition, String userName, List groupNames) throws InvalidObjectException, MetaException; + + /** + * Get privileges for a column in a table or partition for a user. + * @param tableName table name + * @param partitionName partition name, or null for table level column permissions + * @param columnName column name + * @param userName user name + * @param groupNames list of groups the user is in + * @return privileges for that user on the indicated column in the table or partition + * @throws InvalidObjectException no such table, partition, or column + * @throws MetaException error accessing the RDBMS + */ + PrincipalPrivilegeSet getColumnPrivilegeSet (TableName tableName, String partitionName, + String columnName, String userName, List groupNames) throws InvalidObjectException, MetaException; + + List listPrincipalGlobalGrants(String principalName, + PrincipalType principalType); + + /** + * For a given principal name and type, list the DB Grants. + * @param principalName principal name + * @param principalType principal type + * @param catName catalog name + * @param dbName database name + * @return list of privileges for that principal on the specified database. + */ + List listPrincipalDBGrants(String principalName, + PrincipalType principalType, String catName, String dbName); + + /** + * For a given principal name and type, list the DC Grants. + * @param principalName principal name + * @param principalType principal type + * @param dcName data connector name + * @return list of privileges for that principal on the specified data connector.
+ */ + List listPrincipalDCGrants(String principalName, + PrincipalType principalType, String dcName); + + /** + * For a given principal name and type, list the Table Grants. + * @param principalName principal name + * @param principalType principal type + * @param tableName table name + * @return list of privileges for that principal on the specified table. + */ + List listAllTableGrants( + String principalName, PrincipalType principalType, TableName tableName); + + /** + * For a given principal name and type, list the Partition Grants. + * @param principalName principal name + * @param principalType principal type + * @param tableName table name + * @param partValues partition values + * @param partName partition name (not value) + * @return list of privileges for that principal on the specified partition. + */ + List listPrincipalPartitionGrants( + String principalName, PrincipalType principalType, TableName tableName, + List partValues, String partName); + + /** + * For a given principal name and type, list the Table Column Grants. + * @param principalName principal name + * @param principalType principal type + * @param tableName table name + * @param columnName column name + * @return list of privileges for that principal on the specified table column. + */ + List listPrincipalTableColumnGrants( + String principalName, PrincipalType principalType, TableName tableName, String columnName); + + /** + * For a given principal name and type, list the Partition Column Grants. + * @param principalName principal name + * @param principalType principal type + * @param tableName table name + * @param partValues partition values + * @param partName partition name (not value) + * @param columnName column name + * @return list of privileges for that principal on the specified partition column.
+ */ + List listPrincipalPartitionColumnGrants( + String principalName, PrincipalType principalType, TableName tableName, + List partValues, String partName, String columnName); + + boolean grantPrivileges (PrivilegeBag privileges) + throws InvalidObjectException, MetaException, NoSuchObjectException; + + boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) + throws InvalidObjectException, MetaException, NoSuchObjectException; + + boolean refreshPrivileges(HiveObjectRef objToRefresh, String authorizer, PrivilegeBag grantPrivileges) + throws InvalidObjectException, MetaException, NoSuchObjectException; + + org.apache.hadoop.hive.metastore.api.Role getRole( + String roleName) throws NoSuchObjectException; + + List listRoleNames(); + + List listRoles(String principalName, + PrincipalType principalType); + + List listRolesWithGrants(String principalName, + PrincipalType principalType); + + + /** + * Get the role to principal grant mapping for given role + * @param roleName name of the role + * @return the role to principal grants for that role + */ + List listRoleMembers(String roleName); + + /** + * List all DB grants for a given principal. + * @param principalName principal name + * @param principalType principal type + * @return all DB grants for this principal + */ + List listPrincipalDBGrantsAll( + String principalName, PrincipalType principalType); + + /** + * List all DC grants for a given principal.
+ * @param principalName principal name + * @param principalType principal type + * @return all DC grants for this principal + */ + List listPrincipalDCGrantsAll( + String principalName, PrincipalType principalType); + + /** + * List all Table grants for a given principal. + * @param principalName principal name + * @param principalType principal type + * @return all Table grants for this principal + */ + List listPrincipalTableGrantsAll( + String principalName, PrincipalType principalType); + + /** + * List all Partition grants for a given principal. + * @param principalName principal name + * @param principalType principal type + * @return all Partition grants for this principal + */ + List listPrincipalPartitionGrantsAll( + String principalName, PrincipalType principalType); + + /** + * List all Table column grants for a given principal. + * @param principalName principal name + * @param principalType principal type + * @return all Table column grants for this principal + */ + List listPrincipalTableColumnGrantsAll( + String principalName, PrincipalType principalType); + + /** + * List all Partition column grants for a given principal. + * @param principalName principal name + * @param principalType principal type + * @return all Partition column grants for this principal + */ + List listPrincipalPartitionColumnGrantsAll( + String principalName, PrincipalType principalType); + + List listGlobalGrantsAll(); + + /** + * Find all the privileges for a given database. + * @param catName catalog name + * @param dbName database name + * @return list of all privileges. + */ + List listDBGrantsAll(String catName, String dbName); + + /** + * Find all the privileges for a given data connector. + * @param dcName data connector name + * @return list of all privileges. + */ + List listDCGrantsAll(String dcName); + + /** + * Find all of the privileges for a given column in a given partition.
+ * @param tableName table name + * @param partitionName partition name (not value) + * @param columnName column name + * @return all privileges on this column in this partition + */ + List listPartitionColumnGrantsAll( + TableName tableName, String partitionName, String columnName); + + /** + * Find all of the privileges for a given table. + * @param tableName table name + * @return all privileges on this table + */ + List listTableGrantsAll(TableName tableName); + + /** + * Find all of the privileges for a given partition. + * @param tableName table name + * @param partitionName partition name (not value) + * @return all privileges on this partition + */ + List listPartitionGrantsAll( + TableName tableName, String partitionName); + + /** + * Find all of the privileges for a given column in a given table. + * @param tableName table name + * @param columnName column name + * @return all privileges on this column in this table + */ + List listTableColumnGrantsAll( + TableName tableName, String columnName); + + List listDatabaseGrants(String catName, String dbName, String authorizer); + + List listDataConnectorGrants(String dcName, String authorizer); +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/TableStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/TableStore.java new file mode 100644 index 000000000000..bf180f8766ea --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/iface/TableStore.java @@ -0,0 +1,477 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.metastore.iface; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.CreationMetadata; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; +import org.apache.hadoop.hive.metastore.api.GetProjectionsSpec; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.InvalidPartitionException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.metastore.api.PartitionValuesResponse; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.hadoop.hive.metastore.api.UnknownPartitionException; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import 
org.apache.hadoop.hive.metastore.client.builder.GetPartitionsArgs; +import org.apache.hadoop.hive.metastore.model.MPartition; +import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.metastore.MetaDescriptor; +import org.apache.hadoop.hive.metastore.metastore.impl.TableStoreImpl; +import org.apache.thrift.TException; + +@MetaDescriptor(alias = "table", defaultImpl = TableStoreImpl.class) +public interface TableStore { + + void createTable(Table tbl) throws InvalidObjectException, + MetaException; + /** + * Drop a table. + * @param table the table to be dropped + * @return true if the table was dropped + * @throws MetaException something went wrong, usually in the RDBMS or storage + * @throws NoSuchObjectException No table of this name + * @throws InvalidObjectException Don't think this is ever actually thrown + * @throws InvalidInputException Don't think this is ever actually thrown + */ + boolean dropTable(TableName table) throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException; + + /** + * Drop all partitions from the table, and return the partition's location that not a child of baseLocationToNotShow, + * when the baseLocationToNotShow is not null. + * @param table the table to drop partitions from + * @param baseLocationToNotShow Partition locations which are child of this path are omitted + * @param message postgres of this drop + * @return list of partition locations outside baseLocationToNotShow + * @throws MetaException something went wrong, usually in the RDBMS or storage + * @throws InvalidInputException unable to drop all partitions due to the invalid input + */ + List dropAllPartitionsAndGetLocations(TableName table, String baseLocationToNotShow, AtomicReference message) + throws MetaException, InvalidInputException, NoSuchObjectException, InvalidObjectException; + + /** + * Get a table object. 
+ * @param table the table to be got + * @param writeIdList string format of valid writeId transaction list + * @return table object, or null if no such table exists (wow it would be nice if we either + * consistently returned null or consistently threw NoSuchObjectException). + * @throws MetaException something went wrong in the RDBMS + */ + Table getTable(TableName table, + String writeIdList, long tableId) throws MetaException; + + /** + * Add a list of partitions to a table. + * @param table the table this partitions added to . + * @param parts list of partitions to be added. + * @return true if the operation succeeded. + * @throws InvalidObjectException never throws this AFAICT + * @throws MetaException the partitions don't belong to the indicated table or error writing to + * the RDBMS. + */ + boolean addPartitions(TableName table, List parts) + throws InvalidObjectException, MetaException; + + /** + * Get a partition. + * @param table table name. + * @param part_vals partition values for this table. + * @param writeIdList string format of valid writeId transaction list + * @return the partition. + * @throws MetaException error reading from RDBMS. + * @throws NoSuchObjectException no partition matching this specification exists. + */ + Partition getPartition(TableName table, + List part_vals, + String writeIdList) + throws MetaException, NoSuchObjectException; + + /** + * Get some or all partitions for a table. + * @param table table name + * @param args additional arguments for getting partitions + * @return list of partitions + * @throws MetaException error access the RDBMS. + * @throws NoSuchObjectException no such table exists + */ + List getPartitions(TableName table, + GetPartitionsArgs args) throws MetaException, NoSuchObjectException; + + /** + * Get the location for every partition of a given table. If a partition location is a child of + * baseLocationToNotShow then the partitionName is returned, but the only null location is + * returned. 
+ * @param tableName table name. + * @param baseLocationToNotShow Partition locations which are child of this path are omitted, and + * null value returned instead. + * @param max The maximum number of partition locations returned, or -1 for all + * @return The map of the partitionName, location pairs + */ + Map getPartitionLocations(TableName tableName, String baseLocationToNotShow, int max); + + /** + * Alter a table. + * @param tableName name of the table. + * @param newTable New table object. Which parts of the table can be altered are + * implementation specific. + * @return + * @throws InvalidObjectException The new table object is invalid. + * @throws MetaException something went wrong, usually in the RDBMS or storage. + */ + Table alterTable(TableName tableName, Table newTable, + String queryValidWriteIds) + throws InvalidObjectException, MetaException; + + + boolean dropPartitions(TableName tableName, List partNames) + throws MetaException, NoSuchObjectException; + + /** + * Get table names that match a pattern. + * @param catName catalog to search in + * @param dbName database to search in + * @param pattern pattern to match + * @param tableType type of table to look for + * @param limit Maximum number of tables to return (undeterministic set) + * @return list of table names, if any + * @throws MetaException failure in querying the RDBMS + */ + List getTables(String catName, String dbName, String pattern, TableType tableType, int limit) + throws MetaException; + + /** + * @param catName catalog name + * @param dbname + * The name of the database from which to retrieve the tables + * @param tableNames + * The names of the tables to retrieve. + * @param projectionSpec + * Projection Specification containing the columns that need to be returned. + * @return A list of the tables retrievable from the database + * whose names are in the list tableNames. 
+ * If there are duplicate names, only one instance of the table will be returned + * @throws MetaException failure in querying the RDBMS. + */ + List
getTableObjectsByName(String catName, String dbname, List tableNames, + GetProjectionsSpec projectionSpec, String tablePattern) throws MetaException, + UnknownDBException; + + /** + * Get list of materialized views in a database. + * @param catName catalog name + * @param dbName database name + * @return names of all materialized views in the database + * @throws MetaException error querying the RDBMS + * @throws NoSuchObjectException no such database + */ + List getMaterializedViewsForRewriting(String catName, String dbName) + throws MetaException, NoSuchObjectException; + + /** + + * @param catName catalog name to search in. Search must be confined to one catalog. + * @param dbNames databases to search in. + * @param tableNames names of tables to select. + * @param tableTypes types of tables to look for. + * @return list of matching table meta information. + * @throws MetaException failure in querying the RDBMS. + */ + List getTableMeta(String catName, String dbNames, String tableNames, + List tableTypes) throws MetaException; + + /** + * Gets a list of tables based on a filter string and filter type. + * @param catName catalog name + * @param dbName + * The name of the database from which you will retrieve the table names + * @param filter + * The filter string + * @param max_tables + * The maximum number of tables returned + * @return A list of table names that match the desired filter + * @throws MetaException + * @throws UnknownDBException + */ + List listTableNamesByFilter(String catName, String dbName, String filter, + short max_tables) throws MetaException, UnknownDBException; + + /** + * Get partition names with a filter. This is a portion of the SQL where clause. + * @param tableName table name + * @param args additional arguments for getting partition names + * @return list of partition names matching the criteria + * @throws MetaException Error accessing the RDBMS or processing the filter. + * @throws NoSuchObjectException no such table. 
+ */ + List listPartitionNamesByFilter(TableName tableName, + GetPartitionsArgs args) throws MetaException, NoSuchObjectException; + + /** + * Get a partial or complete list of names for partitions of a table. + * @param tableName the table + * @param defaultPartName default partition name. + * @param exprBytes expression for filtering resulting list, serialized from ExprNodeDesc. + * @param order ordered the resulting list. + * @param maxParts maximum number of partitions to retrieve, -1 for all. + * @return list of partition names. + * @throws MetaException there was an error accessing the RDBMS + */ + List listPartitionNames(TableName tableName, + String defaultPartName, byte[] exprBytes, String order, + int maxParts) throws MetaException, NoSuchObjectException; + + /** + * Get partitions using an already parsed expression. + * @param tableName the table + * @param args additional arguments for getting partitions + * @return true if the result contains unknown partitions. + * @throws TException error executing the expression + */ + boolean getPartitionsByExpr(TableName tableName, + List result, GetPartitionsArgs args) + throws TException; + + /** + * Get partitions by name. + * @param tableName the table. + * @param args additional arguments for getting partitions + * @return list of matching partitions + * @throws MetaException error accessing the RDBMS. + * @throws NoSuchObjectException No such table. + */ + List getPartitionsByNames(TableName tableName, + GetPartitionsArgs args) throws MetaException, NoSuchObjectException; + + /** + * Alter a partition. + * @param tableName the table. + * @param part_vals partition values that describe the partition. + * @param new_part new partition object. This should be a complete copy of the old with + * changes values, not just the parts to update. + * @return + * @throws InvalidObjectException No such partition. + * @throws MetaException error accessing the RDBMS. 
+ */ + Partition alterPartition(TableName tableName, List part_vals, + Partition new_part, String queryValidWriteIds) + throws InvalidObjectException, MetaException; + + /** + * Alter a set of partitions. + * @param tableName table name. + * @param part_vals_list list of list of partition values. Each outer list describes one + * partition (with its list of partition values). + * @param new_parts list of new partitions. The order must match the old partitions described in + * part_vals_list. Each of these should be a complete copy of the new + * partition, not just the pieces to update. + * @param writeId write id of the transaction for the table + * @param queryValidWriteIds valid write id list of the transaction on the current table + * @return + * @throws InvalidObjectException One of the indicated partitions does not exist. + * @throws MetaException error accessing the RDBMS. + */ + List alterPartitions(TableName tableName, + List> part_vals_list, List new_parts, long writeId, + String queryValidWriteIds) + throws InvalidObjectException, MetaException; + + + /** + * Get partitions with a filter. This is a portion of the SQL where clause. + * @param tableName table name + * @param args additional arguments for getting partitions + * @return list of partition objects matching the criteria + * @throws MetaException Error accessing the RDBMS or processing the filter. + * @throws NoSuchObjectException no such table. + */ + List getPartitionsByFilter( + TableName tableName, GetPartitionsArgs args) + throws MetaException, NoSuchObjectException; + + /** + * Generic Partition request API, providing different kinds of filtering and controlling output. + * + * @param table table for which whose partitions are requested + * * @param table table for which partitions are requested + * @param projectionSpec the projection spec from the GetPartitionsRequest + * This projection spec includes a fieldList which represents the fields which must be returned. 
+ * Any other field which is not in the fieldList may be unset in the returned + * partitions (it is up to the implementation to decide whether it chooses to + * include or exclude such fields). E.g. setting the field list to sd.location, + * serdeInfo.name, sd.cols.name, sd.cols.type will + * return partitions which will have location field set in the storage descriptor. + * Also the serdeInfo in the returned storage descriptor will only have name field + * set. This applies to multi-valued fields as well like sd.cols, so in the + * example above only name and type fields will be set for sd.cols. + * If the fieldList is empty or not present, all the fields will be set. + * Additionally, it also includes an includeParamKeyPattern and excludeParamKeyPattern + * which is a SQL-92 compliant regex pattern to include or exclude parameters. The paramKeyPattern + * supports _ or % wildcards which represent one character and 0 or more characters respectively + * @param filterSpec The filter spec from GetPartitionsRequest which includes the filter mode + * and the list of filter strings. The filter mode could be BY_NAMES, BY_VALUES or BY_EXPR + * to filter by partition names, partition values or expression. The filter strings are provided + * in the list of filters within the filterSpec. When more than one filter is provided in the list + * they are logically ANDed together + * @return List of matching partitions which may be partially filled according to fieldList. + * @throws MetaException in case of errors + * @throws NoSuchObjectException when table isn't found + */ + List getPartitionSpecsByFilterAndProjection(Table table, + GetProjectionsSpec projectionSpec, GetPartitionsFilterSpec filterSpec) + throws MetaException, NoSuchObjectException; + + /** + * Fetch a partition along with privilege information for a particular user. + * @param tableName table name. + * @param partVals partition values + * @param user_name user to get privilege information for. 
+ * @param group_names groups to get privilege information for. + * @return a partition + * @throws MetaException error accessing the RDBMS. + * @throws NoSuchObjectException no such partition exists + * @throws InvalidObjectException error fetching privilege information + */ + Partition getPartitionWithAuth(TableName tableName, + List partVals, String user_name, List group_names) + throws MetaException, NoSuchObjectException, InvalidObjectException; + + /** + * Lists partition names that match a given partial specification + * @param tableName + * The name of the table which has the partitions + * @param partVals + * A partial list of values for partitions in order of the table's partition keys. + * Entries can be empty if you only want to specify latter partitions. + * @param maxParts + * The maximum number of partitions to return + * @return A list of partition names that match the partial spec. + * @throws MetaException error accessing RDBMS + * @throws NoSuchObjectException No such table exists + */ + List listPartitionNamesPs(TableName tableName, List partVals, short maxParts) + throws MetaException, NoSuchObjectException; + + /** + * Lists partitions that match a given partial specification and sets their auth privileges. + * If userName and groupNames null, then no auth privileges are set. + * @param tableName + * The name of the table which has the partitions + * @param args additional arguments for getting partitions + * @return A list of partitions that match the partial spec. + * @throws MetaException error access RDBMS + * @throws NoSuchObjectException No such table exists + * @throws InvalidObjectException error access privilege information + */ + List listPartitionsPsWithAuth(TableName tableName, + GetPartitionsArgs args) throws MetaException, InvalidObjectException, NoSuchObjectException; + + /** + * Get the number of partitions that match a provided SQL filter. + * @param tableName table name. 
+ * @param filter filter from Hive's SQL where clause + * @return number of matching partitions. + * @throws MetaException error accessing the RDBMS or executing the filter + * @throws NoSuchObjectException no such table + */ + int getNumPartitionsByFilter(TableName tableName, String filter) + throws MetaException, NoSuchObjectException; + + /** + * Get the number of partitions that match a given partial specification. + * @param tableName table name. + * @param partVals A partial list of values for partitions in order of the table's partition keys. + * Entries can be empty if you need to specify latter partitions. + * @return number of matching partitions. + * @throws MetaException error accessing the RDBMS or working with the specification. + * @throws NoSuchObjectException no such table. + */ + int getNumPartitionsByPs(TableName tableName, List partVals) + throws MetaException, NoSuchObjectException; + + /** + * Get a list of partition values as one big struct. + * @param tableName table name. + * @param cols partition key columns + * @param applyDistinct whether to apply distinct to the list + * @param filter filter to apply to the partition names + * @param ascending whether to put in ascending order + * @param order whether to order + * @param maxParts maximum number of parts to return, or -1 for all + * @return struct with all of the partition value information + * @throws MetaException error access the RDBMS + */ + PartitionValuesResponse listPartitionValues(TableName tableName, + List cols, boolean applyDistinct, String filter, boolean ascending, + List order, long maxParts) throws MetaException; + + /** + * Update creation metadata for a materialized view. + * @param tableName table name. + * @param cm new creation metadata + * @throws MetaException error accessing the RDBMS. + */ + void updateCreationMetadata(TableName tableName, CreationMetadata cm) + throws MetaException; + + /** + * Retrieve all materialized views. 
+ * @return all materialized views in a catalog + * @throws MetaException error querying the RDBMS + */ + List
getAllMaterializedViewObjectsForRewriting(String catName) throws MetaException; + + MTable ensureGetMTable(TableName tableName) throws NoSuchObjectException; + + /** + * Checking if table is part of a materialized view. + * @param tableName table name + * @return list of materialized views that uses the table + */ + List isPartOfMaterializedView(TableName tableName); + + Table markPartitionForEvent(TableName tableName, Map partVals, PartitionEventType evtType) + throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException; + + boolean isPartitionMarkedForEvent(TableName tableName, Map partName, PartitionEventType evtType) + throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException; + + int getObjectCount(String fieldName, String objName); + + /** + * Updates a given table parameter with expected value. + * + * @return the number of rows updated + */ + long updateParameterWithExpectedValue(Table table, String key, String expectedValue, String newValue) + throws MetaException, NoSuchObjectException; + + MPartition ensureGetMPartition(TableName tableName, List partVals) throws MetaException; +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/NotificationStoreImpl.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/NotificationStoreImpl.java new file mode 100644 index 000000000000..db6c606f1826 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/NotificationStoreImpl.java @@ -0,0 +1,428 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.metastore.impl; + +import com.google.common.collect.Lists; + +import javax.jdo.Query; +import javax.jdo.datastore.JDOConnection; + +import java.sql.Connection; +import java.sql.Statement; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.DatabaseProduct; +import org.apache.hadoop.hive.metastore.directsql.MetaStoreDirectSql; +import org.apache.hadoop.hive.metastore.PersistenceManagerProvider; +import org.apache.hadoop.hive.metastore.RawStore; +import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NotificationEvent; +import org.apache.hadoop.hive.metastore.api.NotificationEventRequest; +import org.apache.hadoop.hive.metastore.api.NotificationEventResponse; +import org.apache.hadoop.hive.metastore.api.NotificationEventsCountRequest; +import org.apache.hadoop.hive.metastore.api.NotificationEventsCountResponse; +import org.apache.hadoop.hive.metastore.api.WriteEventInfo; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; 
+import org.apache.hadoop.hive.metastore.metastore.RawStoreAware;
+import org.apache.hadoop.hive.metastore.model.MNotificationLog;
+import org.apache.hadoop.hive.metastore.model.MNotificationNextId;
+import org.apache.hadoop.hive.metastore.model.MTxnWriteNotificationLog;
+import org.apache.hadoop.hive.metastore.metastore.iface.NotificationStore;
+import org.apache.hadoop.hive.metastore.tools.SQLGenerator;
+import org.apache.hadoop.hive.metastore.utils.RetryingExecutor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.hadoop.hive.metastore.ObjectStore.appendSimpleCondition;
+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
+import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier;
+
+/**
+ * {@link NotificationStore} implementation that stores and queries notification events
+ * ({@link MNotificationLog}, {@link MNotificationNextId}, {@link MTxnWriteNotificationLog})
+ * through JDO queries.
+ *
+ * <p>The {@code pm} and {@code baseStore} members used below are not declared in this class;
+ * NOTE(review): presumably they are inherited from {@link RawStoreAware} - confirm.
+ */
+public class NotificationStoreImpl extends RawStoreAware implements NotificationStore {
+  private static final Logger LOG = LoggerFactory.getLogger(NotificationStoreImpl.class);
+  private Configuration conf;
+  private SQLGenerator sqlGenerator;
+  // Created lazily, only on the Derby code path of lockNotificationSequenceForUpdate().
+  private MetaStoreDirectSql directSql;
+
+  /**
+   * Binds this store to its base {@link RawStore} and derives the configuration and the
+   * database-specific SQL generator from it.
+   *
+   * @param store the base store this implementation works on behalf of
+   */
+  @Override
+  public void setBaseStore(RawStore store) {
+    super.setBaseStore(store);
+    this.conf = baseStore.getConf();
+    DatabaseProduct dbType = PersistenceManagerProvider.getDatabaseProduct();
+    this.sqlGenerator = new SQLGenerator(dbType, conf);
+  }
+
+  /**
+   * Returns the notification events with an id greater than {@code rqst.getLastEvent()},
+   * ordered by event id and optionally filtered by catalog, database, table names,
+   * included event types and skipped event types.
+   *
+   * <p>The number of returned events is capped by {@code METASTORE_MAX_EVENT_RESPONSE}; a positive
+   * {@code rqst.getMaxEvents()} below that cap lowers the limit further.
+   *
+   * @param rqst the request describing the starting event id and the optional filters
+   * @return a response whose event list is never {@code null} (it may be empty)
+   */
+  @Override
+  public NotificationEventResponse getNextNotification(NotificationEventRequest rqst) {
+    NotificationEventResponse result = new NotificationEventResponse();
+    result.setEvents(new ArrayList<>());
+    long lastEvent = rqst.getLastEvent();
+    List<Object> parameterVals = new ArrayList<>();
+    parameterVals.add(lastEvent);
+    // filterBuilder holds the conditional clause of the select query
+    StringBuilder filterBuilder = new StringBuilder("eventId > para" + parameterVals.size());
+    // parameterBuilder declares the type of every parameter referenced by filterBuilder
+    StringBuilder parameterBuilder = new StringBuilder("java.lang.Long para" + parameterVals.size());
+    /* A fully constructed query would look like (parameters are numbered from 1):
+      -> filterBuilder: eventId > para1 && catalogName == para2 && dbName == para3 && (tableName == para4
+          || tableName == para5) && eventType != para6
+      -> parameterBuilder: java.lang.Long para1, java.lang.String para2, java.lang.String para3
+          , java.lang.String para4, java.lang.String para5, java.lang.String para6
+     */
+    if (rqst.isSetCatName()) {
+      filterBuilder.append(" && catalogName == ")
+          .append(addStringParam(parameterVals, parameterBuilder, normalizeIdentifier(rqst.getCatName())));
+    }
+    if (rqst.isSetDbName()) {
+      filterBuilder.append(" && dbName == ")
+          .append(addStringParam(parameterVals, parameterBuilder, normalizeIdentifier(rqst.getDbName())));
+    }
+    if (rqst.isSetTableNames() && !rqst.getTableNames().isEmpty()) {
+      appendEqualsAny(filterBuilder, "tableName", rqst.getTableNames(), true, parameterVals, parameterBuilder);
+    }
+    // An empty (but set) list must not emit an empty "()" group, which is not a valid filter.
+    if (rqst.isSetEventTypeList() && !rqst.getEventTypeList().isEmpty()) {
+      appendEqualsAny(filterBuilder, "eventType", rqst.getEventTypeList(), false, parameterVals, parameterBuilder);
+    }
+    if (rqst.isSetEventTypeSkipList()) {
+      for (String eventType : rqst.getEventTypeSkipList()) {
+        filterBuilder.append(" && eventType != ")
+            .append(addStringParam(parameterVals, parameterBuilder, eventType));
+      }
+    }
+    Query query = pm.newQuery(MNotificationLog.class, filterBuilder.toString());
+    query.declareParameters(parameterBuilder.toString());
+    query.setOrdering("eventId ascending");
+    int maxEventResponse =
+        MetastoreConf.getIntVar(baseStore.getConf(), MetastoreConf.ConfVars.METASTORE_MAX_EVENT_RESPONSE);
+    int requested = rqst.getMaxEvents();
+    int maxEvents = (requested > 0 && requested < maxEventResponse) ? requested : maxEventResponse;
+    query.setRange(0, maxEvents);
+    @SuppressWarnings("unchecked") // the candidate class of the query is MNotificationLog
+    Collection<MNotificationLog> events =
+        (Collection<MNotificationLog>) query.executeWithArray(parameterVals.toArray(new Object[0]));
+    if (events == null) {
+      return result;
+    }
+    Iterator<MNotificationLog> i = events.iterator();
+    while (i.hasNext()) {
+      result.addToEvents(translateDbToThrift(i.next()));
+    }
+    return result;
+  }
+
+  /**
+   * Registers {@code value} as the next String query parameter: appends it to
+   * {@code parameterVals}, declares it in {@code parameterBuilder} and returns its generated name
+   * ({@code "para" + position}, positions starting at 1).
+   */
+  private static String addStringParam(List<Object> parameterVals, StringBuilder parameterBuilder,
+      String value) {
+    parameterVals.add(value);
+    String name = "para" + parameterVals.size();
+    parameterBuilder.append(", java.lang.String ").append(name);
+    return name;
+  }
+
+  /**
+   * Appends {@code " && (field == pN || field == pM ...) "} to {@code filterBuilder}, registering
+   * one String parameter per value. {@code values} must not be empty.
+   *
+   * @param normalize whether each value is passed through {@code normalizeIdentifier} first
+   */
+  private static void appendEqualsAny(StringBuilder filterBuilder, String field, List<String> values,
+      boolean normalize, List<Object> parameterVals, StringBuilder parameterBuilder) {
+    filterBuilder.append(" && (");
+    String separator = "";
+    for (String value : values) {
+      String paramValue = normalize ? normalizeIdentifier(value) : value;
+      filterBuilder.append(separator).append(field).append(" == ")
+          .append(addStringParam(parameterVals, parameterBuilder, paramValue));
+      separator = " || ";
+    }
+    filterBuilder.append(") ");
+  }
+
+  /** Copies a persisted notification-log row into its thrift representation. */
+  private NotificationEvent translateDbToThrift(MNotificationLog dbEvent) {
+    NotificationEvent event = new NotificationEvent();
+    event.setEventId(dbEvent.getEventId());
+    event.setEventTime(dbEvent.getEventTime());
+    event.setEventType(dbEvent.getEventType());
+    event.setCatName(dbEvent.getCatalogName());
+    event.setDbName(dbEvent.getDbName());
+    event.setTableName(dbEvent.getTableName());
+    event.setMessage(dbEvent.getMessage());
+    event.setMessageFormat(dbEvent.getMessageFormat());
+    return event;
+  }
+
+  /**
+   * Takes a database-level lock on the notification sequence, retrying up to
+   * {@code NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES} times with
+   * {@code NOTIFICATION_SEQUENCE_LOCK_RETRY_SLEEP_INTERVAL} between attempts.
+   *
+   * @throws MetaException declared for the retrying executor and direct SQL calls
+   */
+  private void lockNotificationSequenceForUpdate() throws MetaException {
+    int maxRetries =
+        MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.NOTIFICATION_SEQUENCE_LOCK_MAX_RETRIES);
+    long sleepInterval = MetastoreConf.getTimeVar(conf,
+        MetastoreConf.ConfVars.NOTIFICATION_SEQUENCE_LOCK_RETRY_SLEEP_INTERVAL, TimeUnit.MILLISECONDS);
+    // NOTE(review): RetryingExecutor is used without type arguments, as in the visible text;
+    // confirm whether the original carried an explicit type argument.
+    if (sqlGenerator.getDbProduct().isDERBY()) {
+      // Derby doesn't allow FOR UPDATE to lock the row being selected (See https://db.apache
+      // .org/derby/docs/10.1/ref/rrefsqlj31783.html) . So lock the whole table. Since there's
+      // only one row in the table, this shouldn't cause any performance degradation.
+      new RetryingExecutor(maxRetries, () -> {
+        if (directSql == null) {
+          directSql = new MetaStoreDirectSql(pm, conf, "");
+        }
+        directSql.lockDbTable("NOTIFICATION_SEQUENCE");
+        return null;
+      }).commandName("lockNotificationSequenceForUpdate").sleepInterval(sleepInterval).run();
+    } else {
+      String selectQuery = "select \"NEXT_EVENT_ID\" from \"NOTIFICATION_SEQUENCE\"";
+      String lockingQuery = sqlGenerator.addForUpdateClause(selectQuery);
+      new RetryingExecutor(maxRetries, () -> {
+        String s = sqlGenerator.getDbProduct().getPrepareTxnStmt();
+        assert pm.currentTransaction().isActive();
+        JDOConnection jdoConn = pm.getDataStoreConnection();
+        Connection conn = (Connection) jdoConn.getNativeConnection();
+        try (Statement statement = conn.createStatement()) {
+          if (s != null) {
+            statement.execute(s);
+          }
+          statement.execute(lockingQuery);
+        } finally {
+          // Close the JDO connection handle on every path, including failures.
+          jdoConn.close();
+        }
+        return null;
+      }).commandName("lockNotificationSequenceForUpdate").sleepInterval(sleepInterval).run();
+    }
+  }
+
+  /**
+   * Assigns the next event id to {@code entry} and persists it.
+   *
+   * <p>Pending changes are flushed and the notification sequence is locked before the next id is
+   * read. If no sequence row exists yet, one starting at 1 is created.
+   *
+   * @param entry the event to store; its event id is overwritten
+   * @throws MetaException if the notification sequence cannot be locked
+   */
+  @Override
+  public void addNotificationEvent(NotificationEvent entry) throws MetaException {
+    pm.flush();
+    lockNotificationSequenceForUpdate();
+    Query query = pm.newQuery(MNotificationNextId.class);
+    @SuppressWarnings("unchecked") // the candidate class of the query is MNotificationNextId
+    Collection<MNotificationNextId> ids = (Collection<MNotificationNextId>) query.execute();
+    MNotificationNextId mNotificationNextId;
+    boolean needToPersistId;
+    if (CollectionUtils.isEmpty(ids)) {
+      mNotificationNextId = new MNotificationNextId(1L);
+      needToPersistId = true;
+    } else {
+      mNotificationNextId = ids.iterator().next();
+      needToPersistId = false;
+    }
+    entry.setEventId(mNotificationNextId.getNextEventId());
+    mNotificationNextId.incrementEventId();
+    if (needToPersistId) {
+      pm.makePersistent(mNotificationNextId);
+    }
+    pm.makePersistent(translateThriftToDb(entry));
+  }
+
+  /**
+   * Copies a thrift notification event into a new persistable row; the default catalog is used
+   * when the event carries no catalog name.
+   */
+  private MNotificationLog translateThriftToDb(NotificationEvent entry) {
+    MNotificationLog dbEntry = new MNotificationLog();
+    dbEntry.setEventId(entry.getEventId());
+    dbEntry.setEventTime(entry.getEventTime());
+    dbEntry.setEventType(entry.getEventType());
+    dbEntry.setCatalogName(entry.isSetCatName() ? entry.getCatName() : getDefaultCatalog(baseStore.getConf()));
+    dbEntry.setDbName(entry.getDbName());
+    dbEntry.setTableName(entry.getTableName());
+    dbEntry.setMessage(entry.getMessage());
+    dbEntry.setMessageFormat(entry.getMessageFormat());
+    return dbEntry;
+  }
+
+  /**
+   * Deletes notification-log events older than the given age.
+   *
+   * @param olderThan age in seconds
+   */
+  @Override
+  public void cleanNotificationEvents(int olderThan) {
+    cleanOlderEvents(olderThan, MNotificationLog.class, "NotificationLog");
+  }
+
+  /**
+   * Deletes, in batches of at most {@code EVENT_CLEAN_MAX_EVENTS} rows (unbounded when that
+   * setting is not positive), all rows of {@code table} whose event time is not later than
+   * "now minus {@code olderThan} seconds", and logs the total.
+   *
+   * @param olderThan age in seconds
+   * @param table entity class to clean
+   * @param tableName name used in log messages
+   */
+  private <T> void cleanOlderEvents(int olderThan, Class<T> table, String tableName) {
+    final int eventBatchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.EVENT_CLEAN_MAX_EVENTS);
+    final long ageSec = olderThan;
+    final Instant now = Instant.now();
+    // Cut-off as epoch seconds; toIntExact fails loudly instead of silently wrapping.
+    final int tooOld = Math.toIntExact(now.getEpochSecond() - ageSec);
+    final Optional<Integer> batchSize = (eventBatchSize > 0) ? Optional.of(eventBatchSize) : Optional.empty();
+
+    final long start = System.nanoTime();
+    int deleteCount = doCleanNotificationEvents(tooOld, batchSize, table, tableName);
+
+    if (deleteCount == 0) {
+      LOG.info("No {} events found to be cleaned with eventTime < {}", tableName, tooOld);
+    } else {
+      // Keep deleting until a batch comes back empty.
+      int batchCount = 0;
+      do {
+        batchCount = doCleanNotificationEvents(tooOld, batchSize, table, tableName);
+        deleteCount += batchCount;
+      } while (batchCount > 0);
+    }
+
+    final long finish = System.nanoTime();
+    LOG.info("Deleted {} {} events older than epoch:{} in {}ms", deleteCount, tableName, tooOld,
+        TimeUnit.NANOSECONDS.toMillis(finish - start));
+  }
+
+  /**
+   * Deletes one batch of rows of {@code tableClass} with {@code eventTime <= ageSec}, lowest id
+   * first.
+   *
+   * @param ageSec despite its name, the cut-off event time (epoch seconds) computed by the caller
+   * @param batchSize maximum number of rows to delete in this call; unbounded when empty
+   * @param tableClass {@link MNotificationLog} or {@link MTxnWriteNotificationLog}
+   * @param tableName name used in log and error messages
+   * @return number of rows deleted by this call
+   * @throws RuntimeException if {@code tableClass} is neither of the supported classes
+   */
+  private <T> int doCleanNotificationEvents(final int ageSec, final Optional<Integer> batchSize,
+      Class<T> tableClass, String tableName) {
+    final boolean isNotificationLog = MNotificationLog.class.equals(tableClass);
+    // Fail fast: without a known id column the ordering clause below would be "null ascending".
+    if (!isNotificationLog && !MTxnWriteNotificationLog.class.equals(tableClass)) {
+      throw new RuntimeException(
+          "Cleaning of older " + tableName + " events failed. " + "Reason: Unknown table encountered "
+              + tableClass.getName());
+    }
+    final String key = isNotificationLog ? "eventId" : "txnId";
+
+    Query query = pm.newQuery(tableClass, "eventTime <= tooOld");
+    query.declareParameters("java.lang.Integer tooOld");
+    query.setOrdering(key + " ascending");
+    if (batchSize.isPresent()) {
+      query.setRange(0, batchSize.get());
+    }
+
+    @SuppressWarnings("unchecked") // the candidate class of the query is tableClass
+    List<T> events = (List<T>) query.execute(ageSec);
+    if (CollectionUtils.isEmpty(events)) {
+      return 0;
+    }
+    int eventsCount = events.size();
+    if (LOG.isDebugEnabled()) {
+      int minEventTime, maxEventTime;
+      long minId, maxId;
+      T firstNotification = events.get(0);
+      T lastNotification = events.get(eventsCount - 1);
+      if (isNotificationLog) {
+        minEventTime = ((MNotificationLog) firstNotification).getEventTime();
+        minId = ((MNotificationLog) firstNotification).getEventId();
+        maxEventTime = ((MNotificationLog) lastNotification).getEventTime();
+        maxId = ((MNotificationLog) lastNotification).getEventId();
+      } else {
+        minEventTime = ((MTxnWriteNotificationLog) firstNotification).getEventTime();
+        minId = ((MTxnWriteNotificationLog) firstNotification).getTxnId();
+        maxEventTime = ((MTxnWriteNotificationLog) lastNotification).getEventTime();
+        maxId = ((MTxnWriteNotificationLog) lastNotification).getTxnId();
+      }
+      LOG.debug(
+          "Remove {} batch of {} events with eventTime < {}, min {}: {}, max {}: {}, min eventTime {}, max eventTime {}",
+          tableName, eventsCount, ageSec, key, minId, key, maxId, minEventTime, maxEventTime);
+    }
+    pm.deletePersistentAll(events);
+    return eventsCount;
+  }
+
+  /**
+   * Returns the id of the most recently assigned notification event, i.e. the stored next id
+   * minus one, or 0 when no sequence row exists.
+   */
+  @Override
+  public CurrentNotificationEventId getCurrentNotificationEventId() {
+    Query query = pm.newQuery(MNotificationNextId.class);
+    @SuppressWarnings("unchecked") // the candidate class of the query is MNotificationNextId
+    Collection<MNotificationNextId> ids = (Collection<MNotificationNextId>) query.execute();
+    long id = 0;
+    if (CollectionUtils.isNotEmpty(ids)) {
+      id = ids.iterator().next().getNextEventId() - 1;
+    }
+    return new CurrentNotificationEventId(id);
+  }
+
+  /**
+   * Counts the notification events after {@code rqst.getFromEventId()} that match the request's
+   * database, catalog, optional upper event id and optional table names. Rows with a null
+   * database or catalog name are counted as well. The count is capped by the request's limit
+   * when one is set.
+   *
+   * @param rqst the count request; a database name of {@code "*"} matches every database
+   * @return the (possibly capped) number of matching events
+   */
+  @Override
+  public NotificationEventsCountResponse getNotificationEventsCount(NotificationEventsCountRequest rqst) {
+    long fromEventId = rqst.getFromEventId();
+    String inputDbName = rqst.getDbName();
+    String catName = rqst.isSetCatName() ? rqst.getCatName() : getDefaultCatalog(conf);
+    List<Object> paramVals = new ArrayList<>();
+
+    // We store a catalog name in lower case in metastore and also use the same way everywhere in
+    // hive.
+    assert catName.equals(catName.toLowerCase());
+
+    // Build the query to count events, part by part
+    StringBuilder queryStr =
+        new StringBuilder("select count(eventId) from ").append(MNotificationLog.class.getName());
+    // count fromEventId onwards events
+    queryStr.append(" where eventId > fromEventId");
+    StringBuilder paramSpecs = new StringBuilder("java.lang.Long fromEventId");
+    paramVals.add(Long.valueOf(fromEventId));
+
+    // Input database name can be a database name or a *. In the first case we add a filter
+    // condition on dbName column, but not in the second case, since a * means all the
+    // databases. In case we support more elaborate database name patterns in future, we will
+    // have to apply a method similar to getNextNotification() method of MetaStoreClient.
+    if (!inputDbName.equals("*")) {
+      // dbName could be NULL in case of transaction related events, which also need to be
+      // counted.
+      queryStr.append(" && (dbName == inputDbName || dbName == null)");
+      paramSpecs.append(", java.lang.String inputDbName");
+      // We store a database name in lower case in metastore.
+      paramVals.add(inputDbName.toLowerCase());
+    }
+
+    // catName could be NULL in case of transaction related events, which also need to be
+    // counted.
+    queryStr.append(" && (catalogName == catName || catalogName == null)");
+    paramSpecs.append(", java.lang.String catName");
+    paramVals.add(catName);
+
+    // count events upto toEventId if specified
+    if (rqst.isSetToEventId()) {
+      queryStr.append(" && eventId <= toEventId");
+      paramSpecs.append(", java.lang.Long toEventId");
+      paramVals.add(Long.valueOf(rqst.getToEventId()));
+    }
+    // Specify list of table names in the query string and parameter types
+    if (rqst.isSetTableNames() && !rqst.getTableNames().isEmpty()) {
+      queryStr.append(" && (");
+      String separator = "";
+      for (String tableName : rqst.getTableNames()) {
+        paramVals.add(tableName.toLowerCase());
+        // Parameter names are made unique by suffixing the running parameter count.
+        String paramName = "tableName" + paramVals.size();
+        queryStr.append(separator).append("tableName == ").append(paramName);
+        paramSpecs.append(", java.lang.String ").append(paramName);
+        separator = " || ";
+      }
+      queryStr.append(")");
+    }
+
+    Query query = pm.newQuery(queryStr.toString());
+    query.declareParameters(paramSpecs.toString());
+    Long result = (Long) query.executeWithArray(paramVals.toArray());
+    // Cap the event count by limit if specified.
+    long eventCount = result.longValue();
+    if (rqst.isSetLimit() && eventCount > rqst.getLimit()) {
+      eventCount = rqst.getLimit();
+    }
+    return new NotificationEventsCountResponse(eventCount);
+  }
+
+  /**
+   * Deletes transactional write notification events older than the given age.
+   *
+   * @param olderThan age in seconds
+   */
+  @Override
+  public void cleanWriteNotificationEvents(int olderThan) {
+    cleanOlderEvents(olderThan, MTxnWriteNotificationLog.class, "TxnWriteNotificationLog");
+  }
+
+  /**
+   * Returns the write events recorded for a transaction, ordered by database and table.
+   *
+   * @param txnId transaction id
+   * @param dbName database to restrict to; {@code null} or {@code "*"} means all databases
+   * @param tableName table to restrict to; {@code null} or {@code "*"} means all tables
+   * @return the matching write events, or {@code null} when there are none
+   * @throws MetaException declared by the interface
+   */
+  @Override
+  public List<WriteEventInfo> getAllWriteEventInfo(long txnId, String dbName, String tableName) throws MetaException {
+    List<String> parameterVals = new ArrayList<>();
+    StringBuilder filterBuilder = new StringBuilder(" txnId == " + Long.toString(txnId));
+    if (dbName != null && !"*".equals(dbName)) { // * means get all database, so no need to add filter
+      appendSimpleCondition(filterBuilder, "database", new String[]{dbName}, parameterVals);
+    }
+    if (tableName != null && !"*".equals(tableName)) {
+      appendSimpleCondition(filterBuilder, "table", new String[]{tableName}, parameterVals);
+    }
+    Query query = pm.newQuery(MTxnWriteNotificationLog.class, filterBuilder.toString());
+    query.setOrdering("database,table ascending");
+    @SuppressWarnings("unchecked") // the candidate class of the query is MTxnWriteNotificationLog
+    List<MTxnWriteNotificationLog> mplans = (List<MTxnWriteNotificationLog>) query.executeWithArray(
+        parameterVals.toArray(new String[0]));
+    if (mplans == null || mplans.isEmpty()) {
+      // Existing contract: "no events" is reported as null, not as an empty list.
+      return null;
+    }
+    // Load the fields of the rows only once we know there is something to load.
+    pm.retrieveAll(mplans);
+    List<WriteEventInfo> writeEventInfoList = Lists.newArrayList();
+    for (MTxnWriteNotificationLog mplan : mplans) {
+      WriteEventInfo writeEventInfo = new WriteEventInfo(mplan.getWriteId(), mplan.getDatabase(),
+          mplan.getTable(), mplan.getFiles());
+      writeEventInfo.setPartition(mplan.getPartition());
+      writeEventInfo.setPartitionObj(mplan.getPartObject());
+      writeEventInfo.setTableObj(mplan.getTableObject());
+      writeEventInfoList.add(writeEventInfo);
+    }
+    return writeEventInfoList;
+  }
+}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/PrivilegeStoreImpl.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/PrivilegeStoreImpl.java new file mode 100644 index 000000000000..ea142b5ec980 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/PrivilegeStoreImpl.java @@ -0,0 +1,2373 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.metastore.impl; + +import com.google.common.base.Preconditions; + +import javax.jdo.Query; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.metastore.RawStore; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.HiveObjectType; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.PrivilegeBag; +import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.metastore.RawStoreAware; +import org.apache.hadoop.hive.metastore.metastore.iface.TableStore; +import org.apache.hadoop.hive.metastore.model.MDBPrivilege; +import org.apache.hadoop.hive.metastore.model.MDCPrivilege; +import org.apache.hadoop.hive.metastore.model.MDataConnector; +import org.apache.hadoop.hive.metastore.model.MDatabase; +import org.apache.hadoop.hive.metastore.model.MGlobalPrivilege; +import 
org.apache.hadoop.hive.metastore.model.MPartition; +import org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege; +import org.apache.hadoop.hive.metastore.model.MPartitionPrivilege; +import org.apache.hadoop.hive.metastore.model.MRole; +import org.apache.hadoop.hive.metastore.model.MRoleMap; +import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege; +import org.apache.hadoop.hive.metastore.model.MTablePrivilege; +import org.apache.hadoop.hive.metastore.metastore.GetHelper; +import org.apache.hadoop.hive.metastore.metastore.GetListHelper; +import org.apache.hadoop.hive.metastore.metastore.iface.PrivilegeStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.hive.metastore.ObjectStore.convert; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog; +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +public class PrivilegeStoreImpl extends RawStoreAware implements PrivilegeStore { + private static final Logger LOG = LoggerFactory.getLogger(PrivilegeStoreImpl.class); + private Configuration conf; + + @Override + public boolean addRole(String roleName, String ownerName) + throws InvalidObjectException, MetaException, NoSuchObjectException { + MRole nameCheck = this.getMRole(roleName); + if (nameCheck != null) { + throw new InvalidObjectException("Role " + roleName + " already exists."); + } + int now = (int) (System.currentTimeMillis() / 1000); + MRole mRole = new MRole(roleName, now, ownerName); + pm.makePersistent(mRole); + return true; + } + + @Override + public boolean grantRole(Role role, String userName, + PrincipalType principalType, String grantor, PrincipalType grantorType, + boolean grantOption) throws MetaException, NoSuchObjectException,InvalidObjectException { + MRoleMap roleMap = null; + try { + roleMap = this.getMSecurityUserRoleMap(userName, principalType, role + 
.getRoleName()); + } catch (Exception e) { + } + if (roleMap != null) { + throw new InvalidObjectException("Principal " + userName + + " already has the role " + role.getRoleName()); + } + if (principalType == PrincipalType.ROLE) { + validateRole(userName); + } + MRole mRole = getMRole(role.getRoleName()); + long now = System.currentTimeMillis()/1000; + MRoleMap roleMember = new MRoleMap(userName, principalType.toString(), + mRole, (int) now, grantor, grantorType.toString(), grantOption); + pm.makePersistent(roleMember); + return true; + } + + /** + * Verify that role with given name exists, if not throw exception + */ + private void validateRole(String roleName) throws NoSuchObjectException { + // if grantee is a role, check if it exists + MRole granteeRole = getMRole(roleName); + if (granteeRole == null) { + throw new NoSuchObjectException("Role " + roleName + " does not exist"); + } + } + + @Override + public boolean revokeRole(Role role, String userName, PrincipalType principalType, + boolean grantOption) throws MetaException, NoSuchObjectException { + MRoleMap roleMember = getMSecurityUserRoleMap(userName, principalType, + role.getRoleName()); + if (grantOption) { + // Revoke with grant option - only remove the grant option but keep the role. + if (roleMember.getGrantOption()) { + roleMember.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with role " + role.getRoleName()); + } + } else { + // No grant option in revoke, remove the whole role. 
+ pm.deletePersistent(roleMember); + } + return true; + } + + private MRoleMap getMSecurityUserRoleMap(String userName, PrincipalType principalType, + String roleName) { + MRoleMap mRoleMember = null; + Query query = + pm.newQuery(MRoleMap.class, + "principalName == t1 && principalType == t2 && role.roleName == t3"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + query.setUnique(true); + mRoleMember = (MRoleMap) query.executeWithArray(userName, principalType.toString(), roleName); + pm.retrieve(mRoleMember);; + return mRoleMember; + } + + @Override + public boolean removeRole(String roleName) throws MetaException, + NoSuchObjectException { + try { + MRole mRol = getMRole(roleName); + pm.retrieve(mRol); + if (mRol != null) { + // first remove all the membership, the membership that this role has + // been granted + List roleMap = listMRoleMembers(mRol.getRoleName()); + if (CollectionUtils.isNotEmpty(roleMap)) { + pm.deletePersistentAll(roleMap); + } + List roleMember = listMSecurityPrincipalMembershipRole(mRol + .getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(roleMember)) { + pm.deletePersistentAll(roleMember); + } + + // then remove all the grants + List userGrants = listPrincipalMGlobalGrants( + mRol.getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(userGrants)) { + pm.deletePersistentAll(userGrants); + } + + List dbGrants = listPrincipalAllDBGrant(mRol + .getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(dbGrants)) { + pm.deletePersistentAll(dbGrants); + } + + List dcGrants = listPrincipalAllDCGrant(mRol + .getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(dcGrants)) { + pm.deletePersistentAll(dcGrants); + } + + List tabPartGrants = listPrincipalAllTableGrants( + mRol.getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(tabPartGrants)) { + pm.deletePersistentAll(tabPartGrants); + } + + List partGrants = 
listPrincipalAllPartitionGrants( + mRol.getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(partGrants)) { + pm.deletePersistentAll(partGrants); + } + + List tblColumnGrants = listPrincipalAllTableColumnGrants( + mRol.getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(tblColumnGrants)) { + pm.deletePersistentAll(tblColumnGrants); + } + + List partColumnGrants = listPrincipalAllPartitionColumnGrants( + mRol.getRoleName(), PrincipalType.ROLE); + if (CollectionUtils.isNotEmpty(partColumnGrants)) { + pm.deletePersistentAll(partColumnGrants); + } + + // finally remove the role + pm.deletePersistent(mRol); + } + return true; + } catch (Exception e) { + throw new MetaException(e.getMessage()); + } + } + + /** + * Get all the roles in the role hierarchy that this user and groupNames belongs to + */ + private Set listAllRolesInHierarchy(String userName, + List groupNames) { + List ret = new ArrayList<>(); + if(userName != null) { + ret.addAll(listMRoles(userName, PrincipalType.USER)); + } + if (groupNames != null) { + for (String groupName: groupNames) { + ret.addAll(listMRoles(groupName, PrincipalType.GROUP)); + } + } + // get names of these roles and its ancestors + Set roleNames = new HashSet<>(); + getAllRoleAncestors(roleNames, ret); + return roleNames; + } + + /** + * Add role names of parentRoles and its parents to processedRoles + */ + private void getAllRoleAncestors(Set processedRoleNames, List parentRoles) { + for (MRoleMap parentRole : parentRoles) { + String parentRoleName = parentRole.getRole().getRoleName(); + if (!processedRoleNames.contains(parentRoleName)) { + // unprocessed role: get its parents, add it to processed, and call this + // function recursively + List nextParentRoles = listMRoles(parentRoleName, PrincipalType.ROLE); + processedRoleNames.add(parentRoleName); + getAllRoleAncestors(processedRoleNames, nextParentRoles); + } + } + } + + public List listMRoles(String principalName, + PrincipalType principalType) { 
+ Query query = pm.newQuery(MRoleMap.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + query.setUnique(false); + List mRoles = + (List) query.executeWithArray(principalName, principalType.toString()); + pm.retrieveAll(mRoles);; + List mRoleMember = new ArrayList<>(mRoles); + + if (principalType == PrincipalType.USER) { + // All users belong to public role implicitly, add that role + // TODO MS-SPLIT Change this back to HMSHandler.PUBLIC once HiveMetaStore has moved to + // stand-alone metastore. + //MRole publicRole = new MRole(HMSHandler.PUBLIC, 0, HMSHandler.PUBLIC); + MRole publicRole = new MRole("public", 0, "public"); + mRoleMember.add(new MRoleMap(principalName, principalType.toString(), publicRole, 0, null, + null, false)); + } + + return mRoleMember; + } + + @Override + public List listRoles(String principalName, PrincipalType principalType) { + List result = new ArrayList<>(); + List roleMaps = listMRoles(principalName, principalType); + if (roleMaps != null) { + for (MRoleMap roleMap : roleMaps) { + MRole mrole = roleMap.getRole(); + Role role = new Role(mrole.getRoleName(), mrole.getCreateTime(), mrole.getOwnerName()); + result.add(role); + } + } + return result; + } + + @Override + public List listRolesWithGrants(String principalName, + PrincipalType principalType) { + List result = new ArrayList<>(); + List roleMaps = listMRoles(principalName, principalType); + if (roleMaps != null) { + for (MRoleMap roleMap : roleMaps) { + RolePrincipalGrant rolePrinGrant = new RolePrincipalGrant( + roleMap.getRole().getRoleName(), + roleMap.getPrincipalName(), + PrincipalType.valueOf(roleMap.getPrincipalType()), + roleMap.getGrantOption(), + roleMap.getAddTime(), + roleMap.getGrantor(), + // no grantor type for public role, hence the null check + roleMap.getGrantorType() == null ? 
null + : PrincipalType.valueOf(roleMap.getGrantorType()) + ); + result.add(rolePrinGrant); + } + } + return result; + } + + private List listMSecurityPrincipalMembershipRole(final String roleName, + final PrincipalType principalType) throws Exception { + LOG.debug("Executing listMSecurityPrincipalMembershipRole"); + Query query = pm.newQuery(MRoleMap.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + final List mRoleMemebership = (List) query.execute(roleName, principalType.toString()); + + LOG.debug("Retrieving all objects for listMSecurityPrincipalMembershipRole"); + pm.retrieveAll(mRoleMemebership); + LOG.debug("Done retrieving all objects for listMSecurityPrincipalMembershipRole: {}", mRoleMemebership); + + return Collections.unmodifiableList(new ArrayList<>(mRoleMemebership)); + } + + @Override + public Role getRole(String roleName) throws NoSuchObjectException { + MRole mRole = this.getMRole(roleName); + if (mRole == null) { + throw new NoSuchObjectException(roleName + " role can not be found."); + } + return new Role(mRole.getRoleName(), mRole.getCreateTime(), mRole + .getOwnerName()); + } + + private MRole getMRole(String roleName) { + MRole mrole = null; + Query query = pm.newQuery(MRole.class, "roleName == t1"); + query.declareParameters("java.lang.String t1"); + query.setUnique(true); + mrole = (MRole) query.execute(roleName); + pm.retrieve(mrole); + return mrole; + } + + @Override + public List listRoleNames() { + LOG.debug("Executing listAllRoleNames"); + Query query = pm.newQuery("select roleName from org.apache.hadoop.hive.metastore.model.MRole"); + query.setResult("roleName"); + Collection names = (Collection) query.execute(); + List roleNames = new ArrayList<>(); + for (Iterator i = names.iterator(); i.hasNext();) { + roleNames.add((String) i.next()); + } + return roleNames; + } + + @Override + public PrincipalPrivilegeSet getUserPrivilegeSet(String userName, + List 
groupNames) throws InvalidObjectException, MetaException { + PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); + if (userName != null) { + List user = this.listPrincipalMGlobalGrants(userName, PrincipalType.USER); + if(CollectionUtils.isNotEmpty(user)) { + Map> userPriv = new HashMap<>(); + List grantInfos = new ArrayList<>(user.size()); + for (int i = 0; i < user.size(); i++) { + MGlobalPrivilege item = user.get(i); + grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item + .getGrantorType()), item.getGrantOption())); + } + userPriv.put(userName, grantInfos); + ret.setUserPrivileges(userPriv); + } + } + if (CollectionUtils.isNotEmpty(groupNames)) { + Map> groupPriv = new HashMap<>(); + for(String groupName: groupNames) { + List group = + this.listPrincipalMGlobalGrants(groupName, PrincipalType.GROUP); + if(CollectionUtils.isNotEmpty(group)) { + List grantInfos = new ArrayList<>(group.size()); + for (int i = 0; i < group.size(); i++) { + MGlobalPrivilege item = group.get(i); + grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item + .getGrantorType()), item.getGrantOption())); + } + groupPriv.put(groupName, grantInfos); + } + } + ret.setGroupPrivileges(groupPriv); + } + return ret; + } + + private List getDBPrivilege(String catName, String dbName, + String principalName, PrincipalType principalType) { + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + + if (principalName != null) { + List userNameDbPriv = this.listPrincipalMDBGrants( + principalName, principalType, catName, dbName); + if (CollectionUtils.isNotEmpty(userNameDbPriv)) { + List grantInfos = new ArrayList<>( + userNameDbPriv.size()); + for (int i = 0; i < userNameDbPriv.size(); i++) { + MDBPrivilege item = userNameDbPriv.get(i); + grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), 
item.getGrantor(), getPrincipalTypeFromStr(item + .getGrantorType()), item.getGrantOption())); + } + return grantInfos; + } + } + return Collections.emptyList(); + } + + + @Override + public PrincipalPrivilegeSet getDBPrivilegeSet(String catName, String dbName, + String userName, List groupNames) throws InvalidObjectException, + MetaException { + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + + PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); + if (userName != null) { + Map> dbUserPriv = new HashMap<>(); + dbUserPriv.put(userName, getDBPrivilege(catName, dbName, userName, + PrincipalType.USER)); + ret.setUserPrivileges(dbUserPriv); + } + if (CollectionUtils.isNotEmpty(groupNames)) { + Map> dbGroupPriv = new HashMap<>(); + for (String groupName : groupNames) { + dbGroupPriv.put(groupName, getDBPrivilege(catName, dbName, groupName, + PrincipalType.GROUP)); + } + ret.setGroupPrivileges(dbGroupPriv); + } + Set roleNames = listAllRolesInHierarchy(userName, groupNames); + if (CollectionUtils.isNotEmpty(roleNames)) { + Map> dbRolePriv = new HashMap<>(); + for (String roleName : roleNames) { + dbRolePriv + .put(roleName, getDBPrivilege(catName, dbName, roleName, PrincipalType.ROLE)); + } + ret.setRolePrivileges(dbRolePriv); + } + return ret; + } + + private List getConnectorPrivilege(String catName, String connectorName, + String principalName, PrincipalType principalType) { + + // normalize string name + catName = normalizeIdentifier(catName); + connectorName = normalizeIdentifier(connectorName); + + if (principalName != null) { + // get all data connector granted privilege + List userNameDcPriv = this.listPrincipalMDCGrants( + principalName, principalType, connectorName); + + // populate and return grantInfos + if (CollectionUtils.isNotEmpty(userNameDcPriv)) { + List grantInfos = new ArrayList<>( + userNameDcPriv.size()); + for (int i = 0; i < userNameDcPriv.size(); i++) { + MDCPrivilege item = userNameDcPriv.get(i); + 
grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item + .getGrantorType()), item.getGrantOption())); + } + return grantInfos; + } + } + + // return empty list if no principalName + return Collections.emptyList(); + } + + @Override + public PrincipalPrivilegeSet getConnectorPrivilegeSet (String catName, String connectorName, + String userName, List groupNames) throws InvalidObjectException, + MetaException { + catName = normalizeIdentifier(catName); + connectorName = normalizeIdentifier(connectorName); + + PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); + // get user privileges + if (userName != null) { + Map> connectorUserPriv = new HashMap<>(); + connectorUserPriv.put(userName, getConnectorPrivilege(catName, connectorName, userName, + PrincipalType.USER)); + ret.setUserPrivileges(connectorUserPriv); + } + + // get group privileges + if (CollectionUtils.isNotEmpty(groupNames)) { + Map> dbGroupPriv = new HashMap<>(); + for (String groupName : groupNames) { + dbGroupPriv.put(groupName, getConnectorPrivilege(catName, connectorName, groupName, + PrincipalType.GROUP)); + } + ret.setGroupPrivileges(dbGroupPriv); + } + + // get role privileges + Set roleNames = listAllRolesInHierarchy(userName, groupNames); + if (CollectionUtils.isNotEmpty(roleNames)) { + Map> dbRolePriv = new HashMap<>(); + for (String roleName : roleNames) { + dbRolePriv.put(roleName, getConnectorPrivilege(catName, connectorName, roleName, + PrincipalType.ROLE)); + } + ret.setRolePrivileges(dbRolePriv); + } + return ret; + } + + @Override + public PrincipalPrivilegeSet getPartitionPrivilegeSet(TableName table, String partition, String userName, + List groupNames) throws InvalidObjectException, MetaException { + PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); + String tableName = normalizeIdentifier(table.getTable()); + String dbName = normalizeIdentifier(table.getDb()); + String catName = 
normalizeIdentifier(table.getCat()); + if (userName != null) { + Map> partUserPriv = new HashMap<>(); + partUserPriv.put(userName, getPartitionPrivilege(catName, dbName, + tableName, partition, userName, PrincipalType.USER)); + ret.setUserPrivileges(partUserPriv); + } + if (CollectionUtils.isNotEmpty(groupNames)) { + Map> partGroupPriv = new HashMap<>(); + for (String groupName : groupNames) { + partGroupPriv.put(groupName, getPartitionPrivilege(catName, dbName, tableName, + partition, groupName, PrincipalType.GROUP)); + } + ret.setGroupPrivileges(partGroupPriv); + } + Set roleNames = listAllRolesInHierarchy(userName, groupNames); + if (CollectionUtils.isNotEmpty(roleNames)) { + Map> partRolePriv = new HashMap<>(); + for (String roleName : roleNames) { + partRolePriv.put(roleName, getPartitionPrivilege(catName, dbName, tableName, + partition, roleName, PrincipalType.ROLE)); + } + ret.setRolePrivileges(partRolePriv); + } + return ret; + } + + @Override + public PrincipalPrivilegeSet getTablePrivilegeSet(TableName table, String userName, List groupNames) + throws InvalidObjectException, MetaException { + boolean commited = false; + PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); + String tableName = normalizeIdentifier(table.getTable()); + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + + if (userName != null) { + Map> tableUserPriv = new HashMap<>(); + tableUserPriv.put(userName, getTablePrivilege(catName, dbName, + tableName, userName, PrincipalType.USER)); + ret.setUserPrivileges(tableUserPriv); + } + if (CollectionUtils.isNotEmpty(groupNames)) { + Map> tableGroupPriv = new HashMap<>(); + for (String groupName : groupNames) { + tableGroupPriv.put(groupName, getTablePrivilege(catName, dbName, tableName, + groupName, PrincipalType.GROUP)); + } + ret.setGroupPrivileges(tableGroupPriv); + } + Set roleNames = listAllRolesInHierarchy(userName, groupNames); + if (CollectionUtils.isNotEmpty(roleNames)) 
{ + Map> tableRolePriv = new HashMap<>(); + for (String roleName : roleNames) { + tableRolePriv.put(roleName, getTablePrivilege(catName, dbName, tableName, + roleName, PrincipalType.ROLE)); + } + ret.setRolePrivileges(tableRolePriv); + } + return ret; + } + + @Override + public PrincipalPrivilegeSet getColumnPrivilegeSet(TableName table, String partitionName, String columnName, + String userName, List groupNames) throws InvalidObjectException, + MetaException { + String tableName = normalizeIdentifier(table.getTable()); + String dbName = normalizeIdentifier(table.getDb()); + columnName = normalizeIdentifier(columnName); + String catName = normalizeIdentifier(table.getCat()); + + PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); + if (userName != null) { + Map> columnUserPriv = new HashMap<>(); + columnUserPriv.put(userName, getColumnPrivilege(catName, dbName, tableName, + columnName, partitionName, userName, PrincipalType.USER)); + ret.setUserPrivileges(columnUserPriv); + } + if (CollectionUtils.isNotEmpty(groupNames)) { + Map> columnGroupPriv = new HashMap<>(); + for (String groupName : groupNames) { + columnGroupPriv.put(groupName, getColumnPrivilege(catName, dbName, tableName, + columnName, partitionName, groupName, PrincipalType.GROUP)); + } + ret.setGroupPrivileges(columnGroupPriv); + } + Set roleNames = listAllRolesInHierarchy(userName, groupNames); + if (CollectionUtils.isNotEmpty(roleNames)) { + Map> columnRolePriv = new HashMap<>(); + for (String roleName : roleNames) { + columnRolePriv.put(roleName, getColumnPrivilege(catName, dbName, tableName, + columnName, partitionName, roleName, PrincipalType.ROLE)); + } + ret.setRolePrivileges(columnRolePriv); + } + return ret; + } + + private List getPartitionPrivilege(String catName, String dbName, + String tableName, String partName, String principalName, + PrincipalType principalType) { + + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = 
normalizeIdentifier(catName); + + if (principalName != null) { + List userNameTabPartPriv = this + .listPrincipalMPartitionGrants(principalName, principalType, + catName, dbName, tableName, partName); + if (CollectionUtils.isNotEmpty(userNameTabPartPriv)) { + List grantInfos = new ArrayList<>( + userNameTabPartPriv.size()); + for (int i = 0; i < userNameTabPartPriv.size(); i++) { + MPartitionPrivilege item = userNameTabPartPriv.get(i); + grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), item.getGrantor(), + getPrincipalTypeFromStr(item.getGrantorType()), item.getGrantOption())); + + } + return grantInfos; + } + } + return new ArrayList<>(0); + } + + public static PrincipalType getPrincipalTypeFromStr(String str) { + return str == null ? null : PrincipalType.valueOf(str); + } + + private List getTablePrivilege(String catName, String dbName, + String tableName, String principalName, PrincipalType principalType) { + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + + if (principalName != null) { + List userNameTabPartPriv = this + .listAllMTableGrants(principalName, principalType, + catName, dbName, tableName); + if (CollectionUtils.isNotEmpty(userNameTabPartPriv)) { + List grantInfos = new ArrayList<>( + userNameTabPartPriv.size()); + for (int i = 0; i < userNameTabPartPriv.size(); i++) { + MTablePrivilege item = userNameTabPartPriv.get(i); + grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item + .getGrantorType()), item.getGrantOption())); + } + return grantInfos; + } + } + return Collections.emptyList(); + } + + private List getColumnPrivilege(String catName, String dbName, + String tableName, String columnName, String partitionName, + String principalName, PrincipalType principalType) { + + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + 
columnName = normalizeIdentifier(columnName); + catName = normalizeIdentifier(catName); + + if (partitionName == null) { + List userNameColumnPriv = this + .listPrincipalMTableColumnGrants(principalName, principalType, + catName, dbName, tableName, columnName); + if (CollectionUtils.isNotEmpty(userNameColumnPriv)) { + List grantInfos = new ArrayList<>( + userNameColumnPriv.size()); + for (int i = 0; i < userNameColumnPriv.size(); i++) { + MTableColumnPrivilege item = userNameColumnPriv.get(i); + grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item + .getGrantorType()), item.getGrantOption())); + } + return grantInfos; + } + } else { + List userNameColumnPriv = this + .listPrincipalMPartitionColumnGrants(principalName, + principalType, catName, dbName, tableName, partitionName, columnName); + if (CollectionUtils.isNotEmpty(userNameColumnPriv)) { + List grantInfos = new ArrayList<>( + userNameColumnPriv.size()); + for (int i = 0; i < userNameColumnPriv.size(); i++) { + MPartitionColumnPrivilege item = userNameColumnPriv.get(i); + grantInfos.add(new PrivilegeGrantInfo(item.getPrivilege(), item + .getCreateTime(), item.getGrantor(), getPrincipalTypeFromStr(item + .getGrantorType()), item.getGrantOption())); + } + return grantInfos; + } + } + return Collections.emptyList(); + } + + @Override + public boolean grantPrivileges(PrivilegeBag privileges) throws InvalidObjectException, + MetaException, NoSuchObjectException { + int now = (int) (System.currentTimeMillis() / 1000); + List persistentObjs = new ArrayList<>(); + + List privilegeList = privileges.getPrivileges(); + + if (CollectionUtils.isNotEmpty(privilegeList)) { + Iterator privIter = privilegeList.iterator(); + Set privSet = new HashSet<>(); + while (privIter.hasNext()) { + HiveObjectPrivilege privDef = privIter.next(); + HiveObjectRef hiveObject = privDef.getHiveObject(); + String privilegeStr = privDef.getGrantInfo().getPrivilege(); 
+ String[] privs = privilegeStr.split(","); + String userName = privDef.getPrincipalName(); + String authorizer = privDef.getAuthorizer(); + PrincipalType principalType = privDef.getPrincipalType(); + String grantor = privDef.getGrantInfo().getGrantor(); + String grantorType = privDef.getGrantInfo().getGrantorType().toString(); + boolean grantOption = privDef.getGrantInfo().isGrantOption(); + privSet.clear(); + + if(principalType == PrincipalType.ROLE){ + validateRole(userName); + } + + String catName = hiveObject.isSetCatName() ? hiveObject.getCatName() : + getDefaultCatalog(conf); + if (hiveObject.getObjectType() == HiveObjectType.GLOBAL) { + List globalPrivs = this + .listPrincipalMGlobalGrants(userName, principalType, authorizer); + for (MGlobalPrivilege priv : globalPrivs) { + if (priv.getGrantor().equalsIgnoreCase(grantor)) { + privSet.add(priv.getPrivilege()); + } + } + for (String privilege : privs) { + if (privSet.contains(privilege)) { + throw new InvalidObjectException(privilege + + " is already granted by " + grantor); + } + MGlobalPrivilege mGlobalPrivs = new MGlobalPrivilege(userName, + principalType.toString(), privilege, now, grantor, grantorType, grantOption, + authorizer); + persistentObjs.add(mGlobalPrivs); + } + } else if (hiveObject.getObjectType() == HiveObjectType.DATABASE) { + MDatabase dbObj = baseStore.ensureGetMDatabase(catName, hiveObject.getDbName()); + List dbPrivs = this.listPrincipalMDBGrants( + userName, principalType, catName, hiveObject.getDbName(), authorizer); + for (MDBPrivilege priv : dbPrivs) { + if (priv.getGrantor().equalsIgnoreCase(grantor)) { + privSet.add(priv.getPrivilege()); + } + } + for (String privilege : privs) { + if (privSet.contains(privilege)) { + throw new InvalidObjectException(privilege + + " is already granted on database " + + hiveObject.getDbName() + " by " + grantor); + } + MDBPrivilege mDb = new MDBPrivilege(userName, principalType + .toString(), dbObj, privilege, now, grantor, grantorType, grantOption, 
authorizer); + persistentObjs.add(mDb); + } + } else if (hiveObject.getObjectType() == HiveObjectType.DATACONNECTOR) { + MDataConnector dcObj = convert(baseStore.getDataConnector(hiveObject.getObjectName())); + List dcPrivs = this.listPrincipalMDCGrants(userName, principalType, + hiveObject.getObjectName(), authorizer); + for (MDCPrivilege priv : dcPrivs) { + if (priv.getGrantor().equalsIgnoreCase(grantor)) { + privSet.add(priv.getPrivilege()); + } + } + for (String privilege : privs) { + if (privSet.contains(privilege)) { + throw new InvalidObjectException(privilege + + " is already granted on data connector " + + hiveObject.getDbName() + " by " + grantor); + } + MDCPrivilege mDc = new MDCPrivilege(userName, principalType + .toString(), dcObj, privilege, now, grantor, grantorType, grantOption, authorizer); + persistentObjs.add(mDc); + } + } else if (hiveObject.getObjectType() == HiveObjectType.TABLE) { + MTable tblObj = baseStore.ensureGetMTable(catName, hiveObject.getDbName(), hiveObject + .getObjectName()); + if (tblObj != null) { + List tablePrivs = this + .listAllMTableGrants(userName, principalType, + catName, hiveObject.getDbName(), hiveObject.getObjectName(), authorizer); + for (MTablePrivilege priv : tablePrivs) { + if (priv.getGrantor() != null + && priv.getGrantor().equalsIgnoreCase(grantor)) { + privSet.add(priv.getPrivilege()); + } + } + for (String privilege : privs) { + if (privSet.contains(privilege)) { + throw new InvalidObjectException(privilege + + " is already granted on table [" + + hiveObject.getDbName() + "," + + hiveObject.getObjectName() + "] by " + grantor); + } + MTablePrivilege mTab = new MTablePrivilege( + userName, principalType.toString(), tblObj, + privilege, now, grantor, grantorType, grantOption, authorizer); + persistentObjs.add(mTab); + } + } + } else if (hiveObject.getObjectType() == HiveObjectType.PARTITION) { + MPartition partObj = baseStore.ensureGetMPartition(new TableName(catName, hiveObject.getDbName(), + 
hiveObject.getObjectName()), hiveObject.getPartValues()); + String partName = null; + if (partObj != null) { + partName = partObj.getPartitionName(); + List partPrivs = this + .listPrincipalMPartitionGrants(userName, + principalType, catName, hiveObject.getDbName(), hiveObject + .getObjectName(), partObj.getPartitionName(), authorizer); + for (MPartitionPrivilege priv : partPrivs) { + if (priv.getGrantor().equalsIgnoreCase(grantor)) { + privSet.add(priv.getPrivilege()); + } + } + for (String privilege : privs) { + if (privSet.contains(privilege)) { + throw new InvalidObjectException(privilege + + " is already granted on partition [" + + hiveObject.getDbName() + "," + + hiveObject.getObjectName() + "," + + partName + "] by " + grantor); + } + MPartitionPrivilege mTab = new MPartitionPrivilege(userName, + principalType.toString(), partObj, privilege, now, grantor, + grantorType, grantOption, authorizer); + persistentObjs.add(mTab); + } + } + } else if (hiveObject.getObjectType() == HiveObjectType.COLUMN) { + MTable tblObj = baseStore.ensureGetMTable(catName, hiveObject.getDbName(), hiveObject + .getObjectName()); + if (tblObj != null) { + if (hiveObject.getPartValues() != null) { + MPartition partObj = null; + List colPrivs = null; + partObj = baseStore.ensureGetMPartition(new TableName(catName, hiveObject.getDbName(), hiveObject + .getObjectName()), hiveObject.getPartValues()); + if (partObj == null) { + continue; + } + colPrivs = this.listPrincipalMPartitionColumnGrants( + userName, principalType, catName, hiveObject.getDbName(), hiveObject + .getObjectName(), partObj.getPartitionName(), + hiveObject.getColumnName(), authorizer); + + for (MPartitionColumnPrivilege priv : colPrivs) { + if (priv.getGrantor().equalsIgnoreCase(grantor)) { + privSet.add(priv.getPrivilege()); + } + } + for (String privilege : privs) { + if (privSet.contains(privilege)) { + throw new InvalidObjectException(privilege + + " is already granted on column " + + hiveObject.getColumnName() + " 
[" + + hiveObject.getDbName() + "," + + hiveObject.getObjectName() + "," + + partObj.getPartitionName() + "] by " + grantor); + } + MPartitionColumnPrivilege mCol = new MPartitionColumnPrivilege(userName, + principalType.toString(), partObj, hiveObject + .getColumnName(), privilege, now, grantor, grantorType, + grantOption, authorizer); + persistentObjs.add(mCol); + } + + } else { + List colPrivs = null; + colPrivs = this.listPrincipalMTableColumnGrants( + userName, principalType, catName, hiveObject.getDbName(), hiveObject + .getObjectName(), hiveObject.getColumnName(), authorizer); + + for (MTableColumnPrivilege priv : colPrivs) { + if (priv.getGrantor().equalsIgnoreCase(grantor)) { + privSet.add(priv.getPrivilege()); + } + } + for (String privilege : privs) { + if (privSet.contains(privilege)) { + throw new InvalidObjectException(privilege + + " is already granted on column " + + hiveObject.getColumnName() + " [" + + hiveObject.getDbName() + "," + + hiveObject.getObjectName() + "] by " + grantor); + } + MTableColumnPrivilege mCol = new MTableColumnPrivilege(userName, + principalType.toString(), tblObj, hiveObject + .getColumnName(), privilege, now, grantor, grantorType, + grantOption, authorizer); + persistentObjs.add(mCol); + } + } + } + } + } + } + if (CollectionUtils.isNotEmpty(persistentObjs)) { + pm.makePersistentAll(persistentObjs); + } + return true; + } + + @Override + public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) + throws InvalidObjectException, MetaException, NoSuchObjectException { + List persistentObjs = new ArrayList<>(); + + List privilegeList = privileges.getPrivileges(); + + if (CollectionUtils.isNotEmpty(privilegeList)) { + Iterator privIter = privilegeList.iterator(); + + while (privIter.hasNext()) { + HiveObjectPrivilege privDef = privIter.next(); + HiveObjectRef hiveObject = privDef.getHiveObject(); + String privilegeStr = privDef.getGrantInfo().getPrivilege(); + if (privilegeStr == null || 
privilegeStr.trim().equals("")) { + continue; + } + String[] privs = privilegeStr.split(","); + String userName = privDef.getPrincipalName(); + PrincipalType principalType = privDef.getPrincipalType(); + + String catName = hiveObject.isSetCatName() ? hiveObject.getCatName() : + getDefaultCatalog(conf); + if (hiveObject.getObjectType() == HiveObjectType.GLOBAL) { + List mSecUser = this.listPrincipalMGlobalGrants( + userName, principalType); + boolean found = false; + for (String privilege : privs) { + for (MGlobalPrivilege userGrant : mSecUser) { + String userGrantPrivs = userGrant.getPrivilege(); + if (privilege.equals(userGrantPrivs)) { + found = true; + if (grantOption) { + if (userGrant.getGrantOption()) { + userGrant.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with privilege " + privilege); + } + } + persistentObjs.add(userGrant); + break; + } + } + if (!found) { + throw new InvalidObjectException( + "No user grant found for privileges " + privilege); + } + } + + } else if (hiveObject.getObjectType() == HiveObjectType.DATABASE) { + String db = hiveObject.getDbName(); + boolean found = false; + List dbGrants = this.listPrincipalMDBGrants( + userName, principalType, catName, db); + for (String privilege : privs) { + for (MDBPrivilege dbGrant : dbGrants) { + String dbGrantPriv = dbGrant.getPrivilege(); + if (privilege.equals(dbGrantPriv)) { + found = true; + if (grantOption) { + if (dbGrant.getGrantOption()) { + dbGrant.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with privilege " + privilege); + } + } + persistentObjs.add(dbGrant); + break; + } + } + if (!found) { + throw new InvalidObjectException( + "No database grant found for privileges " + privilege + + " on database " + db); + } + } + } else if (hiveObject.getObjectType() == HiveObjectType.DATACONNECTOR) { + String dc = hiveObject.getObjectName(); + boolean found = false; + 
List dcGrants = this.listPrincipalMDCGrants( + userName, principalType, dc); + for (String privilege : privs) { + for (MDCPrivilege dcGrant : dcGrants) { + String dcGrantPriv = dcGrant.getPrivilege(); + if (privilege.equals(dcGrantPriv)) { + found = true; + if (grantOption) { + if (dcGrant.getGrantOption()) { + dcGrant.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with privilege " + privilege); + } + } + persistentObjs.add(dcGrant); + break; + } + } + if (!found) { + throw new InvalidObjectException( + "No dataconnector grant found for privileges " + privilege + + " on data connector " + dc); + } + } + } else if (hiveObject.getObjectType() == HiveObjectType.TABLE) { + boolean found = false; + List tableGrants = this + .listAllMTableGrants(userName, principalType, + catName, hiveObject.getDbName(), hiveObject.getObjectName()); + for (String privilege : privs) { + for (MTablePrivilege tabGrant : tableGrants) { + String tableGrantPriv = tabGrant.getPrivilege(); + if (privilege.equalsIgnoreCase(tableGrantPriv)) { + found = true; + if (grantOption) { + if (tabGrant.getGrantOption()) { + tabGrant.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with privilege " + privilege); + } + } + persistentObjs.add(tabGrant); + break; + } + } + if (!found) { + throw new InvalidObjectException("No grant (" + privilege + + ") found " + " on table " + hiveObject.getObjectName() + + ", database is " + hiveObject.getDbName()); + } + } + } else if (hiveObject.getObjectType() == HiveObjectType.PARTITION) { + boolean found = false; + Table tabObj = baseStore.unwrap(TableStore.class).getTable( + new TableName(catName, hiveObject.getDbName(), hiveObject.getObjectName()), null, -1); + String partName = null; + if (hiveObject.getPartValues() != null) { + partName = Warehouse.makePartName(tabObj.getPartitionKeys(), hiveObject.getPartValues()); + } + List 
partitionGrants = this + .listPrincipalMPartitionGrants(userName, principalType, + catName, hiveObject.getDbName(), hiveObject.getObjectName(), partName); + for (String privilege : privs) { + for (MPartitionPrivilege partGrant : partitionGrants) { + String partPriv = partGrant.getPrivilege(); + if (partPriv.equalsIgnoreCase(privilege)) { + found = true; + if (grantOption) { + if (partGrant.getGrantOption()) { + partGrant.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with privilege " + privilege); + } + } + persistentObjs.add(partGrant); + break; + } + } + if (!found) { + throw new InvalidObjectException("No grant (" + privilege + + ") found " + " on table " + tabObj.getTableName() + + ", partition is " + partName + ", database is " + tabObj.getDbName()); + } + } + } else if (hiveObject.getObjectType() == HiveObjectType.COLUMN) { + Table tabObj = baseStore.unwrap(TableStore.class).getTable( + new TableName(catName, hiveObject.getDbName(), hiveObject.getObjectName()), null, -1); + String partName = null; + if (hiveObject.getPartValues() != null) { + partName = Warehouse.makePartName(tabObj.getPartitionKeys(), hiveObject.getPartValues()); + } + + if (partName != null) { + List mSecCol = listPrincipalMPartitionColumnGrants( + userName, principalType, catName, hiveObject.getDbName(), hiveObject + .getObjectName(), partName, hiveObject.getColumnName()); + boolean found = false; + for (String privilege : privs) { + for (MPartitionColumnPrivilege col : mSecCol) { + String colPriv = col.getPrivilege(); + if (colPriv.equalsIgnoreCase(privilege)) { + found = true; + if (grantOption) { + if (col.getGrantOption()) { + col.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with privilege " + privilege); + } + } + persistentObjs.add(col); + break; + } + } + if (!found) { + throw new InvalidObjectException("No grant (" + privilege + + ") found " + " on 
table " + tabObj.getTableName() + + ", partition is " + partName + ", column name = " + + hiveObject.getColumnName() + ", database is " + + tabObj.getDbName()); + } + } + } else { + List mSecCol = listPrincipalMTableColumnGrants( + userName, principalType, catName, hiveObject.getDbName(), hiveObject + .getObjectName(), hiveObject.getColumnName()); + boolean found = false; + for (String privilege : privs) { + for (MTableColumnPrivilege col : mSecCol) { + String colPriv = col.getPrivilege(); + if (colPriv.equalsIgnoreCase(privilege)) { + found = true; + if (grantOption) { + if (col.getGrantOption()) { + col.setGrantOption(false); + } else { + throw new MetaException("User " + userName + + " does not have grant option with privilege " + privilege); + } + } + persistentObjs.add(col); + break; + } + } + if (!found) { + throw new InvalidObjectException("No grant (" + privilege + + ") found " + " on table " + tabObj.getTableName() + + ", column name = " + + hiveObject.getColumnName() + ", database is " + + tabObj.getDbName()); + } + } + } + + } + } + } + + if (CollectionUtils.isNotEmpty(persistentObjs)) { + if (grantOption) { + // If grant option specified, only update the privilege, don't remove it. 
+ // Grant option has already been removed from the privileges in the section above + } else { + pm.deletePersistentAll(persistentObjs); + } + } + return true; + } + + class PrivilegeWithoutCreateTimeComparator implements Comparator { + @Override + public int compare(HiveObjectPrivilege o1, HiveObjectPrivilege o2) { + int createTime1 = o1.getGrantInfo().getCreateTime(); + int createTime2 = o2.getGrantInfo().getCreateTime(); + o1.getGrantInfo().setCreateTime(0); + o2.getGrantInfo().setCreateTime(0); + int result = o1.compareTo(o2); + o1.getGrantInfo().setCreateTime(createTime1); + o2.getGrantInfo().setCreateTime(createTime2); + return result; + } + } + + @Override + public boolean refreshPrivileges(HiveObjectRef objToRefresh, String authorizer, PrivilegeBag grantPrivileges) + throws InvalidObjectException, MetaException, NoSuchObjectException { + Set revokePrivilegeSet + = new TreeSet<>(new PrivilegeWithoutCreateTimeComparator()); + Set grantPrivilegeSet + = new TreeSet<>(new PrivilegeWithoutCreateTimeComparator()); + + List grants = null; + String catName = objToRefresh.isSetCatName() ? 
objToRefresh.getCatName() : + getDefaultCatalog(conf); + switch (objToRefresh.getObjectType()) { + case DATABASE: + try { + grants = this.listDBGrantsAll(catName, objToRefresh.getDbName(), authorizer); + } catch (Exception e) { + throw new MetaException(e.getMessage()); + } + break; + case DATACONNECTOR: + try { + grants = this.listDCGrantsAll(objToRefresh.getObjectName(), authorizer); + } catch (Exception e) { + throw new MetaException(e.getMessage()); + } + break; + case TABLE: + grants = listTableGrantsAll(new TableName(catName, objToRefresh.getDbName(), objToRefresh.getObjectName()), authorizer); + break; + case COLUMN: + Preconditions.checkArgument(objToRefresh.getColumnName()==null, "columnName must be null"); + grants = getTableAllColumnGrants(catName, objToRefresh.getDbName(), + objToRefresh.getObjectName(), authorizer); + break; + default: + throw new MetaException("Unexpected object type " + objToRefresh.getObjectType()); + } + revokePrivilegeSet.addAll(grants); + + // Optimize revoke/grant list, remove the overlapping + if (grantPrivileges.getPrivileges() != null) { + for (HiveObjectPrivilege grantPrivilege : grantPrivileges.getPrivileges()) { + if (revokePrivilegeSet.contains(grantPrivilege)) { + revokePrivilegeSet.remove(grantPrivilege); + } else { + grantPrivilegeSet.add(grantPrivilege); + } + } + } + if (!revokePrivilegeSet.isEmpty()) { + LOG.debug("Found " + revokePrivilegeSet.size() + " new revoke privileges to be synced."); + PrivilegeBag remainingRevokePrivileges = new PrivilegeBag(); + for (HiveObjectPrivilege revokePrivilege : revokePrivilegeSet) { + remainingRevokePrivileges.addToPrivileges(revokePrivilege); + } + revokePrivileges(remainingRevokePrivileges, false); + } else { + LOG.debug("No new revoke privileges are required to be synced."); + } + if (!grantPrivilegeSet.isEmpty()) { + LOG.debug("Found " + grantPrivilegeSet.size() + " new grant privileges to be synced."); + PrivilegeBag remainingGrantPrivileges = new PrivilegeBag(); + for 
(HiveObjectPrivilege grantPrivilege : grantPrivilegeSet) { + remainingGrantPrivileges.addToPrivileges(grantPrivilege); + } + grantPrivileges(remainingGrantPrivileges); + } else { + LOG.debug("No new grant privileges are required to be synced."); + } + return true; + } + + private List getTableAllColumnGrants(String catalog, String db, + String tableName, String authorizer) + throws MetaException, NoSuchObjectException { + String catName = normalizeIdentifier(catalog); + String dbName = normalizeIdentifier(db); + String tblName = normalizeIdentifier(tableName); + return new GetListHelper(this, new TableName(catName, dbName, tableName)) { + + @Override + protected String describeResult() { + return "Table column privileges."; + } + + @Override + protected List getSqlResult() + throws MetaException { + return getDirectSql().getTableAllColumnGrants(catName, dbName, tblName, authorizer); + } + + @Override + protected List getJdoResult() { + return convertTableCols(listTableAllColumnGrants(catName, dbName, tblName, authorizer)); + } + }.run(false); + } + + public List listMRoleMembers(String roleName) { + Query query = null; + List mRoleMemeberList = new ArrayList<>(); + query = pm.newQuery(MRoleMap.class, "role.roleName == t1"); + query.declareParameters("java.lang.String t1"); + query.setUnique(false); + List mRoles = (List) query.execute(roleName); + pm.retrieveAll(mRoles); + mRoleMemeberList.addAll(mRoles); + return mRoleMemeberList; + } + + @Override + public List listRoleMembers(String roleName) { + List roleMaps = listMRoleMembers(roleName); + List rolePrinGrantList = new ArrayList<>(); + + if (roleMaps != null) { + for (MRoleMap roleMap : roleMaps) { + RolePrincipalGrant rolePrinGrant = new RolePrincipalGrant( + roleMap.getRole().getRoleName(), + roleMap.getPrincipalName(), + PrincipalType.valueOf(roleMap.getPrincipalType()), + roleMap.getGrantOption(), + roleMap.getAddTime(), + roleMap.getGrantor(), + // no grantor type for public role, hence the null check + 
roleMap.getGrantorType() == null ? null + : PrincipalType.valueOf(roleMap.getGrantorType()) + ); + rolePrinGrantList.add(rolePrinGrant); + + } + } + return rolePrinGrantList; + } + + private List listPrincipalMGlobalGrants(String principalName, + PrincipalType principalType) { + return listPrincipalMGlobalGrants(principalName, principalType, null); + } + + private List listPrincipalMGlobalGrants(String principalName, + PrincipalType principalType, String authorizer) { + Query query; + List userNameDbPriv = new ArrayList<>(); + List mPrivs = null; + if (principalName != null) { + if (authorizer != null) { + query = pm.newQuery(MGlobalPrivilege.class, "principalName == t1 && principalType == t2 " + + "&& authorizer == t3"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + mPrivs = (List) query + .executeWithArray(principalName, principalType.toString(), authorizer); + } else { + query = pm.newQuery(MGlobalPrivilege.class, "principalName == t1 && principalType == t2 "); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + mPrivs = (List) query + .executeWithArray(principalName, principalType.toString()); + } + pm.retrieveAll(mPrivs); + } + if (mPrivs != null) { + userNameDbPriv.addAll(mPrivs); + } + return userNameDbPriv; + } + + @Override + public List listPrincipalGlobalGrants(String principalName, + PrincipalType principalType) { + List mUsers = + listPrincipalMGlobalGrants(principalName, principalType); + if (mUsers.isEmpty()) { + return Collections.emptyList(); + } + List result = new ArrayList<>(); + for (int i = 0; i < mUsers.size(); i++) { + MGlobalPrivilege sUsr = mUsers.get(i); + HiveObjectRef objectRef = new HiveObjectRef( + HiveObjectType.GLOBAL, null, null, null, null); + HiveObjectPrivilege secUser = new HiveObjectPrivilege( + objectRef, sUsr.getPrincipalName(), principalType, + new PrivilegeGrantInfo(sUsr.getPrivilege(), sUsr + .getCreateTime(), sUsr.getGrantor(), PrincipalType + 
.valueOf(sUsr.getGrantorType()), sUsr.getGrantOption()), + sUsr.getAuthorizer()); + result.add(secUser); + } + return result; + } + + @Override + public List listGlobalGrantsAll() { + Query query = pm.newQuery(MGlobalPrivilege.class); + List userNameDbPriv = (List) query.execute(); + pm.retrieveAll(userNameDbPriv); + return convertGlobal(userNameDbPriv); + } + + private List convertGlobal(List privs) { + List result = new ArrayList<>(); + for (MGlobalPrivilege priv : privs) { + String pname = priv.getPrincipalName(); + String authorizer = priv.getAuthorizer(); + PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + + HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.GLOBAL, null, null, null, null); + PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), + priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + + result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + } + return result; + } + + private List listPrincipalMDBGrants(String principalName, + PrincipalType principalType, String catName, String dbName) { + return listPrincipalMDBGrants(principalName, principalType, catName, dbName, null); + } + + private List listPrincipalMDBGrants(String principalName, + PrincipalType principalType, String catName, String dbName, String authorizer) { + Query query = null; + List mSecurityDBList = new ArrayList<>(); + dbName = normalizeIdentifier(dbName); + List mPrivs; + if (authorizer != null) { + query = pm.newQuery(MDBPrivilege.class, + "principalName == t1 && principalType == t2 && database.name == t3 && " + + "database.catalogName == t4 && authorizer == t5"); + query.declareParameters( + "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4, " + + "java.lang.String t5"); + mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), + dbName, catName, authorizer); + } else { + query = 
pm.newQuery(MDBPrivilege.class, + "principalName == t1 && principalType == t2 && database.name == t3 && database.catalogName == t4"); + query.declareParameters( + "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4"); + mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), + dbName, catName); + } + pm.retrieveAll(mPrivs); + mSecurityDBList.addAll(mPrivs); + return mSecurityDBList; + } + + private List listPrincipalMDCGrants(String principalName, + PrincipalType principalType, String dcName) { + return listPrincipalMDCGrants(principalName, principalType, dcName, null); + } + + private List listPrincipalMDCGrants(String principalName, + PrincipalType principalType, String dcName, String authorizer) { + Query query = null; + List mSecurityDCList = new ArrayList<>(); + dcName = normalizeIdentifier(dcName); + List mPrivs; + if (authorizer != null) { + query = pm.newQuery(MDCPrivilege.class, + "principalName == t1 && principalType == t2 && dataConnector.name == t3 && " + + "authorizer == t4"); + query.declareParameters( + "java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4"); + mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), + dcName, authorizer); + } else { + query = pm.newQuery(MDCPrivilege.class, + "principalName == t1 && principalType == t2 && dataConnector.name == t3"); + query.declareParameters( + "java.lang.String t1, java.lang.String t2, java.lang.String t3"); + mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), dcName); + } + pm.retrieveAll(mPrivs); + mSecurityDCList.addAll(mPrivs); + return mSecurityDCList; + } + + @Override + public List listPrincipalDBGrants(String principalName, + PrincipalType principalType, + String catName, String dbName) { + List mDbs = listPrincipalMDBGrants(principalName, principalType, catName, dbName); + if (mDbs.isEmpty()) { + return Collections.emptyList(); + } + List result = 
new ArrayList<>(); + for (int i = 0; i < mDbs.size(); i++) { + MDBPrivilege sDB = mDbs.get(i); + HiveObjectRef objectRef = new HiveObjectRef( + HiveObjectType.DATABASE, dbName, null, null, null); + objectRef.setCatName(catName); + HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, + sDB.getPrincipalName(), principalType, + new PrivilegeGrantInfo(sDB.getPrivilege(), sDB + .getCreateTime(), sDB.getGrantor(), PrincipalType + .valueOf(sDB.getGrantorType()), sDB.getGrantOption()), sDB.getAuthorizer()); + result.add(secObj); + } + return result; + } + + @Override + public List listPrincipalDBGrantsAll(String principalName, PrincipalType principalType) { + return convertDB(listPrincipalAllDBGrant(principalName, principalType)); + } + + @Override + public List listDBGrantsAll(String catName, String dbName) { + return listDBGrantsAll(catName, dbName, null); + } + + private List listDBGrantsAll(String catName, String dbName, String authorizer) { + return convertDB(listDatabaseGrants(catName, dbName, authorizer)); + } + + private List convertDB(List privs) { + List result = new ArrayList<>(); + for (MDBPrivilege priv : privs) { + String pname = priv.getPrincipalName(); + String authorizer = priv.getAuthorizer(); + PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + String database = priv.getDatabase().getName(); + + HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.DATABASE, database, + null, null, null); + objectRef.setCatName(priv.getDatabase().getCatalogName()); + PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), + priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + + result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + } + return result; + } + + private List listPrincipalAllDBGrant(String principalName, PrincipalType principalType) { + final List mSecurityDBList; + + LOG.debug("Executing listPrincipalAllDBGrant"); + 
Query query; + if (principalName != null && principalType != null) { + query = pm.newQuery(MDBPrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + mSecurityDBList = (List) query.execute(principalName, principalType.toString()); + pm.retrieveAll(mSecurityDBList); + LOG.debug("Done retrieving all objects for listPrincipalAllDBGrant: {}", mSecurityDBList); + return Collections.unmodifiableList(new ArrayList<>(mSecurityDBList)); + } else { + query = pm.newQuery(MDBPrivilege.class); + mSecurityDBList = (List) query.execute(); + pm.retrieveAll(mSecurityDBList); + LOG.debug("Done retrieving all objects for listPrincipalAllDBGrant: {}", mSecurityDBList); + return Collections.unmodifiableList(new ArrayList<>(mSecurityDBList)); + } + } + + @Override + public List listPrincipalDCGrants(String principalName, + PrincipalType principalType, + String dcName) { + List mDcs = listPrincipalMDCGrants(principalName, principalType, dcName); + if (mDcs.isEmpty()) { + return Collections.emptyList(); + } + List result = new ArrayList<>(); + for (int i = 0; i < mDcs.size(); i++) { + MDCPrivilege sDC = mDcs.get(i); + HiveObjectRef objectRef = new HiveObjectRef( + HiveObjectType.DATACONNECTOR, null, dcName, null, null); + HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, + sDC.getPrincipalName(), principalType, + new PrivilegeGrantInfo(sDC.getPrivilege(), sDC + .getCreateTime(), sDC.getGrantor(), PrincipalType + .valueOf(sDC.getGrantorType()), sDC.getGrantOption()), sDC.getAuthorizer()); + result.add(secObj); + } + return result; + } + + @Override + public List listPrincipalDCGrantsAll(String principalName, PrincipalType principalType) { + return convertDC(listPrincipalAllDCGrant(principalName, principalType)); + } + + @Override + public List listDCGrantsAll(String dcName) { + return listDCGrantsAll(dcName, null); + } + + private List listDCGrantsAll(String dcName, String authorizer) { + return 
convertDC(listDataConnectorGrants(dcName, authorizer)); + } + + private List convertDC(List privs) { + List result = new ArrayList<>(); + for (MDCPrivilege priv : privs) { + String pname = priv.getPrincipalName(); + String authorizer = priv.getAuthorizer(); + PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + String dataConnectorName = priv.getDataConnector().getName(); + + HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.DATACONNECTOR, null, + dataConnectorName, null, null); + PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), + priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + + result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + } + return result; + } + + private List listPrincipalAllDCGrant(String principalName, PrincipalType principalType) { + final List mSecurityDCList; + + LOG.debug("Executing listPrincipalAllDCGrant"); + + if (principalName != null && principalType != null) { + Query query = pm.newQuery(MDCPrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + mSecurityDCList = (List) query.execute(principalName, principalType.toString()); + pm.retrieveAll(mSecurityDCList); + LOG.debug("Done retrieving all objects for listPrincipalAllDCGrant: {}", mSecurityDCList); + return Collections.unmodifiableList(new ArrayList<>(mSecurityDCList)); + } else { + Query query = pm.newQuery(MDCPrivilege.class); + mSecurityDCList = (List) query.execute(); + pm.retrieveAll(mSecurityDCList); + LOG.debug("Done retrieving all objects for listPrincipalAllDCGrant: {}", mSecurityDCList); + return Collections.unmodifiableList(new ArrayList<>(mSecurityDCList)); + } + } + + private List listTableAllColumnGrants( + String catName, String dbName, String tableName, String authorizer) { + boolean success = false; + Query query = null; + List 
mTblColPrivilegeList = new ArrayList<>(); + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + List mPrivs = null; + if (authorizer != null) { + String queryStr = "table.tableName == t1 && table.database.name == t2 &&" + + "table.database.catalogName == t3 && authorizer == t4"; + query = pm.newQuery(MTableColumnPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4"); + mPrivs = (List) query.executeWithArray(tableName, dbName, catName, authorizer); + } else { + String queryStr = "table.tableName == t1 && table.database.name == t2 &&" + + "table.database.catalogName == t3"; + query = pm.newQuery(MTableColumnPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + mPrivs = (List) query.executeWithArray(tableName, dbName, catName); + } + LOG.debug("Query to obtain objects for listTableAllColumnGrants finished"); + pm.retrieveAll(mPrivs); + LOG.debug("RetrieveAll on all the objects for listTableAllColumnGrants finished"); + mTblColPrivilegeList.addAll(mPrivs); + return mTblColPrivilegeList; + } + + @Override + public List listDatabaseGrants(String catName, String dbName, String authorizer) { + LOG.debug("Executing listDatabaseGrants"); + + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + + final Query query; + final String[] args; + + if (authorizer != null) { + query = pm.newQuery(MDBPrivilege.class, "database.name == t1 && database.catalogName == t2 && authorizer == t3"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + args = new String[] { dbName, catName, authorizer }; + } else { + query = pm.newQuery(MDBPrivilege.class, "database.name == t1 && database.catalogName == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + args = new 
String[] { dbName, catName }; + } + + final List mSecurityDBList = (List) query.executeWithArray(args); + pm.retrieveAll(mSecurityDBList); + LOG.debug("Done retrieving all objects for listDatabaseGrants: {}", mSecurityDBList); + return Collections.unmodifiableList(new ArrayList<>(mSecurityDBList)); + } + + @Override + public List listDataConnectorGrants(String dcName, String authorizer) { + LOG.debug("Executing listDataConnectorGrants"); + + dcName = normalizeIdentifier(dcName); + + final Query query; + String[] args = null; + final List mSecurityDCList; + + if (authorizer != null) { + query = pm.newQuery(MDCPrivilege.class, "dataConnector.name == t1 && authorizer == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + args = new String[] { dcName, authorizer }; + } else { + query = pm.newQuery(MDCPrivilege.class, "dataConnector.name == t1"); + query.declareParameters("java.lang.String t1"); + } + if (args != null) { + mSecurityDCList = (List) query.executeWithArray(args); + } else { + mSecurityDCList = (List) query.execute(dcName); + } + pm.retrieveAll(mSecurityDCList); + LOG.debug("Done retrieving all objects for listDataConnectorGrants: {}", mSecurityDCList); + return Collections.unmodifiableList(new ArrayList<>(mSecurityDCList)); + } + + private List listAllMTableGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName) { + return listAllMTableGrants(principalName, principalType, catName, dbName, tableName, null); + } + + private List listAllMTableGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName, String authorizer) { + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + Query query = null; + List mSecurityTabPartList = new ArrayList<>(); + LOG.debug("Executing listAllTableGrants"); + List mPrivs; + if (authorizer != null) { + query = 
pm.newQuery(MTablePrivilege.class, + "principalName == t1 && principalType == t2 && table.tableName == t3 &&" + + "table.database.name == t4 && table.database.catalogName == t5 && authorizer == t6"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3," + + "java.lang.String t4, java.lang.String t5, java.lang.String t6"); + mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), + tableName, dbName, catName, authorizer); + } else { + query = pm.newQuery(MTablePrivilege.class, + "principalName == t1 && principalType == t2 && table.tableName == t3 &&" + + "table.database.name == t4 && table.database.catalogName == t5"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3," + + "java.lang.String t4, java.lang.String t5"); + mPrivs = (List) query.executeWithArray(principalName, principalType.toString(), + tableName, dbName, catName); + } + pm.retrieveAll(mPrivs); + mSecurityTabPartList.addAll(mPrivs); + return mSecurityTabPartList; + } + + @Override + public List listAllTableGrants(String principalName, + PrincipalType principalType, TableName table) { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + List mTbls = + listAllMTableGrants(principalName, principalType, catName, dbName, tableName); + if (mTbls.isEmpty()) { + return Collections.emptyList(); + } + List result = new ArrayList<>(); + for (int i = 0; i < mTbls.size(); i++) { + MTablePrivilege sTbl = mTbls.get(i); + HiveObjectRef objectRef = new HiveObjectRef( + HiveObjectType.TABLE, dbName, tableName, null, null); + objectRef.setCatName(catName); + HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, + sTbl.getPrincipalName(), principalType, + new PrivilegeGrantInfo(sTbl.getPrivilege(), sTbl.getCreateTime(), sTbl + .getGrantor(), PrincipalType.valueOf(sTbl + .getGrantorType()), 
sTbl.getGrantOption()), sTbl.getAuthorizer()); + result.add(secObj); + } + return result; + } + + private List listPrincipalMPartitionGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName, String partName) { + return listPrincipalMPartitionGrants(principalName, principalType, catName, dbName, tableName, partName, null); + } + + private List listPrincipalMPartitionGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName, String partName, String authorizer) { + Query query; + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + List mSecurityTabPartList = new ArrayList<>(); + List mPrivs; + if (authorizer != null) { + query = pm.newQuery(MPartitionPrivilege.class, + "principalName == t1 && principalType == t2 && partition.table.tableName == t3 " + + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" + + "&& partition.partitionName == t6 && authorizer == t7"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4, " + + "java.lang.String t5, java.lang.String t6, java.lang.String t7"); + mPrivs = (List) query.executeWithArray(principalName, + principalType.toString(), tableName, dbName, catName, partName, authorizer); + } else { + query = pm.newQuery(MPartitionPrivilege.class, + "principalName == t1 && principalType == t2 && partition.table.tableName == t3 " + + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" + + "&& partition.partitionName == t6"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4, " + + "java.lang.String t5, java.lang.String t6"); + mPrivs = (List) query.executeWithArray(principalName, + principalType.toString(), tableName, dbName, catName, partName); + } + pm.retrieveAll(mPrivs); 
+ mSecurityTabPartList.addAll(mPrivs); + return mSecurityTabPartList; + } + + @Override + public List listPrincipalPartitionGrants(String principalName, + PrincipalType principalType, + TableName table, + List partValues, + String partName) { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + List mParts = listPrincipalMPartitionGrants(principalName, + principalType, catName, dbName, tableName, partName); + if (mParts.isEmpty()) { + return Collections.emptyList(); + } + List result = new ArrayList<>(); + for (int i = 0; i < mParts.size(); i++) { + MPartitionPrivilege sPart = mParts.get(i); + HiveObjectRef objectRef = new HiveObjectRef( + HiveObjectType.PARTITION, dbName, tableName, partValues, null); + objectRef.setCatName(catName); + HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, + sPart.getPrincipalName(), principalType, + new PrivilegeGrantInfo(sPart.getPrivilege(), sPart + .getCreateTime(), sPart.getGrantor(), PrincipalType + .valueOf(sPart.getGrantorType()), sPart + .getGrantOption()), sPart.getAuthorizer()); + + result.add(secObj); + } + return result; + } + + private List listPrincipalMTableColumnGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName, String columnName) { + return listPrincipalMTableColumnGrants(principalName, principalType, catName, dbName, tableName, + columnName, null); + } + + private List listPrincipalMTableColumnGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName, String columnName, String authorizer) { + Query query; + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + columnName = normalizeIdentifier(columnName); + List mSecurityColList = new ArrayList<>(); + List mPrivs; + if (authorizer != null) { + String queryStr = + "principalName == t1 && 
principalType == t2 && " + + "table.tableName == t3 && table.database.name == t4 && " + + "table.database.catalogName == t5 && columnName == t6 && authorizer == t7"; + query = pm.newQuery(MTableColumnPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4, java.lang.String t5, java.lang.String t6, java.lang.String t7"); + mPrivs = (List) query.executeWithArray(principalName, + principalType.toString(), tableName, dbName, catName, columnName, authorizer); + } else { + String queryStr = + "principalName == t1 && principalType == t2 && " + + "table.tableName == t3 && table.database.name == t4 && " + + "table.database.catalogName == t5 && columnName == t6 "; + query = pm.newQuery(MTableColumnPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4, java.lang.String t5, java.lang.String t6"); + mPrivs = (List) query.executeWithArray(principalName, + principalType.toString(), tableName, dbName, catName, columnName); + } + pm.retrieveAll(mPrivs); + mSecurityColList.addAll(mPrivs); + return mSecurityColList; + } + + @Override + public List listPrincipalTableColumnGrants(String principalName, + PrincipalType principalType, + TableName table, + String columnName) { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + List mTableCols = + listPrincipalMTableColumnGrants(principalName, principalType, catName, dbName, tableName, columnName); + if (mTableCols.isEmpty()) { + return Collections.emptyList(); + } + List result = new ArrayList<>(); + for (int i = 0; i < mTableCols.size(); i++) { + MTableColumnPrivilege sCol = mTableCols.get(i); + HiveObjectRef objectRef = new HiveObjectRef( + HiveObjectType.COLUMN, dbName, tableName, null, sCol.getColumnName()); + objectRef.setCatName(catName); 
+ HiveObjectPrivilege secObj = new HiveObjectPrivilege( + objectRef, sCol.getPrincipalName(), principalType, + new PrivilegeGrantInfo(sCol.getPrivilege(), sCol + .getCreateTime(), sCol.getGrantor(), PrincipalType + .valueOf(sCol.getGrantorType()), sCol + .getGrantOption()), sCol.getAuthorizer()); + result.add(secObj); + } + return result; + } + + private List listPrincipalMPartitionColumnGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName, String partitionName, String columnName) { + return listPrincipalMPartitionColumnGrants(principalName, principalType, catName, dbName, + tableName, partitionName, columnName, null); + } + + private List listPrincipalMPartitionColumnGrants( + String principalName, PrincipalType principalType, String catName, String dbName, + String tableName, String partitionName, String columnName, String authorizer) { + Query query = null; + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + columnName = normalizeIdentifier(columnName); + catName = normalizeIdentifier(catName); + List mSecurityColList = new ArrayList<>(); + List mPrivs; + if (authorizer != null) { + query = pm.newQuery( + MPartitionColumnPrivilege.class, + "principalName == t1 && principalType == t2 && partition.table.tableName == t3 " + + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" + + " && partition.partitionName == t6 && columnName == t7 && authorizer == t8"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4, java.lang.String t5, java.lang.String t6, java.lang.String t7, " + + "java.lang.String t8"); + mPrivs = (List) query.executeWithArray(principalName, + principalType.toString(), tableName, dbName, catName, partitionName, columnName, authorizer); + } else { + query = pm.newQuery( + MPartitionColumnPrivilege.class, + "principalName == t1 && principalType == t2 && 
partition.table.tableName == t3 " + + "&& partition.table.database.name == t4 && partition.table.database.catalogName == t5" + + " && partition.partitionName == t6 && columnName == t7"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4, java.lang.String t5, java.lang.String t6, java.lang.String t7"); + mPrivs = (List) query.executeWithArray(principalName, + principalType.toString(), tableName, dbName, catName, partitionName, columnName); + } + pm.retrieveAll(mPrivs); + mSecurityColList.addAll(mPrivs); + return mSecurityColList; + } + + @Override + public List listPrincipalPartitionColumnGrants(String principalName, + PrincipalType principalType, + TableName table, + List partValues, + String partitionName, + String columnName) { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + List mPartitionCols = + listPrincipalMPartitionColumnGrants(principalName, principalType, catName, dbName, tableName, + partitionName, columnName); + if (mPartitionCols.isEmpty()) { + return Collections.emptyList(); + } + List result = new ArrayList<>(); + for (int i = 0; i < mPartitionCols.size(); i++) { + MPartitionColumnPrivilege sCol = mPartitionCols.get(i); + HiveObjectRef objectRef = new HiveObjectRef( + HiveObjectType.COLUMN, dbName, tableName, partValues, sCol.getColumnName()); + objectRef.setCatName(catName); + HiveObjectPrivilege secObj = new HiveObjectPrivilege(objectRef, + sCol.getPrincipalName(), principalType, + new PrivilegeGrantInfo(sCol.getPrivilege(), sCol + .getCreateTime(), sCol.getGrantor(), PrincipalType + .valueOf(sCol.getGrantorType()), sCol.getGrantOption()), sCol.getAuthorizer()); + result.add(secObj); + } + return result; + } + + @Override + public List listPrincipalPartitionColumnGrantsAll( + String principalName, PrincipalType principalType) { + Query query = null; + 
LOG.debug("Executing listPrincipalPartitionColumnGrantsAll"); + List mSecurityTabPartList; + if (principalName != null && principalType != null) { + query = + pm.newQuery(MPartitionColumnPrivilege.class, + "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + mSecurityTabPartList = + (List) query.executeWithArray(principalName, + principalType.toString()); + } else { + query = pm.newQuery(MPartitionColumnPrivilege.class); + mSecurityTabPartList = (List) query.execute(); + } + LOG.debug("Done executing query for listPrincipalPartitionColumnGrantsAll"); + pm.retrieveAll(mSecurityTabPartList); + List result = convertPartCols(mSecurityTabPartList); + return result; + } + + @Override + public List listPartitionColumnGrantsAll( + TableName table, String partitionName, String columnName) { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + LOG.debug("Executing listPartitionColumnGrantsAll"); + Query query = + pm.newQuery(MPartitionColumnPrivilege.class, + "partition.table.tableName == t3 && partition.table.database.name == t4 && " + + "partition.table.database.catalogName == t5 && " + + "partition.partitionName == t6 && columnName == t7"); + query.declareParameters("java.lang.String t3, java.lang.String t4, java.lang.String t5," + + "java.lang.String t6, java.lang.String t7"); + List mSecurityTabPartList = + (List) query.executeWithArray(tableName, dbName, catName, + partitionName, columnName); + LOG.debug("Done executing query for listPartitionColumnGrantsAll"); + pm.retrieveAll(mSecurityTabPartList); + List result = convertPartCols(mSecurityTabPartList); + return result; + } + + private List convertPartCols(List privs) { + List result = new ArrayList<>(); + for (MPartitionColumnPrivilege priv : privs) { + String pname = priv.getPrincipalName(); + String authorizer = 
priv.getAuthorizer(); + PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + + MPartition mpartition = priv.getPartition(); + MTable mtable = mpartition.getTable(); + MDatabase mdatabase = mtable.getDatabase(); + + HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.COLUMN, + mdatabase.getName(), mtable.getTableName(), mpartition.getValues(), priv.getColumnName()); + objectRef.setCatName(mdatabase.getCatalogName()); + PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), + priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + + result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + } + return result; + } + + private List listPrincipalAllTableGrants(String principalName, PrincipalType principalType) { + LOG.debug("Executing listPrincipalAllTableGrants"); + Query query = pm.newQuery(MTablePrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + final List mSecurityTabPartList = + (List) query.execute(principalName, principalType.toString()); + + pm.retrieveAll(mSecurityTabPartList); + + LOG.debug("Done retrieving all objects for listPrincipalAllTableGrants"); + + return Collections.unmodifiableList(new ArrayList<>(mSecurityTabPartList)); + } + + @Override + public List listPrincipalTableGrantsAll(String principalName, + PrincipalType principalType) { + Query query; + LOG.debug("Executing listPrincipalAllTableGrants"); + List mSecurityTabPartList; + if (principalName != null && principalType != null) { + query = pm.newQuery(MTablePrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + mSecurityTabPartList = + (List) query.execute(principalName, principalType.toString()); + } else { + query = pm.newQuery(MTablePrivilege.class); + mSecurityTabPartList = (List) query.execute(); + } 
+ pm.retrieveAll(mSecurityTabPartList); + List result = convertTable(mSecurityTabPartList); + return result; + } + + @Override + public List listTableGrantsAll(TableName table) { + return listTableGrantsAll(table, null); + } + + private List listTableGrantsAll(TableName table, + String authorizer) { + Query query; + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + LOG.debug("Executing listTableGrantsAll"); + List mSecurityTabPartList = null; + if (authorizer != null) { + query = pm.newQuery(MTablePrivilege.class, + "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3" + + " && authorizer == t4"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4"); + mSecurityTabPartList = (List) query.executeWithArray(tableName, dbName, catName, authorizer); + } else { + query = pm.newQuery(MTablePrivilege.class, + "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + mSecurityTabPartList = (List) query.executeWithArray(tableName, dbName, catName); + } + LOG.debug("Done executing query for listTableGrantsAll"); + pm.retrieveAll(mSecurityTabPartList); + List result = convertTable(mSecurityTabPartList); + return result; + } + + private List convertTable(List privs) { + List result = new ArrayList<>(); + for (MTablePrivilege priv : privs) { + String pname = priv.getPrincipalName(); + String authorizer = priv.getAuthorizer(); + PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + + String table = priv.getTable().getTableName(); + String database = priv.getTable().getDatabase().getName(); + + HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.TABLE, database, table, + null, null); + 
objectRef.setCatName(priv.getTable().getDatabase().getCatalogName()); + PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), + priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + + result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + } + return result; + } + + private List listPrincipalAllPartitionGrants(String principalName, PrincipalType principalType) { + LOG.debug("Executing listPrincipalAllPartitionGrants"); + + Query query = pm.newQuery(MPartitionPrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + final List mSecurityTabPartList = + (List) query.execute(principalName, principalType.toString()); + + pm.retrieveAll(mSecurityTabPartList); + LOG.debug("Done retrieving all objects for listPrincipalAllPartitionGrants"); + + return Collections.unmodifiableList(new ArrayList<>(mSecurityTabPartList)); + } + + @Override + public List listPrincipalPartitionGrantsAll(String principalName, + PrincipalType principalType) { + Query query = null; + LOG.debug("Executing listPrincipalPartitionGrantsAll"); + List mSecurityTabPartList; + if (principalName != null && principalType != null) { + query = + pm.newQuery(MPartitionPrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + mSecurityTabPartList = + (List) query.execute(principalName, principalType.toString()); + } else { + query = pm.newQuery(MPartitionPrivilege.class); + mSecurityTabPartList = (List) query.execute(); + } + LOG.debug("Done executing query for listPrincipalPartitionGrantsAll"); + pm.retrieveAll(mSecurityTabPartList); + List result = convertPartition(mSecurityTabPartList); + return result; + } + + @Override + public List listPartitionGrantsAll(TableName table, String partitionName) { + String tableName = 
normalizeIdentifier(table.getTable()); + String dbName = normalizeIdentifier(table.getDb()); + String catName = normalizeIdentifier(table.getCat()); + LOG.debug("Executing listPrincipalPartitionGrantsAll"); + Query query = + pm.newQuery(MPartitionPrivilege.class, + "partition.table.tableName == t3 && partition.table.database.name == t4 && " + + "partition.table.database.catalogName == t5 && partition.partitionName == t6"); + query.declareParameters("java.lang.String t3, java.lang.String t4, java.lang.String t5, " + + "java.lang.String t6"); + List mSecurityTabPartList = + (List) query.executeWithArray(tableName, dbName, catName, partitionName); + LOG.debug("Done executing query for listPrincipalPartitionGrantsAll"); + pm.retrieveAll(mSecurityTabPartList); + List result = convertPartition(mSecurityTabPartList); + return result; + } + + private List convertPartition(List privs) { + List result = new ArrayList<>(); + for (MPartitionPrivilege priv : privs) { + String pname = priv.getPrincipalName(); + String authorizer = priv.getAuthorizer(); + PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + + MPartition mpartition = priv.getPartition(); + MTable mtable = mpartition.getTable(); + MDatabase mdatabase = mtable.getDatabase(); + + HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.PARTITION, + mdatabase.getName(), mtable.getTableName(), mpartition.getValues(), null); + objectRef.setCatName(mdatabase.getCatalogName()); + PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), + priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + + result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + } + return result; + } + + private List listPrincipalAllTableColumnGrants(String principalName, + PrincipalType principalType) { + + LOG.debug("Executing listPrincipalAllTableColumnGrants"); + + + Query query = pm.newQuery(MTableColumnPrivilege.class, 
"principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + final List mSecurityColumnList = + (List) query.execute(principalName, principalType.toString()); + + pm.retrieveAll(mSecurityColumnList); + LOG.debug("Done retrieving all objects for listPrincipalAllTableColumnGrants"); + + return Collections.unmodifiableList(new ArrayList<>(mSecurityColumnList)); + } + + @Override + public List listPrincipalTableColumnGrantsAll(String principalName, + PrincipalType principalType) { + Query query = null; + LOG.debug("Executing listPrincipalTableColumnGrantsAll"); + + List mSecurityTabPartList; + if (principalName != null && principalType != null) { + query = + pm.newQuery(MTableColumnPrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + mSecurityTabPartList = + (List) query.execute(principalName, principalType.toString()); + } else { + query = pm.newQuery(MTableColumnPrivilege.class); + mSecurityTabPartList = (List) query.execute(); + } + LOG.debug("Done executing query for listPrincipalTableColumnGrantsAll"); + pm.retrieveAll(mSecurityTabPartList); + List result = convertTableCols(mSecurityTabPartList); + return result; + } + + @Override + public List listTableColumnGrantsAll(TableName table, + String columnName) { + Query query = null; + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + LOG.debug("Executing listPrincipalTableColumnGrantsAll"); + query = + pm.newQuery(MTableColumnPrivilege.class, + "table.tableName == t3 && table.database.name == t4 && " + + "table.database.catalogName == t5 && columnName == t6"); + query.declareParameters("java.lang.String t3, java.lang.String t4, java.lang.String t5, " + + "java.lang.String t6"); + List mSecurityTabPartList = + (List) query.executeWithArray(tableName, dbName, + 
catName, columnName); + LOG.debug("Done executing query for listPrincipalTableColumnGrantsAll"); + pm.retrieveAll(mSecurityTabPartList); + List result = convertTableCols(mSecurityTabPartList); + return result; + } + + private List convertTableCols(List privs) { + List result = new ArrayList<>(); + for (MTableColumnPrivilege priv : privs) { + String pname = priv.getPrincipalName(); + String authorizer = priv.getAuthorizer(); + PrincipalType ptype = PrincipalType.valueOf(priv.getPrincipalType()); + + MTable mtable = priv.getTable(); + MDatabase mdatabase = mtable.getDatabase(); + + HiveObjectRef objectRef = new HiveObjectRef(HiveObjectType.COLUMN, + mdatabase.getName(), mtable.getTableName(), null, priv.getColumnName()); + objectRef.setCatName(mdatabase.getCatalogName()); + PrivilegeGrantInfo grantor = new PrivilegeGrantInfo(priv.getPrivilege(), priv.getCreateTime(), + priv.getGrantor(), PrincipalType.valueOf(priv.getGrantorType()), priv.getGrantOption()); + + result.add(new HiveObjectPrivilege(objectRef, pname, ptype, grantor, authorizer)); + } + return result; + } + + private List listPrincipalAllPartitionColumnGrants(String principalName, + PrincipalType principalType) { + LOG.debug("Executing listPrincipalAllTableColumnGrants"); + + Query query = pm.newQuery(MPartitionColumnPrivilege.class, "principalName == t1 && principalType == t2"); + query.declareParameters("java.lang.String t1, java.lang.String t2"); + final List mSecurityColumnList = + (List) query.execute(principalName, principalType.toString()); + + pm.retrieveAll(mSecurityColumnList); + LOG.debug("Done retrieving all objects for listPrincipalAllTableColumnGrants"); + + return Collections.unmodifiableList(new ArrayList<>(mSecurityColumnList)); + } + + @Override + public void setBaseStore(RawStore store) { + super.setBaseStore(store); + this.conf = baseStore.getConf(); + } +} diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/TableStoreImpl.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/TableStoreImpl.java new file mode 100644 index 000000000000..c03e1b8e0016 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metastore/impl/TableStoreImpl.java @@ -0,0 +1,2966 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.metastore.impl; + +import com.google.common.base.Joiner; + +import javax.jdo.Query; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.common.ValidReaderWriteIdList; +import org.apache.hadoop.hive.common.ValidWriteIdList; +import org.apache.hadoop.hive.metastore.Batchable; +import org.apache.hadoop.hive.metastore.DatabaseProduct; +import org.apache.hadoop.hive.metastore.directsql.MetaStoreDirectSql; +import org.apache.hadoop.hive.metastore.PartFilterExprUtil; +import org.apache.hadoop.hive.metastore.PartitionExpressionProxy; +import org.apache.hadoop.hive.metastore.PartitionProjectionEvaluator; +import org.apache.hadoop.hive.metastore.PersistenceManagerProvider; +import org.apache.hadoop.hive.metastore.QueryWrapper; +import org.apache.hadoop.hive.metastore.RawStore; +import org.apache.hadoop.hive.metastore.TableFields; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.CreationMetadata; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; +import 
org.apache.hadoop.hive.metastore.api.GetProjectionsSpec; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.InvalidPartitionException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.metastore.api.PartitionFilterMode; +import org.apache.hadoop.hive.metastore.api.PartitionValuesResponse; +import org.apache.hadoop.hive.metastore.api.PartitionValuesRow; +import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.hadoop.hive.metastore.api.UnknownPartitionException; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hadoop.hive.metastore.client.builder.GetPartitionsArgs; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.metastore.RawStoreAware; +import org.apache.hadoop.hive.metastore.model.FetchGroups; +import org.apache.hadoop.hive.metastore.model.MColumnDescriptor; +import org.apache.hadoop.hive.metastore.model.MConstraint; +import org.apache.hadoop.hive.metastore.model.MCreationMetadata; +import org.apache.hadoop.hive.metastore.model.MMVSource; +import org.apache.hadoop.hive.metastore.model.MPartition; +import org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege; +import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics; +import 
org.apache.hadoop.hive.metastore.model.MPartitionEvent; +import org.apache.hadoop.hive.metastore.model.MPartitionPrivilege; +import org.apache.hadoop.hive.metastore.model.MStorageDescriptor; +import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege; +import org.apache.hadoop.hive.metastore.model.MTablePrivilege; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree; +import org.apache.hadoop.hive.metastore.metastore.GetHelper; +import org.apache.hadoop.hive.metastore.metastore.GetListHelper; +import org.apache.hadoop.hive.metastore.metastore.iface.TableStore; +import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.metastore.utils.FileUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.commons.lang3.StringUtils.join; +import static org.apache.hadoop.hive.metastore.Batchable.NO_BATCHING; +import static org.apache.hadoop.hive.metastore.ObjectStore.appendPatternCondition; +import static org.apache.hadoop.hive.metastore.ObjectStore.appendSimpleCondition; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToCreationMetadata; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToFieldSchemas; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToMCreationMetadata; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToMPart; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToMTable; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToPart; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToParts; +import static org.apache.hadoop.hive.metastore.ObjectStore.convertToTable; +import static 
org.apache.hadoop.hive.metastore.ObjectStore.getJDOFilterStrForPartitionNames; +import static org.apache.hadoop.hive.metastore.ObjectStore.getPartQueryWithParams; +import static org.apache.hadoop.hive.metastore.ObjectStore.makeParameterDeclarationString; +import static org.apache.hadoop.hive.metastore.ObjectStore.putPersistentPrivObjects; +import static org.apache.hadoop.hive.metastore.ObjectStore.verifyStatsChangeCtx; +import static org.apache.hadoop.hive.metastore.metastore.impl.PrivilegeStoreImpl.getPrincipalTypeFromStr; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.newMetaException; +import static org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier; + +@SuppressWarnings("unchecked") +public class TableStoreImpl extends RawStoreAware implements TableStore { + private final static Logger LOG = LoggerFactory.getLogger(TableStoreImpl.class); + private DatabaseProduct dbType; + protected int batchSize = NO_BATCHING; + private boolean areTxnStatsSupported = false; + private PartitionExpressionProxy expressionProxy = null; + private Configuration conf; + + @Override + public void setBaseStore(RawStore store) { + super.setBaseStore(store); + this.dbType = PersistenceManagerProvider.getDatabaseProduct(); + this.batchSize = MetastoreConf.getIntVar(store.getConf(), + MetastoreConf.ConfVars.RAWSTORE_PARTITION_BATCH_SIZE); + this.areTxnStatsSupported = MetastoreConf.getBoolVar(baseStore.getConf(), + MetastoreConf.ConfVars.HIVE_TXN_STATS_ENABLED); + this.expressionProxy = PartFilterExprUtil.createExpressionProxy(store.getConf()); + this.conf = store.getConf(); + } + + @Override + public boolean dropTable(TableName table) + throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = 
normalizeIdentifier(table.getTable()); + boolean materializedView = false; + MTable tbl = getMTable(catName, dbName, tableName); + pm.retrieve(tbl); + if (tbl != null) { + materializedView = TableType.MATERIALIZED_VIEW.toString().equals(tbl.getTableType()); + // first remove all the grants + List tabGrants = listAllTableGrants(catName, dbName, tableName); + if (CollectionUtils.isNotEmpty(tabGrants)) { + pm.deletePersistentAll(tabGrants); + } + List tblColGrants = listTableAllColumnGrants(catName, dbName, + tableName, null); + if (CollectionUtils.isNotEmpty(tblColGrants)) { + pm.deletePersistentAll(tblColGrants); + } + + List partGrants = listTableAllPartitionGrants(catName, dbName, tableName); + if (CollectionUtils.isNotEmpty(partGrants)) { + pm.deletePersistentAll(partGrants); + } + + List partColGrants = listTableAllPartitionColumnGrants(catName, dbName, + tableName); + if (CollectionUtils.isNotEmpty(partColGrants)) { + pm.deletePersistentAll(partColGrants); + } + + // delete column statistics if present + baseStore.deleteTableColumnStatistics(catName, dbName, tableName, null, null); + + List tabConstraints = listAllTableConstraintsWithOptionalConstraintName( + catName, dbName, tableName, null); + if (CollectionUtils.isNotEmpty(tabConstraints)) { + pm.deletePersistentAll(tabConstraints); + } + + preDropStorageDescriptor(tbl.getSd()); + + if (materializedView) { + dropCreationMetadata(tbl.getDatabase().getCatalogName(), + tbl.getDatabase().getName(), tbl.getTableName()); + } + + // then remove the table + pm.deletePersistentAll(tbl); + } + return true; + } + + private List listAllTableConstraintsWithOptionalConstraintName( + String catName, String dbName, String tableName, String constraintname) { + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + constraintname = constraintname!=null?normalizeIdentifier(constraintname):null; + List mConstraints = null; + List constraintNames = new 
ArrayList<>(); + + Query queryForConstraintName = pm.newQuery("select constraintName from org.apache.hadoop.hive.metastore.model.MConstraint where " + + "((parentTable.tableName == ptblname && parentTable.database.name == pdbname && " + + "parentTable.database.catalogName == pcatname) || " + + "(childTable != null && childTable.tableName == ctblname &&" + + "childTable.database.name == cdbname && childTable.database.catalogName == ccatname)) " + + (constraintname != null ? " && constraintName == constraintname" : "")); + Query queryForMConstraint = pm.newQuery(MConstraint.class); + queryForConstraintName.declareParameters("java.lang.String ptblname, java.lang.String pdbname," + + "java.lang.String pcatname, java.lang.String ctblname, java.lang.String cdbname," + + "java.lang.String ccatname" + + (constraintname != null ? ", java.lang.String constraintname" : "")); + Collection constraintNamesColl = + constraintname != null ? + ((Collection) queryForConstraintName. + executeWithArray(tableName, dbName, catName, tableName, dbName, catName, constraintname)): + ((Collection) queryForConstraintName. 
+ executeWithArray(tableName, dbName, catName, tableName, dbName, catName)); + for (Iterator i = constraintNamesColl.iterator(); i.hasNext();) { + String currName = (String) i.next(); + constraintNames.add(currName); + } + + queryForMConstraint.setFilter("param.contains(constraintName)"); + queryForMConstraint.declareParameters("java.util.Collection param"); + Collection constraints = (Collection)queryForMConstraint.execute(constraintNames); + mConstraints = new ArrayList<>(); + for (Iterator i = constraints.iterator(); i.hasNext();) { + MConstraint currConstraint = (MConstraint) i.next(); + mConstraints.add(currConstraint); + } + return mConstraints; + } + + private List listTableAllPartitionColumnGrants( + String catName, String dbName, String tableName) { + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + List mSecurityColList = new ArrayList<>(); + LOG.debug("Executing listTableAllPartitionColumnGrants"); + + String queryStr = "partition.table.tableName == t1 && partition.table.database.name == t2 " + + "&& partition.table.database.catalogName == t3"; + Query query = pm.newQuery(MPartitionColumnPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + List mPrivs = + (List) query.executeWithArray(tableName, dbName, catName); + pm.retrieveAll(mPrivs); + mSecurityColList.addAll(mPrivs); + + LOG.debug("Done retrieving all objects for listTableAllPartitionColumnGrants"); + return mSecurityColList; + } + + + private List listTableAllPartitionGrants(String catName, String dbName, String tableName) { + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + List mSecurityTabPartList = new ArrayList<>(); + LOG.debug("Executing listTableAllPartitionGrants"); + + String queryStr = "partition.table.tableName == t1 && partition.table.database.name == t2 " + + 
"&& partition.table.database.catalogName == t3"; + Query query = pm.newQuery(MPartitionPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + List mPrivs = + (List) query.executeWithArray(tableName, dbName, catName); + pm.retrieveAll(mPrivs); + mSecurityTabPartList.addAll(mPrivs); + + LOG.debug("Done retrieving all objects for listTableAllPartitionGrants"); + return mSecurityTabPartList; + } + + private void dropCreationMetadata(String catName, String dbName, String tableName) { + MCreationMetadata mcm = getCreationMetadata(catName, dbName, tableName); + pm.retrieve(mcm); + if (mcm != null) { + pm.deletePersistentAll(mcm); + } + } + + /** + * Called right before an action that would drop a storage descriptor. + * This function makes the SD's reference to a CD null, and then deletes the CD + * if it no longer is referenced in the table. + * @param msd the storage descriptor to drop + */ + private void preDropStorageDescriptor(MStorageDescriptor msd) { + if (msd == null || msd.getCD() == null) { + return; + } + + MColumnDescriptor mcd = msd.getCD(); + // Because there is a 1-N relationship between CDs and SDs, + // we must set the SD's CD to null first before dropping the storage descriptor + // to satisfy foreign key constraints. 
+ msd.setCD(null); + removeUnusedColumnDescriptor(mcd); + } + + + @Override + public List dropAllPartitionsAndGetLocations(TableName table, String baseLocationToNotShow, + AtomicReference message) + throws MetaException, InvalidInputException, NoSuchObjectException, InvalidObjectException { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tblName = normalizeIdentifier(table.getTable()); + return new GetHelper>(this, new TableName(catName, dbName, tblName)) { + @Override + protected String describeResult() { + return "delete all partitions from " + table; + } + + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql() + .dropAllPartitionsAndGetLocations(getTable().getId(), baseLocationToNotShow, message); + } + + @Override + protected List getJdoResult() + throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { + Map partitionLocations = + getPartitionLocations(table, baseLocationToNotShow, -1); + dropPartitionsViaJdo(catName, dbName, tblName, new ArrayList<>(partitionLocations.keySet()), message); + return partitionLocations.values().stream().filter(Objects::nonNull).toList(); + } + }.run(true); + } + + @Override + public Map getPartitionLocations(TableName tableName, + String baseLocationToNotShow, int max) { + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + + Map partLocations = new HashMap<>(); + LOG.debug("Executing getPartitionLocations"); + + Query query = pm.newQuery(MPartition.class); + query.setFilter( + "this.table.database.catalogName == t1 && this.table.database.name == t2 " + + "&& this.table.tableName == t3"); + query.declareParameters("String t1, String t2, String t3"); + query.setResult("this.partitionName, this.sd.location"); + if (max >= 0) { + //Row limit specified, set it on the 
Query + query.setRange(0, max); + } + + List result = (List) query.execute(catName, dbName, tblName); + for (Object[] row : result) { + String location = (String) row[1]; + if (baseLocationToNotShow != null && location != null && + FileUtils.isSubdirectory(baseLocationToNotShow, location)) { + location = null; + } + partLocations.put((String) row[0], location); + } + LOG.debug("Done executing query for getPartitionLocations"); + return partLocations; + } + + private void dropPartitionsViaJdo(String catName, String dbName, String tblName, List partNames, + AtomicReference message) throws MetaException { + if (partNames.isEmpty()) { + return; + } + int batch = batchSize == NO_BATCHING ? 1 : (partNames.size() + batchSize) / batchSize; + AtomicLong batchIdx = new AtomicLong(1); + AtomicLong timeSpent = new AtomicLong(0); + Batchable.runBatched(batchSize, partNames, new Batchable() { + @Override + public List run(List input) throws MetaException { + StringBuilder progress = new StringBuilder("Dropping partitions, batch: "); + long start = System.currentTimeMillis(); + progress.append(batchIdx.get()).append("/").append(batch); + if (batchIdx.get() > 1) { + long leftTime = (batch - batchIdx.get()) * timeSpent.get() / batchIdx.get(); + progress.append(", time left: ").append(leftTime).append("ms"); + } + message.set(progress.toString()); + // Delete all things. + dropPartitionGrantsNoTxn(catName, dbName, tblName, input); + dropPartitionAllColumnGrantsNoTxn(catName, dbName, tblName, input); + dropPartitionColumnStatisticsNoTxn(catName, dbName, tblName, input); + + // CDs are reused; go try partition SDs, detach all CDs from SDs, then remove unused CDs. 
+ for (MColumnDescriptor mcd : detachCdsFromSdsNoTxn(catName, dbName, tblName, input)) { + removeUnusedColumnDescriptor(mcd); + } + dropPartitionsNoTxn(catName, dbName, tblName, input); + timeSpent.addAndGet(System.currentTimeMillis() - start); + batchIdx.incrementAndGet(); + return Collections.emptyList(); + } + }); + } + + /** + * Checks if a column descriptor has any remaining references by storage descriptors + * in the db. If it does not, then delete the CD. If it does, then do nothing. + * + * @param oldCD the column descriptor to delete if it is no longer referenced anywhere + */ + private void removeUnusedColumnDescriptor(MColumnDescriptor oldCD) { + if (oldCD == null) { + return; + } + LOG.debug("execute removeUnusedColumnDescriptor"); + if (!hasRemainingCDReference(oldCD)) { + // First remove any constraints that may be associated with this CD + Query query = pm.newQuery(MConstraint.class, "parentColumn == inCD || childColumn == inCD"); + query.declareParameters("MColumnDescriptor inCD"); + List mConstraintsList = (List) query.execute(oldCD); + if (CollectionUtils.isNotEmpty(mConstraintsList)) { + pm.deletePersistentAll(mConstraintsList); + } + // Finally remove CD + pm.retrieve(oldCD); + pm.deletePersistent(oldCD); + LOG.debug("successfully deleted a CD in removeUnusedColumnDescriptor"); + } + } + + /** + * Checks if a column descriptor has any remaining references by storage descriptors + * in the db. + * + * @param oldCD the column descriptor to check if it has references or not + * @return true if has references + */ + private boolean hasRemainingCDReference(MColumnDescriptor oldCD) { + assert oldCD != null; + Query query; + /** + * In order to workaround oracle not supporting limit statement caused performance issue, HIVE-9447 makes + * all the backend DB run select count(1) from SDS where SDS.CD_ID=? to check if the specific CD_ID is + * referenced in SDS table before drop a partition. 
This select count(1) statement does not scale well in + * Postgres, and there is no index for CD_ID column in SDS table. + * For a SDS table with with 1.5 million rows, select count(1) has average 700ms without index, while in + * 10-20ms with index. But the statement before + * HIVE-9447( SELECT * FROM "SDS" "A0" WHERE "A0"."CD_ID" = $1 limit 1) uses less than 10ms . + */ + // HIVE-21075: Fix Postgres performance regression caused by HIVE-9447 + LOG.debug("The dbType is {} ", dbType.getHiveSchemaPostfix()); + if (dbType.isPOSTGRES() || dbType.isMYSQL()) { + query = pm.newQuery(MStorageDescriptor.class, "this.cd == inCD"); + query.declareParameters("MColumnDescriptor inCD"); + List referencedSDs = null; + LOG.debug("Executing listStorageDescriptorsWithCD"); + // User specified a row limit, set it on the Query + query.setRange(0L, 1L); + referencedSDs = (List) query.execute(oldCD); + LOG.debug("Done executing query for listStorageDescriptorsWithCD"); + pm.retrieveAll(referencedSDs); + LOG.debug("Done retrieving all objects for listStorageDescriptorsWithCD"); + //if no other SD references this CD, we can throw it out. + return referencedSDs != null && !referencedSDs.isEmpty(); + } else { + query = pm.newQuery( + "select count(1) from org.apache.hadoop.hive.metastore.model.MStorageDescriptor where (this.cd == inCD)"); + query.declareParameters("MColumnDescriptor inCD"); + long count = (Long) query.execute(oldCD); + //if no other SD references this CD, we can throw it out. + return count != 0; + } + } + + /** + * Detaches column descriptors from storage descriptors; returns the set of unique CDs + * thus detached. This is done before dropping partitions because CDs are reused between + * SDs; so, we remove the links to delete SDs and then check the returned CDs to see if + * they are referenced by other SDs. 
+ */ + private Set detachCdsFromSdsNoTxn(String catName, String dbName, String tblName, + List partNames) { + Pair> queryWithParams = getPartQueryWithParams(pm, catName, dbName, tblName, partNames); + Query query = queryWithParams.getLeft(); + query.setClass(MPartition.class); + query.setResult("sd"); + List sds = + (List) query.executeWithMap(queryWithParams.getRight()); + HashSet candidateCds = new HashSet<>(); + for (MStorageDescriptor sd : sds) { + if (sd != null && sd.getCD() != null) { + candidateCds.add(sd.getCD()); + sd.setCD(null); + } + } + return candidateCds; + } + + private void dropPartitionsNoTxn(String catName, String dbName, String tblName, List partNames) { + Pair> queryWithParams = getPartQueryWithParams(pm, catName, dbName, tblName, partNames); + Query query = queryWithParams.getLeft(); + query.setClass(MPartition.class); + long deleted = query.deletePersistentAll(queryWithParams.getRight()); + LOG.debug("Deleted {} partition from store", deleted); + } + + private void dropPartitionGrantsNoTxn(String catName, String dbName, String tableName, List partNames) { + Pair queryWithParams = makeQueryByPartitionNames(catName, dbName, tableName, partNames, + MPartitionPrivilege.class, "partition.table.tableName", "partition.table.database.name", + "partition.partitionName", "partition.table.database.catalogName"); + Query query = queryWithParams.getLeft(); + query.deletePersistentAll(queryWithParams.getRight()); + } + + private Pair makeQueryByPartitionNames(String catName, String dbName, String tableName, + List partNames, Class clazz, String tbCol, String dbCol, String partCol, String catCol) { + StringBuilder queryStr = new StringBuilder(tbCol + " == t1 && " + dbCol + " == t2 && " + catCol + " == t3"); + StringBuilder paramStr = new StringBuilder("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + Object[] params = new Object[3 + partNames.size()]; + params[0] = normalizeIdentifier(tableName); + params[1] = normalizeIdentifier(dbName); 
+ params[2] = normalizeIdentifier(catName); + int index = 0; + for (String partName : partNames) { + params[index + 3] = partName; + queryStr.append(((index == 0) ? " && (" : " || ") + partCol + " == p" + index); + paramStr.append(", java.lang.String p" + index); + ++index; + } + queryStr.append(")"); + Query query = pm.newQuery(clazz, queryStr.toString()); + query.declareParameters(paramStr.toString()); + return Pair.of(query, params); + } + + private void dropPartitionAllColumnGrantsNoTxn(String catName, String dbName, String tableName, + List partNames) { + Pair queryWithParams = makeQueryByPartitionNames(catName, dbName, tableName, partNames, + MPartitionColumnPrivilege.class, "partition.table.tableName", "partition.table.database.name", + "partition.partitionName", "partition.table.database.catalogName"); + Query query = queryWithParams.getLeft(); + query.deletePersistentAll(queryWithParams.getRight()); + } + + private void dropPartitionColumnStatisticsNoTxn(String catName, String dbName, String tableName, + List partNames) { + Pair queryWithParams = makeQueryByPartitionNames(catName, dbName, tableName, partNames, + MPartitionColumnStatistics.class, "partition.table.tableName", "partition.table.database.name", + "partition.partitionName", "partition.table.database.catalogName"); + Query query = queryWithParams.getLeft(); + query.deletePersistentAll(queryWithParams.getRight()); + } + + class AttachedMTableInfo { + MTable mtbl; + MColumnDescriptor mcd; + + public AttachedMTableInfo() {} + + public AttachedMTableInfo(MTable mtbl, MColumnDescriptor mcd) { + this.mtbl = mtbl; + this.mcd = mcd; + } + } + + private MTable getMTable(String catName, String db, String table) { + AttachedMTableInfo nmtbl = getMTable(catName, db, table, false); + return nmtbl.mtbl; + } + + private AttachedMTableInfo getMTable(String catName, String db, String table, + boolean retrieveCD) { + AttachedMTableInfo nmtbl = new AttachedMTableInfo(); + catName = 
normalizeIdentifier(Optional.ofNullable(catName).orElse(getDefaultCatalog(baseStore.getConf()))); + db = normalizeIdentifier(db); + table = normalizeIdentifier(table); + Query query = pm.newQuery(MTable.class, + "tableName == table && database.name == db && database.catalogName == catname"); + query.declareParameters( + "java.lang.String table, java.lang.String db, java.lang.String catname"); + query.setUnique(true); + if (LOG.isDebugEnabled()) { + LOG.debug("Executing getMTable for {}", + TableName.getQualified(catName, db, table)); + } + MTable mtbl = (MTable) query.execute(table, db, catName); + pm.retrieve(mtbl); + // Retrieving CD can be expensive and unnecessary, so do it only when required. + if (mtbl != null && retrieveCD) { + pm.retrieve(mtbl.getSd()); + pm.retrieveAll(mtbl.getSd().getCD()); + nmtbl.mcd = mtbl.getSd().getCD(); + } + nmtbl.mtbl = mtbl; + return nmtbl; + } + + @Override + public Table getTable(TableName table, String writeIdList, long tableId) + throws MetaException { + Table tbl; + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + MTable mtable = getMTable(catName, dbName, tableName); + tbl = convertToTable(mtable, conf); + // Retrieve creation metadata if needed + if (tbl != null && TableType.MATERIALIZED_VIEW.toString().equals(tbl.getTableType())) { + tbl.setCreationMetadata( + convertToCreationMetadata(getCreationMetadata(catName, dbName, tableName), baseStore)); + } + + // If transactional non partitioned table, + // check whether the current version table statistics + // in the metastore comply with the client query's snapshot isolation. + // Note: a partitioned table has table stats and table snapshot in MPartiiton. 
+ if (writeIdList != null) { + boolean isTxn = TxnUtils.isTransactionalTable(tbl); + if (isTxn && !areTxnStatsSupported) { + StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from Table's parameters."); + } else if (isTxn && tbl.getPartitionKeysSize() == 0) { + if (isCurrentStatsValidForTheQuery(mtable.getParameters(), + mtable.getWriteId(), writeIdList, false)) { + tbl.setIsStatsCompliant(true); + } else { + tbl.setIsStatsCompliant(false); + // Do not make persistent the following state since it is the query specific (not global). + StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from Table's parameters."); + } + } + } + return tbl; + } + + private MCreationMetadata getCreationMetadata(String catName, String dbName, String tblName) { + Query query = pm.newQuery( + MCreationMetadata.class, "tblName == table && dbName == db && catalogName == cat"); + query.declareParameters("java.lang.String table, java.lang.String db, java.lang.String cat"); + query.setUnique(true); + MCreationMetadata mcm = (MCreationMetadata) query.execute(tblName, dbName, catName); + pm.retrieve(mcm); + return mcm; + } + + // TODO: move to somewhere else + public static boolean isCurrentStatsValidForTheQuery( + Map statsParams, long statsWriteId, String queryValidWriteIdList, + boolean isCompleteStatsWriter) throws MetaException { + + // Note: can be changed to debug/info to verify the calls. + LOG.debug("isCurrentStatsValidForTheQuery with stats write ID {}; query {}; writer: {} params {}", + statsWriteId, queryValidWriteIdList, isCompleteStatsWriter, statsParams); + // return true since the stats does not seem to be transactional. 
+ if (statsWriteId < 1) { + return true; + } + // This COLUMN_STATS_ACCURATE(CSA) state checking also includes the case that the stats is + // written by an aborted transaction but TXNS has no entry for the transaction + // after compaction. Don't check for a complete stats writer - it may replace invalid stats. + if (!isCompleteStatsWriter && !StatsSetupConst.areBasicStatsUptoDate(statsParams)) { + return false; + } + + if (queryValidWriteIdList != null) { // Can be null when stats are being reset to invalid. + ValidWriteIdList list4TheQuery = ValidReaderWriteIdList.fromValue(queryValidWriteIdList); + // Just check if the write ID is valid. If it's valid (i.e. we are allowed to see it), + // that means it cannot possibly be a concurrent write. If it's not valid (we are not + // allowed to see it), that means it's either concurrent or aborted, same thing for us. + if (list4TheQuery.isWriteIdValid(statsWriteId)) { + return true; + } + // Updater is also allowed to overwrite stats from aborted txns, as long as they are not concurrent. 
+ if (isCompleteStatsWriter && list4TheQuery.isWriteIdAborted(statsWriteId)) { + return true; + } + } + + return false; + } + + @Override + public boolean addPartitions(TableName tableName, List parts) throws InvalidObjectException, MetaException { + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + List tabGrants = null; + List tabColumnGrants = null; + MTable table = this.getMTable(catName, dbName, tblName); + if (table == null) { + throw new InvalidObjectException("Unable to add partitions because " + + tableName + " does not exist"); + } + if ("TRUE".equalsIgnoreCase(table.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) { + tabGrants = listAllTableGrants(catName, dbName, tblName); + tabColumnGrants = listTableAllColumnGrants(catName, dbName, tblName, null); + } + List mParts = new ArrayList<>(); + List> mPartPrivilegesList = new ArrayList<>(); + List> mPartColPrivilegesList = new ArrayList<>(); + for (Partition part : parts) { + if (!part.getTableName().equalsIgnoreCase(tblName) || !part.getDbName().equalsIgnoreCase(dbName)) { + throw new MetaException("Partition does not belong to target table " + + dbName + "." 
+ tblName + ": " + part); + } + MPartition mpart = convertToMPart(part, table); + mParts.add(mpart); + int now = (int) (System.currentTimeMillis() / 1000); + List mPartPrivileges = new ArrayList<>(); + if (tabGrants != null) { + for (MTablePrivilege tab: tabGrants) { + MPartitionPrivilege mPartPrivilege = new MPartitionPrivilege(tab.getPrincipalName(), tab.getPrincipalType(), + mpart, tab.getPrivilege(), now, tab.getGrantor(), tab.getGrantorType(), tab.getGrantOption(), + tab.getAuthorizer()); + mPartPrivileges.add(mPartPrivilege); + } + } + + List mPartColumnPrivileges = new ArrayList<>(); + if (tabColumnGrants != null) { + for (MTableColumnPrivilege col : tabColumnGrants) { + MPartitionColumnPrivilege mPartColumnPrivilege = new MPartitionColumnPrivilege(col.getPrincipalName(), + col.getPrincipalType(), mpart, col.getColumnName(), col.getPrivilege(), now, col.getGrantor(), + col.getGrantorType(), col.getGrantOption(), col.getAuthorizer()); + mPartColumnPrivileges.add(mPartColumnPrivilege); + } + } + mPartPrivilegesList.add(mPartPrivileges); + mPartColPrivilegesList.add(mPartColumnPrivileges); + } + if (CollectionUtils.isNotEmpty(mParts)) { + GetHelper helper = new GetHelper<>(this, tableName) { + @Override + protected Void getSqlResult() throws MetaException { + getDirectSql().addPartitions(mParts, mPartPrivilegesList, mPartColPrivilegesList); + return null; + } + + @Override + protected Void getJdoResult() { + List toPersist = new ArrayList<>(mParts); + mPartPrivilegesList.forEach(toPersist::addAll); + mPartColPrivilegesList.forEach(toPersist::addAll); + pm.makePersistentAll(toPersist); + pm.flush(); + return null; + } + + @Override + protected String describeResult() { + return "add partitions"; + } + }; + try { + helper.run(false); + } catch (NoSuchObjectException e) { + throw newMetaException(e); + } + } + return true; + } + + private List listAllTableGrants(String catName, String dbName, String tableName) { + List mSecurityTabList = new ArrayList<>(); + 
tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + LOG.debug("Executing listAllTableGrants"); + + String queryStr = "table.tableName == t1 && table.database.name == t2" + + "&& table.database.catalogName == t3"; + Query query = pm.newQuery(MTablePrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + List mPrivs = + (List) query.executeWithArray(tableName, dbName, catName); + LOG.debug("Done executing query for listAllTableGrants"); + pm.retrieveAll(mPrivs); + mSecurityTabList.addAll(mPrivs); + LOG.debug("Done retrieving all objects for listAllTableGrants"); + return mSecurityTabList; + } + + private List listTableAllColumnGrants( + String catName, String dbName, String tableName, String authorizer) { + Query query; + List mTblColPrivilegeList = new ArrayList<>(); + tableName = normalizeIdentifier(tableName); + dbName = normalizeIdentifier(dbName); + catName = normalizeIdentifier(catName); + LOG.debug("Executing listTableAllColumnGrants"); + List mPrivs = null; + if (authorizer != null) { + String queryStr = "table.tableName == t1 && table.database.name == t2 &&" + + "table.database.catalogName == t3 && authorizer == t4"; + query = pm.newQuery(MTableColumnPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4"); + mPrivs = (List) query.executeWithArray(tableName, dbName, catName, authorizer); + } else { + String queryStr = "table.tableName == t1 && table.database.name == t2 &&" + + "table.database.catalogName == t3"; + query = pm.newQuery(MTableColumnPrivilege.class, queryStr); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + mPrivs = (List) query.executeWithArray(tableName, dbName, catName); + } + LOG.debug("Query to obtain objects for listTableAllColumnGrants finished"); + 
pm.retrieveAll(mPrivs); + LOG.debug("RetrieveAll on all the objects for listTableAllColumnGrants finished"); + mTblColPrivilegeList.addAll(mPrivs); + LOG.debug("Done retrieving " + mPrivs.size() + " objects for listTableAllColumnGrants"); + return mTblColPrivilegeList; + } + + @Override + public Partition getPartition(TableName tabName, List part_vals, String validWriteIds) + throws MetaException, NoSuchObjectException { + String catName = normalizeIdentifier(tabName.getCat()); + String dbName = normalizeIdentifier(tabName.getDb()); + String tableName = normalizeIdentifier(tabName.getTable()); + Partition part = null; + MTable table = this.getMTable(catName, dbName, tableName); + if (table == null) { + throw new NoSuchObjectException("Unable to get partition because " + + TableName.getQualified(catName, dbName, tableName) + + " does not exist"); + } + MPartition mpart = getMPartition(catName, dbName, tableName, part_vals, table); + part = convertToPart(catName, dbName, tableName, mpart, + TxnUtils.isAcidTable(table.getParameters()), conf); + if (part == null) { + throw new NoSuchObjectException("partition values=" + + part_vals.toString()); + } + + part.setValues(part_vals); + // If transactional table partition, check whether the current version partition + // statistics in the metastore comply with the client query's snapshot isolation. + long statsWriteId = mpart.getWriteId(); + if (TxnUtils.isTransactionalTable(table.getParameters())) { + if (!areTxnStatsSupported) { + // Do not make persistent the following state since it is query specific (not global). 
+ StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters."); + } else if (validWriteIds != null) { + if (isCurrentStatsValidForTheQuery(part.getParameters(), statsWriteId, validWriteIds, false)) { + part.setIsStatsCompliant(true); + } else { + part.setIsStatsCompliant(false); + // Do not make persistent the following state since it is query specific (not global). + StatsSetupConst.setBasicStatsState(part.getParameters(), StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from Partition object's parameters."); + } + } + } + return part; + } + + /** + * Getting MPartition object. Use this method only if the partition name is not available, + * since then the table will be queried to get the partition keys. + * @param catName The catalogue + * @param dbName The database + * @param tableName The table + * @param part_vals The values defining the partition + * @return The MPartition object in the backend database + */ + private MPartition getMPartition(String catName, String dbName, String tableName, List part_vals, MTable mtbl) + throws MetaException { + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + if (mtbl == null) { + mtbl = getMTable(catName, dbName, tableName); + if (mtbl == null) { + return null; + } + } + // Change the query to use part_vals instead of the name which is + // redundant TODO: callers of this often get part_vals out of name for no reason... 
+ String name = + Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), part_vals); + MPartition result = getMPartition(catName, dbName, tableName, name); + return result; + } + + @Override + public List getPartitions(TableName table, GetPartitionsArgs args) + throws MetaException, NoSuchObjectException { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tblName = normalizeIdentifier(table.getTable()); + return new GetListHelper(this, table) { + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql().getPartitions(catName, dbName, tblName, args); + } + @Override + protected List getJdoResult() throws MetaException { + try { + return convertToParts(catName, dbName, tblName, + listMPartitions(catName, dbName, tblName, args.getMax()), false, conf, args); + } catch (Exception e) { + LOG.error("Failed to convert to parts", e); + throw new MetaException(e.getMessage()); + } + } + }.run(false); + } + + private List listMPartitions(String catName, String dbName, String tableName, int max) { + LOG.debug("Executing listMPartitions"); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + + Query query = pm.newQuery(MPartition.class, + "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + query.setOrdering("partitionName ascending"); + if (max >= 0) { + query.setRange(0, max); + } + final List mparts = (List) query.execute(tableName, dbName, catName); + LOG.debug("Done executing query for listMPartitions"); + + pm.retrieveAll(mparts); + pm.makeTransientAll(mparts); + + LOG.debug("Done retrieving all objects for listMPartitions {}", mparts); + + return Collections.unmodifiableList(new ArrayList<>(mparts)); + } + + @Override + public Table alterTable(TableName tableName, Table newTable, String 
queryValidWriteIds) + throws InvalidObjectException, MetaException { + String name = normalizeIdentifier(tableName.getTable()); + String dbname = normalizeIdentifier(tableName.getDb()); + String catName = normalizeIdentifier(tableName.getCat()); + MTable newt = convertToMTable(newTable, baseStore); + if (newt == null) { + throw new InvalidObjectException("new table is invalid"); + } + + MTable oldt = getMTable(catName, dbname, name); + if (oldt == null) { + throw new MetaException("table " + dbname + "." + name + " doesn't exist"); + } + + // For now only alter name, owner, parameters, cols, bucketcols are allowed + oldt.setDatabase(newt.getDatabase()); + oldt.setTableName(normalizeIdentifier(newt.getTableName())); + boolean isTxn = TxnUtils.isTransactionalTable(newTable); + boolean isToTxn = isTxn && !TxnUtils.isTransactionalTable(oldt.getParameters()); + if (!isToTxn && isTxn && areTxnStatsSupported) { + // Transactional table is altered without a txn. Make sure there are no changes to the flag. + String errorMsg = verifyStatsChangeCtx(TableName.getDbTable(name, dbname), oldt.getParameters(), + newTable.getParameters(), newTable.getWriteId(), queryValidWriteIds, false); + if (errorMsg != null) { + throw new MetaException(errorMsg); + } + } + oldt.setParameters(newt.getParameters()); + oldt.setOwner(newt.getOwner()); + oldt.setOwnerType(newt.getOwnerType()); + // Fully copy over the contents of the new SD into the old SD, + // so we don't create an extra SD in the metastore db that has no references. 
+ MColumnDescriptor oldCD = null; + MStorageDescriptor oldSD = oldt.getSd(); + if (oldSD != null) { + oldCD = oldSD.getCD(); + } + copyMSD(newt.getSd(), oldt.getSd()); + removeUnusedColumnDescriptor(oldCD); + oldt.setRetention(newt.getRetention()); + oldt.setPartitionKeys(newt.getPartitionKeys()); + oldt.setTableType(newt.getTableType()); + oldt.setLastAccessTime(newt.getLastAccessTime()); + oldt.setViewOriginalText(newt.getViewOriginalText()); + oldt.setViewExpandedText(newt.getViewExpandedText()); + oldt.setRewriteEnabled(newt.isRewriteEnabled()); + + // If transactional, update the stats state for the current Stats updater query. + // Set stats invalid for ACID conversion; it doesn't pass in the write ID. + if (isTxn) { + if (!areTxnStatsSupported || isToTxn) { + StatsSetupConst.setBasicStatsState(oldt.getParameters(), StatsSetupConst.FALSE); + } else if (queryValidWriteIds != null && newTable.getWriteId() > 0) { + // Check concurrent INSERT case and set false to the flag. + if (!isCurrentStatsValidForTheQuery(oldt.getParameters(), oldt.getWriteId(), queryValidWriteIds, true)) { + StatsSetupConst.setBasicStatsState(oldt.getParameters(), StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " + + dbname + "." + name + ". will be made persistent."); + } + assert newTable.getWriteId() > 0; + oldt.setWriteId(newTable.getWriteId()); + } + } + newTable = convertToTable(oldt, conf); + return newTable; + } + + + private void copyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) { + oldSd.setLocation(newSd.getLocation()); + // If the columns of the old column descriptor != the columns of the new one, + // then change the old storage descriptor's column descriptor. + // Convert the MFieldSchema's to their thrift object counterparts, because we maintain + // datastore identity (i.e., identity of the model objects are managed by JDO, + // not the application). 
+ List oldCols = oldSd.getCD() != null && oldSd.getCD().getCols() != null ? + convertToFieldSchemas(oldSd.getCD().getCols()) : null; + List newCols = newSd.getCD() != null && newSd.getCD().getCols() != null ? + convertToFieldSchemas(newSd.getCD().getCols()) : null; + if (oldCols == null || !oldCols.equals(newCols)) { + // First replace any constraints that may be associated with this CD + // Create mapping from old col indexes to new col indexes + if (oldCols != null && newCols != null) { + Map mapping = new HashMap<>(); + for (int i = 0; i < oldCols.size(); i++) { + FieldSchema oldCol = oldCols.get(i); + //TODO: replace for loop with list.indexOf() + for (int j = 0; j < newCols.size(); j++) { + FieldSchema newCol = newCols.get(j); + if (oldCol.equals(newCol)) { + mapping.put(i, j); + break; + } + } + } + // If we find it, we will change the reference for the CD. + // If we do not find it, i.e., the column will be deleted, we do not change it + // and we let the logic in removeUnusedColumnDescriptor take care of it + try (QueryWrapper query = new QueryWrapper(pm.newQuery(MConstraint.class, "parentColumn == inCD || childColumn == inCD"))) { + query.declareParameters("MColumnDescriptor inCD"); + List mConstraintsList = (List) query.execute(oldSd.getCD()); + pm.retrieveAll(mConstraintsList); + for (MConstraint mConstraint : mConstraintsList) { + if (oldSd.getCD().equals(mConstraint.getParentColumn())) { + Integer newIdx = mapping.get(mConstraint.getParentIntegerIndex()); + if (newIdx != null) { + mConstraint.setParentColumn(newSd.getCD()); + mConstraint.setParentIntegerIndex(newIdx); + } + } + if (oldSd.getCD().equals(mConstraint.getChildColumn())) { + Integer newIdx = mapping.get(mConstraint.getChildIntegerIndex()); + if (newIdx != null) { + mConstraint.setChildColumn(newSd.getCD()); + mConstraint.setChildIntegerIndex(newIdx); + } + } + } + pm.makePersistentAll(mConstraintsList); + } + // Finally replace CD + oldSd.setCD(newSd.getCD()); + } + } + + 
oldSd.setBucketCols(newSd.getBucketCols()); + oldSd.setIsCompressed(newSd.isCompressed()); + oldSd.setInputFormat(newSd.getInputFormat()); + oldSd.setOutputFormat(newSd.getOutputFormat()); + oldSd.setNumBuckets(newSd.getNumBuckets()); + oldSd.getSerDeInfo().setName(newSd.getSerDeInfo().getName()); + oldSd.getSerDeInfo().setSerializationLib( + newSd.getSerDeInfo().getSerializationLib()); + oldSd.getSerDeInfo().setParameters(newSd.getSerDeInfo().getParameters()); + oldSd.getSerDeInfo().setDescription(newSd.getSerDeInfo().getDescription()); + oldSd.setSkewedColNames(newSd.getSkewedColNames()); + oldSd.setSkewedColValues(newSd.getSkewedColValues()); + oldSd.setSkewedColValueLocationMaps(newSd.getSkewedColValueLocationMaps()); + oldSd.setSortCols(newSd.getSortCols()); + oldSd.setParameters(newSd.getParameters()); + oldSd.setStoredAsSubDirectories(newSd.isStoredAsSubDirectories()); + } + + @Override + public void createTable(Table tbl) throws InvalidObjectException, MetaException { + MTable mtbl = convertToMTable(tbl, baseStore);; + + if (TxnUtils.isTransactionalTable(tbl)) { + mtbl.setWriteId(tbl.getWriteId()); + } + pm.makePersistent(mtbl); + + if (tbl.getCreationMetadata() != null) { + MCreationMetadata mcm = convertToMCreationMetadata(tbl.getCreationMetadata(), baseStore); + pm.makePersistent(mcm); + } + tbl.setId(mtbl.getId()); + + PrincipalPrivilegeSet principalPrivs = tbl.getPrivileges(); + List toPersistPrivObjs = new ArrayList<>(); + if (principalPrivs != null) { + int now = (int) (System.currentTimeMillis() / 1000); + + Map> userPrivs = principalPrivs.getUserPrivileges(); + putPersistentPrivObjects(mtbl, toPersistPrivObjs, now, userPrivs, PrincipalType.USER, "SQL"); + + Map> groupPrivs = principalPrivs.getGroupPrivileges(); + putPersistentPrivObjects(mtbl, toPersistPrivObjs, now, groupPrivs, PrincipalType.GROUP, "SQL"); + + Map> rolePrivs = principalPrivs.getRolePrivileges(); + putPersistentPrivObjects(mtbl, toPersistPrivObjs, now, rolePrivs, 
PrincipalType.ROLE, "SQL"); + } + pm.makePersistentAll(toPersistPrivObjs); + } + + @Override + public boolean dropPartitions(TableName tableName, List partNames) + throws MetaException, NoSuchObjectException { + if (CollectionUtils.isEmpty(partNames)) { + return false; + } + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + new GetListHelper(this, tableName) { + @Override + protected List getSqlResult() throws MetaException { + getDirectSql().dropPartitionsViaSqlFilter(catName, dbName, tblName, partNames); + return Collections.emptyList(); + } + @Override + protected List getJdoResult() throws MetaException { + dropPartitionsViaJdo(catName, dbName, tblName, partNames, new AtomicReference<>()); + return Collections.emptyList(); + } + }.run(false); + return true; + } + + @Override + public List getTables(String catName, String dbName, String pattern, TableType tableType, int limit) + throws MetaException { + try { + final String db_name = normalizeIdentifier(dbName); + final String cat_name = normalizeIdentifier(catName); + return new GetListHelper(this, null) { + @Override + protected boolean canUseDirectSql() throws MetaException { + return (pattern == null || pattern.equals(".*")); + } + + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql().getTables(cat_name, db_name, tableType, limit); + } + + @Override + protected List getJdoResult() throws MetaException, NoSuchObjectException { + return getTablesInternalViaJdo(cat_name, db_name, pattern, tableType, limit); + } + }.run(false); + } catch (NoSuchObjectException nse) { + throw new MetaException(nse.getMessage()); + } + } + + private List getTablesInternalViaJdo(String catName, String dbName, String pattern, + TableType tableType, int limit) { + dbName = normalizeIdentifier(dbName); + // Take the pattern and split it on the | to get all the composing 
+ // patterns + List parameterVals = new ArrayList<>(); + StringBuilder filterBuilder = new StringBuilder(); + //adds database.name == dbName to the filter + appendSimpleCondition(filterBuilder, "database.name", new String[] {dbName}, parameterVals); + appendSimpleCondition(filterBuilder, "database.catalogName", new String[] {catName}, parameterVals); + if(pattern != null) { + appendPatternCondition(filterBuilder, "tableName", pattern, parameterVals); + } + if(tableType != null) { + appendSimpleCondition(filterBuilder, "tableType", new String[] {tableType.toString()}, parameterVals); + } + + Query query = pm.newQuery(MTable.class, filterBuilder.toString()); + query.setResult("tableName"); + query.setOrdering("tableName ascending"); + if (limit >= 0) { + query.setRange(0, limit); + } + Collection names = (Collection) query.executeWithArray(parameterVals.toArray(new String[0])); + return new ArrayList<>(names); + } + + + @Override + public List
getTableObjectsByName(String catName, String db, List tbl_names, + GetProjectionsSpec projectionSpec, String tablePattern) throws MetaException, UnknownDBException { + List
tables = new ArrayList<>(); + List mtables = null; + catName = normalizeIdentifier(catName); + + List lowered_tbl_names = new ArrayList<>(); + if(tbl_names != null) { + lowered_tbl_names = new ArrayList<>(tbl_names.size()); + for (String t : tbl_names) { + lowered_tbl_names.add(normalizeIdentifier(t)); + } + } + + StringBuilder filterBuilder = new StringBuilder(); + List parameterVals = new ArrayList<>(); + appendPatternCondition(filterBuilder, "database.name", db, parameterVals); + appendSimpleCondition(filterBuilder, "database.catalogName", new String[] {catName}, parameterVals); + if(tbl_names != null){ + appendSimpleCondition(filterBuilder, "tableName", lowered_tbl_names.toArray(new String[0]), parameterVals); + } + if(tablePattern != null){ + appendPatternCondition(filterBuilder, "tableName", tablePattern, parameterVals); + } + Query query = pm.newQuery(MTable.class, filterBuilder.toString()) ; + List projectionFields = null; + + // If a projection specification has been set, validate it and translate it to JDO columns. + if (projectionSpec != null) { + //Validate the projection fields for multi-valued fields. + projectionFields = TableFields.getMFieldNames(projectionSpec.getFieldList()); + } + + // If the JDO translation resulted in valid JDO columns names, use it to create a projection for the JDO query. + if (projectionFields != null) { + // fetch partially filled tables using result clause + query.setResult(Joiner.on(',').join(projectionFields)); + } + + if (projectionFields == null) { + mtables = (List) query.executeWithArray(parameterVals.toArray(new String[parameterVals.size()])); + } else { + if (projectionFields.size() > 1) { + // Execute the query to fetch the partial results. 
+ List results = (List) query.executeWithArray(parameterVals.toArray(new String[parameterVals.size()])); + // Declare the tables array to return the list of tables + mtables = new ArrayList<>(results.size()); + // Iterate through each row of the result and create the MTable object. + for (Object[] row : results) { + MTable mtable = new MTable(); + int i = 0; + for (Object val : row) { + MetaStoreServerUtils.setNestedProperty(mtable, projectionFields.get(i), val, true); + i++; + } + mtables.add(mtable); + } + } else if (projectionFields.size() == 1) { + // Execute the query to fetch the partial results. + List results = (List) query.executeWithArray(parameterVals.toArray(new String[parameterVals.size()])); + // Iterate through each row of the result and create the MTable object. + mtables = new ArrayList<>(results.size()); + for (Object row : results) { + MTable mtable = new MTable(); + MetaStoreServerUtils.setNestedProperty(mtable, projectionFields.get(0), row, true); + mtables.add(mtable); + } + } + } + + if (mtables == null || mtables.isEmpty()) { + try { + baseStore.ensureGetMDatabase(catName, db); + } catch (NoSuchObjectException nse) { + throw new UnknownDBException(nse.getMessage()); + } + } else { + for (Iterator iter = mtables.iterator(); iter.hasNext(); ) { + Table tbl = convertToTable((MTable) iter.next(), conf); + // Retrieve creation metadata if needed + if (TableType.MATERIALIZED_VIEW.toString().equals(tbl.getTableType())) { + tbl.setCreationMetadata( + convertToCreationMetadata( + getCreationMetadata(tbl.getCatName(), tbl.getDbName(), tbl.getTableName()), baseStore)); + } + tables.add(tbl); + } + } + return tables; + } + + @Override + public List getMaterializedViewsForRewriting(String catName, String dbName) + throws MetaException, NoSuchObjectException { + catName = normalizeIdentifier(catName); + List params = new ArrayList<>(Arrays.asList(catName, TableType.MATERIALIZED_VIEW.toString(), true)); + if (dbName != null) { + 
params.add(normalizeIdentifier(dbName)); + } + Query query = pm.newQuery(MTable.class, + "database.catalogName == cat && tableType == tt && rewriteEnabled == re" + + (dbName != null ? " && database.name == db" : "")); + query.declareParameters( + "java.lang.String cat, java.lang.String tt, boolean re" + ((dbName != null) ? " , java.lang.String db" : "")); + query.setResult("tableName"); + Collection names = (Collection) query.executeWithArray(params.toArray()); + return new ArrayList<>(names); + } + + @Override + public List getTableMeta(String catName, String dbNames, String tableNames, List tableTypes) + throws MetaException { + List metas = new ArrayList<>(); + try { + // Take the pattern and split it on the | to get all the composing + // patterns + StringBuilder filterBuilder = new StringBuilder(); + List parameterVals = new ArrayList<>(); + appendSimpleCondition(filterBuilder, "database.catalogName", new String[] {catName}, parameterVals); + if (dbNames != null && !dbNames.equals("*")) { + appendPatternCondition(filterBuilder, "database.name", dbNames, parameterVals); + } + if (tableNames != null && !tableNames.equals("*")) { + appendPatternCondition(filterBuilder, "tableName", tableNames, parameterVals); + } + if (tableTypes != null && !tableTypes.isEmpty()) { + appendSimpleCondition(filterBuilder, "tableType", tableTypes.toArray(new String[0]), parameterVals); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("getTableMeta with filter " + filterBuilder + " params: " + + StringUtils.join(parameterVals, ", ")); + } + // Add the fetch group here which retrieves the database object along with the MTable + // objects. If we don't prefetch the database object, we could end up in a situation where + // the database gets dropped while we are looping through the tables throwing a + // JDOObjectNotFoundException. 
This causes HMS to go into a retry loop which greatly degrades + // performance of this function when called with dbNames="*" and tableNames="*" (fetch all + // tables in all databases, essentially a full dump) + pm.getFetchPlan().addGroup(FetchGroups.FETCH_DATABASE_ON_MTABLE); + Query query = pm.newQuery(MTable.class, filterBuilder.toString()) ; + query.setResult("database.name, tableName, tableType, parameters.get(\"comment\"), owner, ownerType"); + List tables = (List) query.executeWithArray(parameterVals.toArray(new String[0])); + for (Object[] table : tables) { + TableMeta metaData = new TableMeta(table[0].toString(), table[1].toString(), table[2].toString()); + metaData.setCatName(catName); + if (table[3] != null) { + metaData.setComments(table[3].toString()); + } + if (table[4] != null) { + metaData.setOwnerName(table[4].toString()); + } + if (table[5] != null) { + metaData.setOwnerType(getPrincipalTypeFromStr(table[5].toString())); + } + metas.add(metaData); + } + } finally { + pm.getFetchPlan().removeGroup(FetchGroups.FETCH_DATABASE_ON_MTABLE); + } + return metas; + } + + @Override + public List listTableNamesByFilter(String catName, String dbName, String filter, short maxTables) + throws MetaException, UnknownDBException { + Query query = null; + List tableNames = new ArrayList<>(); + LOG.debug("Executing listTableNamesByFilter"); + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + + try { + baseStore.ensureGetMDatabase(catName, dbName); + } catch (NoSuchObjectException nse) { + throw new UnknownDBException(nse.getMessage()); + } + + Map params = new HashMap<>(); + String queryFilterString = makeQueryFilterString(catName, dbName, null, filter, params); + query = pm.newQuery(MTable.class); + query.declareImports("import java.lang.String"); + query.setResult("tableName"); + query.setResultClass(java.lang.String.class); + if (maxTables >= 0) { + query.setRange(0, maxTables); + } + LOG.debug("filter specified is {}, JDOQL 
filter is {}", filter, queryFilterString); + if (LOG.isDebugEnabled()) { + for (Map.Entry entry : params.entrySet()) { + LOG.debug("key: {} value: {} class: {}", entry.getKey(), entry.getValue(), + entry.getValue().getClass().getName()); + } + } + String parameterDeclaration = makeParameterDeclarationStringObj(params); + query.declareParameters(parameterDeclaration); + query.setFilter(queryFilterString); + Collection names = (Collection)query.executeWithMap(params); + // have to emulate "distinct", otherwise tables with the same name may be returned + tableNames = new ArrayList<>(new HashSet<>(names)); + LOG.debug("Done executing query for listTableNamesByFilter"); + return tableNames; + } + + @Override + public List listPartitionNamesByFilter(TableName tableName, GetPartitionsArgs args) + throws MetaException, NoSuchObjectException { + + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + + MTable mTable = ensureGetMTable(tableName); + List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); + String filter = args.getFilter(); + final ExpressionTree tree = (filter != null && !filter.isEmpty()) + ? 
PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; + return new GetListHelper(this, tableName) { + private final MetaStoreDirectSql.SqlFilterForPushdown filter = new MetaStoreDirectSql.SqlFilterForPushdown(); + + @Override + protected boolean canUseDirectSql() throws MetaException { + return getDirectSql().generateSqlFilterForPushdown(catName, dbName, tblName, + partitionKeys, tree, null, filter); + } + + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql().getPartitionNamesViaSql(filter, partitionKeys, + getDefaultPartitionName(args.getDefaultPartName()), null, args.getMax()); + } + + @Override + protected List getJdoResult() + throws MetaException, NoSuchObjectException, InvalidObjectException { + return getPartitionNamesViaOrm(catName, dbName, tblName, tree, null, + args.getMax(), true, partitionKeys); + } + }.run(false); + } + + private String makeParameterDeclarationStringObj(Map params) { + //Create the parameter declaration string + StringBuilder paramDecl = new StringBuilder(); + for (Map.Entry entry : params.entrySet()) { + paramDecl.append(", "); + paramDecl.append(entry.getValue().getClass().getName()); + paramDecl.append(' '); + paramDecl.append(entry.getKey()); + } + return paramDecl.toString(); + } + + /** + * Makes a JDO query filter string for tables or partitions. + * @param dbName Database name. + * @param table Table. If null, the query returned is over tables in a database. + * If not null, the query returned is over partitions in a table. + * @param tree The expression tree from which JDOQL filter will be made. + * @param params Parameters for the filter. Some parameters may be added here. + * @param isValidatedFilter Whether the filter was pre-validated for JDOQL pushdown + * by the client; if it was and we fail to create a filter, we will throw. + * @return Resulting filter. Can be null if isValidatedFilter is false, and there was error. 
+ */ + private String makeQueryFilterString(String catName, String dbName, Table table, + ExpressionTree tree, Map params, + boolean isValidatedFilter) throws MetaException { + assert tree != null; + ExpressionTree.FilterBuilder queryBuilder = new ExpressionTree.FilterBuilder(isValidatedFilter); + if (table != null) { + queryBuilder.append("table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); + params.put("t1", table.getTableName()); + params.put("t2", table.getDbName()); + params.put("t3", table.getCatName()); + } else { + queryBuilder.append("database.name == dbName && database.catalogName == catName"); + params.put("dbName", dbName); + params.put("catName", catName); + } + + tree.accept(new ExpressionTree.JDOFilterGenerator(baseStore.getConf(), + table != null ? table.getPartitionKeys() : null, queryBuilder, params)); + if (queryBuilder.hasError()) { + assert !isValidatedFilter; + LOG.debug("JDO filter pushdown cannot be used: {}", queryBuilder.getErrorMessage()); + return null; + } + String jdoFilter = queryBuilder.getFilter(); + LOG.debug("jdoFilter = {}", jdoFilter); + return jdoFilter; + } + + private String makeQueryFilterString(String catName, String dbName, String tblName, + ExpressionTree tree, Map params, + boolean isValidatedFilter, List partitionKeys) throws MetaException { + assert tree != null; + ExpressionTree.FilterBuilder queryBuilder = new ExpressionTree.FilterBuilder(isValidatedFilter); + queryBuilder.append("table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); + params.put("t1", tblName); + params.put("t2", dbName); + params.put("t3", catName); + tree.accept(new ExpressionTree.JDOFilterGenerator(baseStore.getConf(), partitionKeys, queryBuilder, params)); + if (queryBuilder.hasError()) { + assert !isValidatedFilter; + LOG.debug("JDO filter pushdown cannot be used: {}", queryBuilder.getErrorMessage()); + return null; + } + String jdoFilter = queryBuilder.getFilter(); + 
LOG.debug("jdoFilter = {}", jdoFilter); + return jdoFilter; + } + + + private List getPartitionNamesViaOrm(String catName, String dbName, String tblName, + ExpressionTree tree, String order, Integer maxParts, boolean isValidatedFilter, + List partitionKeys) throws MetaException { + Map params = new HashMap(); + String jdoFilter = makeQueryFilterString(catName, dbName, tblName, tree, + params, isValidatedFilter, partitionKeys); + if (jdoFilter == null) { + assert !isValidatedFilter; + throw new MetaException("Failed to generate filter."); + } + + try (QueryWrapper query = new QueryWrapper(pm.newQuery( + "select partitionName from org.apache.hadoop.hive.metastore.model.MPartition"))) { + query.setFilter(jdoFilter); + List orderSpecs = MetaStoreUtils.makeOrderSpecs(order); + StringBuilder builder = new StringBuilder(); + for (Object[] spec : orderSpecs) { + // TODO: order by casted value if the type of partition key is not string + builder.append("values.get(").append(spec[0]).append(") ").append(spec[1]).append(","); + } + if (builder.length() > 0) { + builder.setLength(builder.length() - 1); + query.setOrdering(builder.toString()); + } else { + query.setOrdering("partitionName ascending"); + } + + if (maxParts > -1) { + query.setRange(0, maxParts); + } + + String parameterDeclaration = makeParameterDeclarationStringObj(params); + query.declareParameters(parameterDeclaration); + Collection jdoRes = (Collection) query.executeWithMap(params); + List result = new LinkedList(); + for (Object partName : jdoRes) { + result.add((String) partName); + } + return result; + } + } + + /** + * Gets the default partition name. + * @param inputDefaultPartName Incoming default partition name. + * @return Valid default partition name + */ + private String getDefaultPartitionName(String inputDefaultPartName) { + return (((inputDefaultPartName == null) || (inputDefaultPartName.isEmpty())) + ? 
MetastoreConf.getVar(baseStore.getConf(), MetastoreConf.ConfVars.DEFAULTPARTITIONNAME) + : inputDefaultPartName); + } + + /** + * Gets the table object for a given table, throws if anything goes wrong. + * @param tableName Table name. + * @return Table object. + */ + @Override + public MTable ensureGetMTable(TableName tableName) + throws NoSuchObjectException { + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + MTable mtable = getMTable(catName, dbName, tblName); + if (mtable == null) { + throw new NoSuchObjectException( + "Specified catalog.database.table does not exist : " + tableName); + } + return mtable; + } + + @Override + public List listPartitionNames(TableName tableName, String defaultPartName, byte[] exprBytes, String order, + int maxParts) throws MetaException, NoSuchObjectException { + final String defaultPartitionName = getDefaultPartitionName(defaultPartName); + final boolean isEmptyFilter = exprBytes == null || (exprBytes.length == 1 && exprBytes[0] == -1); + ExpressionTree tmp = null; + if (!isEmptyFilter) { + tmp = PartFilterExprUtil.makeExpressionTree(expressionProxy, exprBytes, + getDefaultPartitionName(defaultPartName), baseStore.getConf()); + } + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + final ExpressionTree exprTree = tmp; + return new GetListHelper(this, tableName) { + private List getPartNamesPrunedByExpr(Table table, boolean isJdoQuery) throws MetaException { + int max = isEmptyFilter ? 
maxParts : -1; + List result; + if (isJdoQuery) { + result = getPartitionNamesViaOrm(catName, dbName, tblName, ExpressionTree.EMPTY_TREE, + order, max, true, table.getPartitionKeys()); + } else { + MetaStoreDirectSql.SqlFilterForPushdown filter = new MetaStoreDirectSql.SqlFilterForPushdown(table, false); + result = getDirectSql().getPartitionNamesViaSql(filter, table.getPartitionKeys(), + defaultPartitionName, order, max); + } + if (!isEmptyFilter) { + prunePartitionNamesByExpr(catName, dbName, tblName, result, + new GetPartitionsArgs.GetPartitionsArgsBuilder() + .expr(exprBytes).defaultPartName(defaultPartName).max(maxParts).build()); + } + return result; + } + @Override + protected List getSqlResult() throws MetaException { + MetaStoreDirectSql.SqlFilterForPushdown filter = new MetaStoreDirectSql.SqlFilterForPushdown(getTable(), false); + List partNames = null; + Table table = getTable(); + if (exprTree != null) { + if (getDirectSql().generateSqlFilterForPushdown(table.getCatName(), table.getDbName(), table.getTableName(), + getTable().getPartitionKeys(), exprTree, defaultPartitionName, filter)) { + partNames = getDirectSql().getPartitionNamesViaSql(filter, table.getPartitionKeys(), + defaultPartitionName, order, (int)maxParts); + } + } + if (partNames == null) { + partNames = getPartNamesPrunedByExpr(table, false); + } + return partNames; + } + @Override + protected List getJdoResult() throws MetaException, NoSuchObjectException { + List result = null; + if (exprTree != null) { + try { + result = getPartitionNamesViaOrm(catName, dbName, tblName, exprTree, order, + maxParts, true, getTable().getPartitionKeys()); + } catch (MetaException e) { + result = null; + } + } + if (result == null) { + result = getPartNamesPrunedByExpr(getTable(), true); + } + return result; + } + }.run(true); + } + + private boolean prunePartitionNamesByExpr(String catName, String dbName, String tblName, + List result, GetPartitionsArgs args) throws MetaException { + MTable mTable = 
getMTable(catName, dbName, tblName); + List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); + boolean hasUnknownPartitions = expressionProxy.filterPartitionsByExpr( + partitionKeys, + args.getExpr(), + getDefaultPartitionName(args.getDefaultPartName()), + result); + if (args.getMax() >= 0 && result.size() > args.getMax()) { + result = result.subList(0, args.getMax()); + } + return hasUnknownPartitions; + } + + @Override + public boolean getPartitionsByExpr(TableName tableName, List result, GetPartitionsArgs args) + throws TException { + assert result != null; + byte[] expr = args.getExpr(); + final ExpressionTree exprTree = expr.length != 0 ? PartFilterExprUtil.makeExpressionTree( + expressionProxy, expr, getDefaultPartitionName(args.getDefaultPartName()), baseStore.getConf()) : ExpressionTree.EMPTY_TREE; + final AtomicBoolean hasUnknownPartitions = new AtomicBoolean(false); + + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + MTable mTable = ensureGetMTable(tableName); + List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); + boolean isAcidTable = TxnUtils.isAcidTable(mTable.getParameters()); + result.addAll(new GetListHelper(this, tableName) { + @Override + protected List getSqlResult() throws MetaException { + // If we have some sort of expression tree, try SQL filter pushdown. + if (exprTree != null) { + MetaStoreDirectSql.SqlFilterForPushdown filter = new MetaStoreDirectSql.SqlFilterForPushdown(); + if (getDirectSql().generateSqlFilterForPushdown(catName, dbName, tblName, partitionKeys, + exprTree, args.getDefaultPartName(), filter)) { + String catalogName = (catName != null) ? catName : getDefaultCatalog(baseStore.getConf()); + return getDirectSql().getPartitionsViaSqlFilter(catalogName, dbName, tblName, filter, + isAcidTable, args); + } + } + // We couldn't do SQL filter pushdown. 
Get names via normal means. + List partNames = new LinkedList<>(); + hasUnknownPartitions.set(getPartitionNamesPrunedByExprNoTxn( + catName, dbName, tblName, partitionKeys, expr, args.getDefaultPartName(), (short) args.getMax(), partNames)); + GetPartitionsArgs newArgs = new GetPartitionsArgs.GetPartitionsArgsBuilder(args).partNames(partNames).build(); + return getDirectSql().getPartitionsViaPartNames(catName, dbName, tblName, newArgs); + } + + @Override + protected List getJdoResult() throws MetaException, NoSuchObjectException { + // If we have some sort of expression tree, try JDOQL filter pushdown. + List result = null; + if (exprTree != null) { + result = getPartitionsViaOrmFilter(catName, dbName, tblName, exprTree, + false, partitionKeys, isAcidTable, args); + } + if (result == null) { + // We couldn't do JDOQL filter pushdown. Get names via normal means. + List partNames = new ArrayList<>(); + hasUnknownPartitions.set(getPartitionNamesPrunedByExprNoTxn( + catName, dbName, tblName, partitionKeys, expr, args.getDefaultPartName(), (short) args.getMax(), partNames)); + GetPartitionsArgs newArgs = new GetPartitionsArgs.GetPartitionsArgsBuilder(args).partNames(partNames).build(); + result = getPartitionsViaOrmFilter(catName, dbName, tblName, isAcidTable, newArgs); + } + return result; + } + }.run(false)); + return hasUnknownPartitions.get(); + } + + /** + * Gets partition names from the table via ORM (JDOQL) filter pushdown. + * @param tblName The table. + * @param tree The expression tree from which JDOQL filter will be made. + * @param isValidatedFilter Whether the filter was pre-validated for JDOQL pushdown by a client + * (old hive client or non-hive one); if it was and we fail to create a filter, we will throw. + * @param args additional arguments for getting partitions + * @return Resulting partitions. Can be null if isValidatedFilter is false, and + * there was error deriving the JDO filter. 
+ */ + private List getPartitionsViaOrmFilter(String catName, String dbName, String tblName, ExpressionTree tree, + boolean isValidatedFilter, List partitionKeys, boolean isAcidTable, + GetPartitionsArgs args) throws MetaException { + Map params = new HashMap<>(); + String jdoFilter = + makeQueryFilterString(catName, dbName, tblName, tree, params, isValidatedFilter, partitionKeys); + if (jdoFilter == null) { + assert !isValidatedFilter; + return null; + } + Query query = pm.newQuery(MPartition.class, jdoFilter); + if (args.getMax() >= 0) { + // User specified a row limit, set it on the Query + query.setRange(0, args.getMax()); + } + String parameterDeclaration = makeParameterDeclarationStringObj(params); + query.declareParameters(parameterDeclaration); + query.setOrdering("partitionName ascending"); + List mparts = (List) query.executeWithMap(params); + LOG.debug("Done executing query for getPartitionsViaOrmFilter"); + pm.retrieveAll(mparts); // TODO: why is this inconsistent with what we get by names? + LOG.debug("Done retrieving all objects for getPartitionsViaOrmFilter"); + List results = + convertToParts(catName, dbName, tblName, mparts, isAcidTable, conf, args); + return results; + } + + /** + * Gets partition names from the table via ORM (JDOQL) name filter. + * @param dbName Database name. + * @param tblName Table name. + * @param isAcidTable True if the table is ACID + * @param args additional arguments for getting partitions + * @return Resulting partitions. 
+ */ + private List getPartitionsViaOrmFilter(String catName, String dbName, String tblName, + boolean isAcidTable, GetPartitionsArgs args) throws MetaException { + List partNames = args.getPartNames(); + if (partNames.isEmpty()) { + return Collections.emptyList(); + } + return Batchable.runBatched(batchSize, partNames, new Batchable() { + @Override + public List run(List input) throws MetaException { + Pair> queryWithParams = + getPartQueryWithParams(pm, catName, dbName, tblName, input); + + try (QueryWrapper query = new QueryWrapper(queryWithParams.getLeft())) { + query.setResultClass(MPartition.class); + query.setClass(MPartition.class); + query.setOrdering("partitionName ascending"); + + List mparts = (List) query.executeWithMap(queryWithParams.getRight()); + List partitions = convertToParts(catName, dbName, tblName, mparts, + isAcidTable, conf, args); + + return partitions; + } + } + }); + } + + + /** + * Gets the partition names from a table, pruned using an expression. + * @param catName + * @param dbName + * @param tblName + * @param expr Expression. + * @param defaultPartName Default partition name from job config, if any. + * @param maxParts Maximum number of partition names to return. + * @param result The resulting names. + * @return Whether the result contains any unknown partitions. 
+ */ + private boolean getPartitionNamesPrunedByExprNoTxn(String catName, String dbName, String tblName, List partColumns, byte[] expr, + String defaultPartName, short maxParts, List result) throws MetaException { + result.addAll(getPartitionNamesNoTxn(catName, dbName, tblName, (short) -1)); + return prunePartitionNamesByExpr(catName, dbName, tblName, result, + new GetPartitionsArgs.GetPartitionsArgsBuilder() + .expr(expr).defaultPartName(defaultPartName).max(maxParts).build()); + } + + private List getPartitionNamesNoTxn(String catName, String dbName, String tableName, short max) { + List pns = new ArrayList<>(); + if (max == 0) { + return pns; + } + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + try (QueryWrapper query = new QueryWrapper( + pm.newQuery("select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " + + "where table.database.name == t1 && table.tableName == t2 && table.database.catalogName == t3 " + + "order by partitionName asc"))) { + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + query.setResult("partitionName"); + + if (max > 0) { + query.setRange(0, max); + } + Collection names = (Collection) query.execute(dbName, tableName, catName); + pns.addAll(names); + + return pns; + } + } + + @Override + public List getPartitionsByNames(TableName tableName, GetPartitionsArgs args) + throws MetaException, NoSuchObjectException { + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + return new GetListHelper(this, tableName) { + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql().getPartitionsViaPartNames(catName, dbName, tblName, args); + } + @Override + protected List getJdoResult() throws MetaException, NoSuchObjectException { + return 
getPartitionsViaOrmFilter(catName, dbName, tblName, false, args); + } + }.run(false); + } + + @Override + public Partition alterPartition(TableName tableName, List part_vals, Partition new_part, + String queryValidWriteIds) throws InvalidObjectException, MetaException { + String catName = normalizeIdentifier(tableName.getCat()); + String dbname = normalizeIdentifier(tableName.getDb()); + String name = normalizeIdentifier(tableName.getTable()); + AtomicReference oldCd = new AtomicReference<>(); + Partition result = alterPartitionNoTxn(catName, dbname, name, part_vals, new_part, queryValidWriteIds, oldCd); + removeUnusedColumnDescriptor(oldCd.get()); + return result; + } + + /** + * Alters an existing partition. Initiates copy of SD. Returns the old CD. + * @param part_vals Partition values (of the original partition instance) + * @param newPart Partition object containing new information + */ + private Partition alterPartitionNoTxn(String catName, String dbname, String name, + List part_vals, Partition newPart, String validWriteIds, AtomicReference oldCd) + throws InvalidObjectException, MetaException { + MTable table = this.getMTable(newPart.getCatName(), newPart.getDbName(), newPart.getTableName()); + MPartition oldp = getMPartition(catName, dbname, name, part_vals, table); + return alterPartitionNoTxn(catName, dbname, name, oldp, newPart, + validWriteIds, oldCd, table); + } + + private Partition alterPartitionNoTxn(String catName, String dbname, + String name, MPartition oldp, Partition newPart, + String validWriteIds, + AtomicReference oldCd, MTable table) + throws InvalidObjectException, MetaException { + catName = normalizeIdentifier(catName); + name = normalizeIdentifier(name); + dbname = normalizeIdentifier(dbname); + MPartition newp = convertToMPart(newPart, table); + MColumnDescriptor oldCD = null; + MStorageDescriptor oldSD = oldp.getSd(); + if (oldSD != null) { + oldCD = oldSD.getCD(); + } + if (newp == null) { + throw new 
InvalidObjectException("partition does not exist."); + } + oldp.setValues(newp.getValues()); + oldp.setPartitionName(newp.getPartitionName()); + boolean isTxn = TxnUtils.isTransactionalTable(table.getParameters()); + if (isTxn && areTxnStatsSupported) { + // Transactional table is altered without a txn. Make sure there are no changes to the flag. + String errorMsg = verifyStatsChangeCtx(TableName.getDbTable(dbname, name), + oldp.getParameters(), + newPart.getParameters(), newPart.getWriteId(), validWriteIds, false); + if (errorMsg != null) { + throw new MetaException(errorMsg); + } + } + oldp.setParameters(newPart.getParameters()); + if (!TableType.VIRTUAL_VIEW.name().equals(oldp.getTable().getTableType())) { + copyMSD(newp.getSd(), oldp.getSd()); + } + if (newp.getCreateTime() != oldp.getCreateTime()) { + oldp.setCreateTime(newp.getCreateTime()); + } + if (newp.getLastAccessTime() != oldp.getLastAccessTime()) { + oldp.setLastAccessTime(newp.getLastAccessTime()); + } + + // If transactional, add/update the MUPdaterTransaction + // for the current updater query. + if (isTxn) { + if (!areTxnStatsSupported) { + StatsSetupConst.setBasicStatsState(oldp.getParameters(), StatsSetupConst.FALSE); + } else if (validWriteIds != null && newPart.getWriteId() > 0) { + // Check concurrent INSERT case and set false to the flag. + if (!isCurrentStatsValidForTheQuery(oldp.getParameters(), oldp.getWriteId(), validWriteIds, true)) { + StatsSetupConst.setBasicStatsState(oldp.getParameters(), StatsSetupConst.FALSE); + LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition " + + dbname + "." + name + "." 
+ oldp.getPartitionName() + " will be made persistent."); + } + oldp.setWriteId(newPart.getWriteId()); + } + } + + oldCd.set(oldCD); + return convertToPart(catName, dbname, name, oldp, TxnUtils.isAcidTable(table.getParameters()), conf); + } + + @Override + public List alterPartitions(TableName tableName, List> part_vals, + List newParts, long writeId, String queryWriteIdList) throws InvalidObjectException, MetaException { + List results = new ArrayList<>(newParts.size()); + if (newParts.isEmpty()) { + return results; + } + try { + MTable table = ensureGetMTable(tableName); + if (writeId > 0) { + newParts.forEach(newPart -> newPart.setWriteId(writeId)); + } + List partCols = convertToFieldSchemas(table.getPartitionKeys()); + List partNames = new ArrayList<>(); + for (List partVal : part_vals) { + partNames.add(Warehouse.makePartName(partCols, partVal)); + } + results = alterPartitionsInternal(table, partNames, newParts, queryWriteIdList); + } catch (NoSuchObjectException nse) { + throw new MetaException(nse.getMessage()); + } + // commit the changes + return results; + } + + protected List alterPartitionsInternal(MTable table, + List partNames, List newParts, String queryWriteIdList) + throws InvalidObjectException, MetaException, NoSuchObjectException { + // Validate new parts: StorageDescriptor and SerDeInfo must be set in Partition. 
+ if (!TableType.VIRTUAL_VIEW.name().equals(table.getTableType())) { + for (Partition newPart : newParts) { + if (!newPart.isSetSd() || !newPart.getSd().isSetSerdeInfo()) { + throw new InvalidObjectException("Partition does not set storageDescriptor or serdeInfo."); + } + } + } + + String dbName = table.getDatabase().getName(); + String tblName = table.getTableName(); + for (Partition tmpPart : newParts) { + if (!tmpPart.getDbName().equalsIgnoreCase(dbName)) { + throw new MetaException("Invalid DB name : " + tmpPart.getDbName()); + } + if (!tmpPart.getTableName().equalsIgnoreCase(tblName)) { + throw new MetaException("Invalid table name : " + tmpPart.getDbName()); + } + } + return new GetListHelper(this, null) { + @Override + protected List getSqlResult() + throws MetaException { + return getDirectSql().alterPartitions(table, partNames, newParts, queryWriteIdList); + } + + @Override + protected List getJdoResult() + throws MetaException, InvalidObjectException { + return alterPartitionsViaJdo(table, partNames, newParts, queryWriteIdList); + } + }.run(false); + } + + private List alterPartitionsViaJdo(MTable table, List partNames, + List newParts, String queryWriteIdList) + throws MetaException, InvalidObjectException { + String catName = table.getDatabase().getCatalogName(); + String dbName = table.getDatabase().getName(); + String tblName = table.getTableName(); + List results = new ArrayList<>(newParts.size()); + List mPartitionList; + + try (QueryWrapper query = new QueryWrapper(pm.newQuery(MPartition.class, + "table.tableName == t1 && table.database.name == t2 && t3.contains(partitionName) " + + " && table.database.catalogName == t4"))) { + query.declareParameters("java.lang.String t1, java.lang.String t2, java.util.Collection t3, " + + "java.lang.String t4"); + mPartitionList = (List) query.executeWithArray(tblName, dbName, partNames, catName); + pm.retrieveAll(mPartitionList); + + if (mPartitionList.size() > newParts.size()) { + throw new 
MetaException("Expecting only one partition but more than one partitions are found."); + } + + Map, MPartition> mPartsMap = new HashMap(); + for (MPartition mPartition : mPartitionList) { + mPartsMap.put(mPartition.getValues(), mPartition); + } + + Set oldCds = new HashSet<>(); + AtomicReference oldCdRef = new AtomicReference<>(); + for (Partition tmpPart : newParts) { + oldCdRef.set(null); + Partition result = alterPartitionNoTxn(catName, dbName, tblName, + mPartsMap.get(tmpPart.getValues()), tmpPart, queryWriteIdList, oldCdRef, table); + results.add(result); + if (oldCdRef.get() != null) { + oldCds.add(oldCdRef.get()); + } + } + for (MColumnDescriptor oldCd : oldCds) { + removeUnusedColumnDescriptor(oldCd); + } + } + + return results; + } + + @Override + public List getPartitionsByFilter(TableName tableName, GetPartitionsArgs args) + throws MetaException, NoSuchObjectException { + + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + + MTable mTable = ensureGetMTable(tableName); + List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); + boolean isAcidTable = TxnUtils.isAcidTable(mTable.getParameters()); + String filter = args.getFilter(); + final ExpressionTree tree = (filter != null && !filter.isEmpty()) + ? 
PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; + return new GetListHelper(this, tableName) { + private final MetaStoreDirectSql.SqlFilterForPushdown filter = new MetaStoreDirectSql.SqlFilterForPushdown(); + + @Override + protected boolean canUseDirectSql() throws MetaException { + return getDirectSql().generateSqlFilterForPushdown(catName, dbName, tblName, partitionKeys, tree, null, filter); + } + + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql().getPartitionsViaSqlFilter(catName, dbName, tblName, filter, isAcidTable, args); + } + + @Override + protected List getJdoResult() throws MetaException, NoSuchObjectException { + return getPartitionsViaOrmFilter(catName, dbName, tblName, tree, true, + partitionKeys, isAcidTable, args); + } + }.run(false); + } + + @Override + public List getPartitionSpecsByFilterAndProjection(Table table, GetProjectionsSpec partitionsProjectSpec, + GetPartitionsFilterSpec filterSpec) throws MetaException, NoSuchObjectException { + List fieldList = null; + String inputIncludePattern = null; + String inputExcludePattern = null; + if (partitionsProjectSpec != null) { + fieldList = partitionsProjectSpec.getFieldList(); + if (partitionsProjectSpec.isSetIncludeParamKeyPattern()) { + inputIncludePattern = partitionsProjectSpec.getIncludeParamKeyPattern(); + } + if (partitionsProjectSpec.isSetExcludeParamKeyPattern()) { + inputExcludePattern = partitionsProjectSpec.getExcludeParamKeyPattern(); + } + } + TableName tableName = new TableName(table.getCatName(), table.getDbName(), table.getTableName()); + if (fieldList == null || fieldList.isEmpty()) { + // no fields are requested. 
Fallback to regular getPartitions implementation to return all the fields + GetPartitionsArgs.GetPartitionsArgsBuilder argsBuilder = new GetPartitionsArgs.GetPartitionsArgsBuilder() + .excludeParamKeyPattern(inputExcludePattern) + .includeParamKeyPattern(inputIncludePattern); + return getPartitions(tableName, argsBuilder.build()); + } + + // anonymous class below requires final String objects + final String includeParamKeyPattern = inputIncludePattern; + final String excludeParamKeyPattern = inputExcludePattern; + + return new GetListHelper(this, tableName, fieldList) { + private final MetaStoreDirectSql.SqlFilterForPushdown filter = new MetaStoreDirectSql.SqlFilterForPushdown(); + private ExpressionTree tree; + + @Override + protected boolean canUseDirectSql() throws MetaException { + if (filterSpec.isSetFilterMode() && filterSpec.getFilterMode().equals(PartitionFilterMode.BY_EXPR)) { + // if the filter mode is BY_EXPR initialize the filter and generate the expression tree + // if there are more than one filter string we AND them together + initExpressionTree(); + return getDirectSql().generateSqlFilterForPushdown(table.getCatName(), table.getDbName(), table.getTableName(), + table.getPartitionKeys(), tree, null, filter); + } + // BY_VALUES and BY_NAMES are always supported + return true; + } + + private void initExpressionTree() throws MetaException { + StringBuilder filterBuilder = new StringBuilder(); + int len = filterSpec.getFilters().size(); + List filters = filterSpec.getFilters(); + for (int i = 0; i < len; i++) { + filterBuilder.append('('); + filterBuilder.append(filters.get(i)); + filterBuilder.append(')'); + if (i + 1 < len) { + filterBuilder.append(" AND "); + } + } + String filterStr = filterBuilder.toString(); + tree = PartFilterExprUtil.parseFilterTree(filterStr); + } + + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql().getPartitionsUsingProjectionAndFilterSpec(getTable(), getPartitionFields(), + 
includeParamKeyPattern, excludeParamKeyPattern, filterSpec, filter); + } + + @Override + protected List getJdoResult() throws MetaException { + // For single-valued fields we can use setResult() to implement projection of fields but + // JDO doesn't support multi-valued fields in setResult() so currently JDO implementation + // fallbacks to full-partition fetch if the requested fields contain multi-valued fields + List fieldNames = PartitionProjectionEvaluator.getMPartitionFieldNames(getPartitionFields()); + Map params = new HashMap<>(); + String jdoFilter = null; + if (filterSpec.isSetFilterMode()) { + // generate the JDO filter string + switch(filterSpec.getFilterMode()) { + case BY_EXPR: + if (tree == null) { + // tree could be null when directSQL is disabled + initExpressionTree(); + } + jdoFilter = + makeQueryFilterString(table.getCatName(), table.getDbName(), table, tree, params, + true); + if (jdoFilter == null) { + throw new MetaException("Could not generate JDO filter from given expression"); + } + break; + case BY_NAMES: + jdoFilter = getJDOFilterStrForPartitionNames(table.getCatName(), table.getDbName(), + table.getTableName(), filterSpec.getFilters(), params); + break; + case BY_VALUES: + jdoFilter = getJDOFilterStrForPartitionVals(table, filterSpec.getFilters(), params); + break; + default: + throw new MetaException("Unsupported filter mode " + filterSpec.getFilterMode()); + } + } else { + // filter mode is not set create simple JDOFilterStr and params + jdoFilter = "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"; + params.put("t1", normalizeIdentifier(table.getTableName())); + params.put("t2", normalizeIdentifier(table.getDbName())); + params.put("t3", normalizeIdentifier(table.getCatName())); + } + try { + List mparts = listMPartitionsWithProjection(fieldNames, jdoFilter, params); + return convertToParts(table.getCatName(), table.getDbName(), table.getTableName(), + mparts, false, conf, new 
GetPartitionsArgs.GetPartitionsArgsBuilder() + .excludeParamKeyPattern(excludeParamKeyPattern) + .includeParamKeyPattern(includeParamKeyPattern) + .build()); + } catch (MetaException me) { + throw me; + } catch (Exception e) { + throw new MetaException(e.getMessage()); + } + } + }.run(true); + } + + @Override + public List listPartitionsPsWithAuth(TableName tableName, GetPartitionsArgs args) + throws MetaException, InvalidObjectException, NoSuchObjectException { + List partitions; + LOG.debug("executing listPartitionNamesPsWithAuth"); + MTable mtbl = ensureGetMTable(tableName); + String userName = args.getUserName(); + List groupNames = args.getGroupNames(); + List part_vals = args.getPart_vals(); + List partNames = args.getPartNames(); + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + boolean getauth = null != userName && null != groupNames && + "TRUE".equalsIgnoreCase( + mtbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE")); + if (MetaStoreUtils.arePartValsEmpty(part_vals) && partNames == null) { + partitions = getPartitions(tableName, args); + } else if (partNames != null) { + partitions = getPartitionsByNames(tableName, args); + } else { + partitions = getPartitionsByPs(tableName, args); + } + if (getauth) { + for (Partition part : partitions) { + String partName = Warehouse.makePartName(convertToFieldSchemas(mtbl + .getPartitionKeys()), part.getValues()); + PrincipalPrivilegeSet partAuth = baseStore.getPartitionPrivilegeSet(catName, dbName, + tblName, partName, userName, groupNames); + part.setPrivileges(partAuth); + } + } + return partitions; + } + + private List getPartitionsByPs(TableName tableName, GetPartitionsArgs args) + throws MetaException, NoSuchObjectException { + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = 
normalizeIdentifier(tableName.getTable()); + + return new GetListHelper(this, tableName) { + + @Override + protected List getSqlResult() throws MetaException { + return getDirectSql().getPartitionsViaSqlPs(getTable(), args); + } + + @Override + protected List getJdoResult() + throws MetaException, NoSuchObjectException { + List result = new ArrayList<>(); + Collection parts = getPartitionPsQueryResults(catName, dbName, tblName, + args.getPart_vals(), args.getMax(), null); + boolean isAcidTable = TxnUtils.isAcidTable(getTable()); + for (MPartition o : parts) { + Partition part = convertToPart(catName, dbName, tblName, o, isAcidTable, conf, args); + result.add(part); + } + return result; + } + }.run(true); + } + + /** + * Retrieves a Collection of partition-related results from the database that match + * the partial specification given for a specific table. + * @param dbName the name of the database + * @param tableName the name of the table + * @param part_vals the partial specification values + * @param max_parts the maximum number of partitions to return + * @param resultsCol the metadata column of the data to return, e.g. partitionName, etc. + * if resultsCol is empty or null, a collection of MPartition objects is returned + * @return A Collection of partition-related items from the db that match the partial spec + * for a table. The type of each item in the collection corresponds to the column + * you want results for. E.g., if resultsCol is partitionName, the Collection + * has types of String, and if resultsCol is null, the types are MPartition. 
+ */ + private Collection getPartitionPsQueryResults(String catName, String dbName, + String tableName, List part_vals, + int max_parts, String resultsCol) + throws MetaException, NoSuchObjectException { + + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + Table table = getTable(new TableName(catName, dbName, tableName), null, -1); + if (table == null) { + throw new NoSuchObjectException(TableName.getQualified(catName, dbName, tableName) + " table not found"); + } + // size is known since it contains dbName, catName, tblName and partialRegex + // pattern + Map params = new HashMap<>(4); + String filter = getJDOFilterStrForPartitionVals(table, part_vals, params); + try (QueryWrapper query = new QueryWrapper(pm.newQuery(MPartition.class))) { + query.setFilter(filter); + query.setOrdering("partitionName ascending"); + query.declareParameters(makeParameterDeclarationString(params)); + if (max_parts >= 0) { + // User specified a row limit, set it on the Query + query.setRange(0, max_parts); + } + if (resultsCol != null && !resultsCol.isEmpty()) { + query.setResult(resultsCol); + } + + Collection result = (Collection) query.executeWithMap(params); + + return Collections.unmodifiableCollection(new ArrayList<>(result)); + } + } + + + private String getJDOFilterStrForPartitionVals(Table table, List vals, + Map params) throws MetaException { + String partNameMatcher = MetaStoreUtils.makePartNameMatcher(table, vals, ".*"); + params.put("dbName", table.getDbName()); + params.put("catName", table.getCatName()); + params.put("tableName", table.getTableName()); + params.put("partialRegex", partNameMatcher); + return "table.database.name == dbName" + " && table.database.catalogName == catName" + + " && table.tableName == tableName" + " && partitionName.matches(partialRegex)"; + } + + // This code is only executed in JDO code path, not from direct SQL code path. 
+ private List listMPartitionsWithProjection(List fieldNames, String jdoFilter, + Map params) throws Exception { + List mparts = null; + LOG.debug("Executing listMPartitionsWithProjection"); + Query query = pm.newQuery(MPartition.class, jdoFilter); + String parameterDeclaration = makeParameterDeclarationStringObj(params); + query.declareParameters(parameterDeclaration); + query.setOrdering("partitionName ascending"); + if (fieldNames == null || fieldNames.isEmpty()) { + // full fetch of partitions + mparts = (List) query.executeWithMap(params); + pm.retrieveAll(mparts); + pm.makeTransientAll(mparts); + mparts = new ArrayList<>(mparts); + } else { + // fetch partially filled partitions using result clause + query.setResult(Joiner.on(',').join(fieldNames)); + // if more than one fields are in the result class the return type is + // List + if (fieldNames.size() > 1) { + List results = (List) query.executeWithMap(params); + mparts = new ArrayList<>(results.size()); + for (Object[] row : results) { + MPartition mpart = new MPartition(); + int i = 0; + for (Object val : row) { + MetaStoreServerUtils.setNestedProperty(mpart, fieldNames.get(i), val, true); + i++; + } + mparts.add(mpart); + } + } else { + // only one field is requested, return type is List + List results = (List) query.executeWithMap(params); + mparts = new ArrayList<>(results.size()); + for (Object row : results) { + MPartition mpart = new MPartition(); + MetaStoreServerUtils.setNestedProperty(mpart, fieldNames.get(0), row, true); + mparts.add(mpart); + } + } + } + return mparts; + } + + @Override + public int getNumPartitionsByFilter(TableName tableName, String filter) throws MetaException, NoSuchObjectException { + final ExpressionTree exprTree = org.apache.commons.lang3.StringUtils.isNotEmpty(filter) + ? 
PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; + + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + MTable mTable = ensureGetMTable(tableName); + List partitionKeys = convertToFieldSchemas(mTable.getPartitionKeys()); + + return new GetHelper(this, tableName) { + private final MetaStoreDirectSql.SqlFilterForPushdown filter = new MetaStoreDirectSql.SqlFilterForPushdown(); + + @Override + protected String describeResult() { + return "Partition count"; + } + + @Override + protected boolean canUseDirectSql() throws MetaException { + return getDirectSql().generateSqlFilterForPushdown(catName, dbName, tblName, partitionKeys, exprTree, null, filter); + } + + @Override + protected Integer getSqlResult() throws MetaException { + return getDirectSql().getNumPartitionsViaSqlFilter(filter); + } + @Override + protected Integer getJdoResult() throws MetaException, NoSuchObjectException { + return getNumPartitionsViaOrmFilter(catName ,dbName, tblName, exprTree, true, partitionKeys); + } + }.run(false); + } + + private Integer getNumPartitionsViaOrmFilter(String catName, String dbName, String tblName, ExpressionTree tree, boolean isValidatedFilter, List partitionKeys) + throws MetaException { + Map params = new HashMap<>(); + String jdoFilter = makeQueryFilterString(catName, dbName, tblName, tree, + params, isValidatedFilter, partitionKeys); + if (jdoFilter == null) { + assert !isValidatedFilter; + return null; + } + + Query query = pm.newQuery( + "select count(partitionName) from org.apache.hadoop.hive.metastore.model.MPartition"); + query.setFilter(jdoFilter); + String parameterDeclaration = makeParameterDeclarationStringObj(params); + query.declareParameters(parameterDeclaration); + Long result = (Long) query.executeWithMap(params); + + return result.intValue(); + } + + @Override + public int getNumPartitionsByPs(TableName 
tableName, List partVals) + throws MetaException, NoSuchObjectException { + + return new GetHelper(this, tableName) { + @Override + protected String describeResult() { + return "Partition count by partial values"; + } + + @Override + protected Integer getSqlResult() throws MetaException { + return getDirectSql().getNumPartitionsViaSqlPs(getTable(), partVals); + } + + @Override + protected Integer getJdoResult() + throws MetaException, NoSuchObjectException, InvalidObjectException { + // size is known since it contains dbName, catName, tblName and partialRegex pattern + Map params = new HashMap<>(4); + String filter = getJDOFilterStrForPartitionVals(getTable(), partVals, params); + try (QueryWrapper query = new QueryWrapper(pm.newQuery( + "select count(partitionName) from org.apache.hadoop.hive.metastore.model.MPartition"))) { + query.setFilter(filter); + query.declareParameters(makeParameterDeclarationString(params)); + Long result = (Long) query.executeWithMap(params); + + return result.intValue(); + } + } + }.run(true); + } + + @Override + public PartitionValuesResponse listPartitionValues(TableName table, List cols, boolean applyDistinct, + String filter, boolean ascending, List order, long maxParts) throws MetaException { + String catName = normalizeIdentifier(table.getCat()); + String dbName = normalizeIdentifier(table.getDb()); + String tableName = normalizeIdentifier(table.getTable()); + try { + if (filter == null || filter.isEmpty()) { + PartitionValuesResponse response = getDistinctValuesForPartitionsNoTxn(catName, dbName, + tableName, cols, applyDistinct, maxParts); + LOG.info("Number of records fetched: {}", response.getPartitionValues().size()); + return response; + } else { + PartitionValuesResponse response = + extractPartitionNamesByFilter(catName, dbName, tableName, filter, cols, ascending, maxParts); + if (response.getPartitionValues() != null) { + LOG.info("Number of records fetched with filter: {}", response.getPartitionValues().size()); + } + 
return response; + } + } catch (Exception t) { + LOG.error("Exception in ORM", t); + throw new MetaException("Error retrieving partition values: " + t); + } + } + + private PartitionValuesResponse extractPartitionNamesByFilter( + String catName, String dbName, String tableName, String filter, List cols, + boolean ascending, long maxParts) + throws MetaException, NoSuchObjectException { + + LOG.info("Table: {} filter: \"{}\" cols: {}", + TableName.getQualified(catName, dbName, tableName), filter, cols); + List partitionNames = null; + List partitions = null; + Table tbl = getTable(new TableName(catName, dbName, tableName), null, -1); + try { + // Get partitions by name - ascending or descending + partitionNames = getPartitionNamesByFilter(catName, dbName, tableName, filter, ascending, + maxParts); + } catch (MetaException e) { + LOG.warn("Querying by partition names failed, trying out with partition objects, filter: {}", filter); + } + + if (partitionNames == null) { + partitions = getPartitionsByFilter(new TableName(catName, dbName, tableName), + new GetPartitionsArgs.GetPartitionsArgsBuilder().filter(filter).max((short) maxParts).build()); + } + + if (partitions != null) { + partitionNames = new ArrayList<>(partitions.size()); + for (Partition partition : partitions) { + // Check for NULL's just to be safe + if (tbl.getPartitionKeys() != null && partition.getValues() != null) { + partitionNames.add(Warehouse.makePartName(tbl.getPartitionKeys(), partition.getValues())); + } + } + } + + if (partitionNames == null) { + throw new MetaException("Cannot obtain list of partitions by filter:\"" + filter + + "\" for " + TableName.getQualified(catName, dbName, tableName)); + } + + if (!ascending) { + partitionNames.sort(Collections.reverseOrder()); + } + + // Return proper response + PartitionValuesResponse response = new PartitionValuesResponse(); + response.setPartitionValues(new ArrayList<>(partitionNames.size())); + LOG.info("Converting responses to Partition values for 
items: {}", partitionNames.size()); + for (String partName : partitionNames) { + ArrayList vals = new ArrayList<>(Collections.nCopies(tbl.getPartitionKeys().size(), null)); + PartitionValuesRow row = new PartitionValuesRow(); + Warehouse.makeValsFromName(partName, vals); + for (String value : vals) { + row.addToRow(value); + } + response.addToPartitionValues(row); + } + return response; + } + + private PartitionValuesResponse getDistinctValuesForPartitionsNoTxn( + String catName, String dbName, String tableName, List cols, + boolean applyDistinct, long maxParts) + throws MetaException { + Query q = pm.newQuery("select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " + + "where table.database.name == t1 && table.database.catalogName == t2 && " + + "table.tableName == t3 "); + q.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + + // TODO: Ordering seems to affect the distinctness, needs checking, disabling. +/* + if (ascending) { + q.setOrdering("partitionName ascending"); + } else { + q.setOrdering("partitionName descending"); + } +*/ + if (maxParts > 0) { + q.setRange(0, maxParts); + } + StringBuilder partValuesSelect = new StringBuilder(256); + if (applyDistinct) { + partValuesSelect.append("DISTINCT "); + } + List partitionKeys = + getTable(new TableName(catName, dbName, tableName), null, -1).getPartitionKeys(); + for (FieldSchema key : cols) { + partValuesSelect.append(extractPartitionKey(key, partitionKeys)).append(", "); + } + partValuesSelect.setLength(partValuesSelect.length() - 2); + LOG.info("Columns to be selected from Partitions: {}", partValuesSelect); + q.setResult(partValuesSelect.toString()); + + PartitionValuesResponse response = new PartitionValuesResponse(); + response.setPartitionValues(new ArrayList<>()); + if (cols.size() > 1) { + List results = (List) q.execute(dbName, catName, tableName); + for (Object[] row : results) { + PartitionValuesRow rowResponse = new PartitionValuesRow(); + 
for (Object columnValue : row) { + rowResponse.addToRow((String) columnValue); + } + response.addToPartitionValues(rowResponse); + } + } else { + List results = (List) q.execute(dbName, catName, tableName); + for (Object row : results) { + PartitionValuesRow rowResponse = new PartitionValuesRow(); + rowResponse.addToRow((String) row); + response.addToPartitionValues(rowResponse); + } + } + return response; + } + + private String extractPartitionKey(FieldSchema key, List pkeys) { + StringBuilder buffer = new StringBuilder(256); + + assert pkeys.size() >= 1; + + String partKey = "/" + key.getName() + "="; + + // Table is partitioned by single key + if (pkeys.size() == 1 && (pkeys.get(0).getName().matches(key.getName()))) { + buffer.append("partitionName.substring(partitionName.indexOf(\"") + .append(key.getName()).append("=\") + ").append(key.getName().length() + 1) + .append(")"); + + // First partition key - anything between key= and first / + } else if ((pkeys.get(0).getName().matches(key.getName()))) { + + buffer.append("partitionName.substring(partitionName.indexOf(\"") + .append(key.getName()).append("=\") + ").append(key.getName().length() + 1).append(", ") + .append("partitionName.indexOf(\"/\")") + .append(")"); + + // Last partition key - anything between /key= and end + } else if ((pkeys.get(pkeys.size() - 1).getName().matches(key.getName()))) { + buffer.append("partitionName.substring(partitionName.indexOf(\"") + .append(partKey).append("\") + ").append(partKey.length()) + .append(")"); + + // Intermediate key - anything between /key= and the following / + } else { + + buffer.append("partitionName.substring(partitionName.indexOf(\"") + .append(partKey).append("\") + ").append(partKey.length()).append(", ") + .append("partitionName.indexOf(\"/\", partitionName.indexOf(\"").append(partKey) + .append("\") + 1))"); + } + LOG.info("Query for Key:" + key.getName() + " is :" + buffer); + return buffer.toString(); + } + + private List 
getPartitionNamesByFilter(String catName, String dbName, String tableName, + String filter, boolean ascending, long maxParts) + throws MetaException { + List partNames = new ArrayList<>(); + Query query = null; + LOG.debug("Executing getPartitionNamesByFilter"); + catName = normalizeIdentifier(catName); + dbName = dbName.toLowerCase(); + tableName = tableName.toLowerCase(); + + MTable mtable = getMTable(catName, dbName, tableName); + if( mtable == null ) { + // To be consistent with the behavior of listPartitionNames, if the + // table or db does not exist, we return an empty list + return partNames; + } + Map params = new HashMap<>(); + String queryFilterString = makeQueryFilterString(catName, dbName, mtable, filter, params); + query = pm.newQuery( + "select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " + + "where " + queryFilterString); + + if (maxParts >= 0) { + //User specified a row limit, set it on the Query + query.setRange(0, maxParts); + } + + LOG.debug("Filter specified is {}, JDOQL filter is {}", filter, + queryFilterString); + + LOG.debug("Parms is {}", params); + + String parameterDeclaration = makeParameterDeclarationStringObj(params); + query.declareParameters(parameterDeclaration); + if (ascending) { + query.setOrdering("partitionName ascending"); + } else { + query.setOrdering("partitionName descending"); + } + query.setResult("partitionName"); + + Collection names = (Collection) query.executeWithMap(params); + partNames = new ArrayList<>(names); + + LOG.debug("Done executing query for getPartitionNamesByFilter"); + return partNames; + } + + @Override + public List listPartitionNamesPs(TableName tableName, List partVals, short maxParts) + throws MetaException, NoSuchObjectException { + LOG.debug("Executing listPartitionNamesPs"); + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + Collection 
names = getPartitionPsQueryResults(catName, dbName, tblName, + partVals, maxParts, "partitionName"); + return new ArrayList<>(names); + } + + @Override + public Partition getPartitionWithAuth(TableName tableName, List partVals, String user_name, + List group_names) throws MetaException, NoSuchObjectException, InvalidObjectException { + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + MPartition mpart = getMPartition(catName, dbName, tblName, partVals, null); + if (mpart == null) { + throw new NoSuchObjectException("partition values=" + + partVals.toString()); + } + MTable mtbl = mpart.getTable(); + + Partition part = convertToPart(catName, dbName, tblName, mpart, TxnUtils.isAcidTable(mtbl.getParameters()), conf); + if ("TRUE".equalsIgnoreCase(mtbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) { + String partName = Warehouse.makePartName(convertToFieldSchemas(mtbl + .getPartitionKeys()), partVals); + PrincipalPrivilegeSet partAuth = baseStore.getPartitionPrivilegeSet(catName, dbName, + tblName, partName, user_name, group_names); + part.setPrivileges(partAuth); + } + return part; + } + + /** + * Makes a JDO query filter string. + * Makes a JDO query filter string for tables or partitions. + * @param dbName Database name. + * @param mtable Table. If null, the query returned is over tables in a database. + * If not null, the query returned is over partitions in a table. + * @param filter The filter from which JDOQL filter will be made. + * @param params Parameters for the filter. Some parameters may be added here. + * @return Resulting filter. + */ + private String makeQueryFilterString(String catName, String dbName, MTable mtable, String filter, + Map params) throws MetaException { + ExpressionTree tree = (filter != null && !filter.isEmpty()) + ? 
PartFilterExprUtil.parseFilterTree(filter) : ExpressionTree.EMPTY_TREE; + return makeQueryFilterString(catName, dbName, convertToTable(mtable, baseStore.getConf()), tree, params, true); + } + + @Override + public void updateCreationMetadata(TableName tableName, CreationMetadata cm) throws MetaException { + // Update creation metadata + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String name = normalizeIdentifier(tableName.getTable()); + MCreationMetadata newMcm = convertToMCreationMetadata(cm, baseStore); + MCreationMetadata mcm = getCreationMetadata(catName, dbName, name); + mcm.setTables(newMcm.getTables()); + mcm.setMaterializationTime(newMcm.getMaterializationTime()); + mcm.setTxnList(newMcm.getTxnList()); + // commit the changes + cm.setMaterializationTime(newMcm.getMaterializationTime()); + } + + @Override + public List
getAllMaterializedViewObjectsForRewriting(String catName) throws MetaException { + List
allMaterializedViews = new ArrayList<>(); + Query query = null; + catName = normalizeIdentifier(catName); + query = pm.newQuery(MTable.class); + query.setFilter("database.catalogName == catName && tableType == tt && rewriteEnabled == re"); + query.declareParameters("java.lang.String catName, java.lang.String tt, boolean re"); + Collection mTbls = (Collection) query.executeWithArray( + catName, TableType.MATERIALIZED_VIEW.toString(), true); + for (MTable mTbl : mTbls) { + Table tbl = convertToTable(mTbl, conf); + tbl.setCreationMetadata( + convertToCreationMetadata( + getCreationMetadata(tbl.getCatName(), tbl.getDbName(), tbl.getTableName()), baseStore)); + allMaterializedViews.add(tbl); + } + return allMaterializedViews; + } + + @Override + public List isPartOfMaterializedView(TableName tableName) { + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = normalizeIdentifier(tableName.getTable()); + List mViewList = new ArrayList<>(); + Query query = pm.newQuery("select from org.apache.hadoop.hive.metastore.model.MCreationMetadata"); + List creationMetadata = (List) query.execute(); + Iterator iter = creationMetadata.iterator(); + + while (iter.hasNext()) { + MCreationMetadata p = iter.next(); + Set tables = p.getTables(); + for (MMVSource sourceTable : tables) { + MTable table = sourceTable.getTable(); + if (dbName.equals(table.getDatabase().getName()) && tblName.equals(table.getTableName())) { + LOG.info("Cannot drop table {} as it is being used by MView {}", table.getTableName(), p.getTblName()); + mViewList.add(p.getDbName() + "." 
+ p.getTblName()); + } + } + } + return mViewList; + } + + @Override + public Table markPartitionForEvent(TableName tableName, Map partVals, PartitionEventType evtType) + throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { + LOG.debug("Begin executing markPartitionForEvent"); + Table tbl = getTable(tableName, null, -1); + if(null == tbl) { + throw new UnknownTableException("Table: "+ tableName + " is not found."); + } + + pm.makePersistent(new MPartitionEvent(normalizeIdentifier(tableName.getCat()), + normalizeIdentifier(tableName.getDb()), normalizeIdentifier(tableName.getTable()), + getPartitionStr(tbl, partVals), evtType.getValue())); + LOG.debug("Done executing markPartitionForEvent"); + return tbl; + } + + private String getPartitionStr(Table tbl, Map partVals) throws InvalidPartitionException{ + if(tbl.getPartitionKeysSize() != partVals.size()){ + throw new InvalidPartitionException("Number of partition columns in table: "+ tbl.getPartitionKeysSize() + + " doesn't match with number of supplied partition values: "+ partVals.size()); + } + final List storedVals = new ArrayList<>(tbl.getPartitionKeysSize()); + for(FieldSchema partKey : tbl.getPartitionKeys()){ + String partVal = partVals.get(partKey.getName()); + if(null == partVal) { + throw new InvalidPartitionException("No value found for partition column: "+partKey.getName()); + } + storedVals.add(partVal); + } + return join(storedVals,','); + } + + @Override + public boolean isPartitionMarkedForEvent(TableName tableName, Map partName, + PartitionEventType evtType) + throws MetaException, UnknownTableException, InvalidPartitionException, UnknownPartitionException { + LOG.debug("Begin Executing isPartitionMarkedForEvent"); + Query query = pm.newQuery(MPartitionEvent.class, + "dbName == t1 && tblName == t2 && partName == t3 && eventType == t4 && catalogName == t5"); + query + .declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, int 
t4," + + "java.lang.String t5"); + Table tbl = getTable(tableName, null, -1); // Make sure dbName and tblName are valid. + if (null == tbl) { + throw new UnknownTableException("Table: " + tableName + " is not found."); + } + Collection partEvents = (Collection) query.executeWithArray( + normalizeIdentifier(tableName.getDb()), normalizeIdentifier(tableName.getTable()), + getPartitionStr(tbl, partName), evtType.getValue(), normalizeIdentifier(tableName.getCat())); + pm.retrieveAll(partEvents); + LOG.debug("Done executing isPartitionMarkedForEvent"); + return partEvents != null && !partEvents.isEmpty(); + } + + @Override + public int getObjectCount(String fieldName, String objName) { + String queryStr = "select count(" + fieldName + ") from " + objName; + Query query = pm.newQuery(queryStr); + Long result = (Long) query.execute(); + return result != null ? result.intValue() : 0; + } + + @Override + public long updateParameterWithExpectedValue(Table table, String key, String expectedValue, String newValue) + throws MetaException, NoSuchObjectException { + return new GetHelper(this, new TableName(table.getCatName(), table.getDbName(), table.getTableName())) { + @Override + protected String describeResult() { + return "Affected rows"; + } + @Override + protected Long getSqlResult() throws MetaException { + return getDirectSql().updateTableParam(table, key, expectedValue, newValue); + } + @Override + protected Long getJdoResult() + throws MetaException, NoSuchObjectException, InvalidObjectException { + throw new UnsupportedOperationException( + "Cannot update parameter with JDO, make sure direct SQL is enabled"); + } + @Override + protected boolean canUseJdoQuery() throws MetaException { + return false; + } + }.run(false); + } + + @Override + public MPartition ensureGetMPartition(TableName tableName, List partVals) throws MetaException { + String catName = normalizeIdentifier(tableName.getCat()); + String dbName = normalizeIdentifier(tableName.getDb()); + String tblName = 
normalizeIdentifier(tableName.getTable()); + MPartition result = null; + MTable mtbl = getMTable(catName, dbName, tblName); + if (mtbl == null) { + // throw exception? + return null; + } + // Change the query to use part_vals instead of the name which is + // redundant TODO: callers of this often get part_vals out of name for no reason... + String name = + Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), partVals); + result = getMPartition(catName, dbName, tblName, name); + + return result; + } + + /** + * Getting MPartition object. Use this method if the partition name is available, so we do not + * query the table object again. + * @param catName The catalogue + * @param dbName The database + * @param tableName The table + * @param name The partition name + * @return The MPartition object in the backend database + */ + private MPartition getMPartition(String catName, String dbName, String tableName, + String name) throws MetaException { + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + MPartition ret = null; + Query query = + pm.newQuery(MPartition.class, + "table.tableName == t1 && table.database.name == t2 && partitionName == t3 " + + " && table.database.catalogName == t4"); + query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3, " + + "java.lang.String t4"); + List mparts = (List) query.executeWithArray(tableName, dbName, name, catName); + pm.retrieveAll(mparts); + // We need to compare partition name with requested name since some DBs + // (like MySQL, Derby) considers 'a' = 'a ' whereas others like (Postgres, + // Oracle) doesn't exhibit this problem. 
+ if (CollectionUtils.isNotEmpty(mparts)) { + if (mparts.size() > 1) { + throw new MetaException( + "Expecting only one partition but more than one partitions are found."); + } else { + MPartition mpart = mparts.get(0); + if (name.equals(mpart.getPartitionName())) { + ret = mpart; + } else { + throw new MetaException("Expecting a partition with name " + name + + ", but metastore is returning a partition with name " + mpart.getPartitionName() + + "."); + } + } + } + return ret; + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricLogger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricLogger.java index 4fbfced83284..24fc17a2a222 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricLogger.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricLogger.java @@ -50,8 +50,8 @@ public class AcidMetricLogger implements MetastoreTaskThread { @Override public long runFrequency(TimeUnit timeUnit) { - return MetastoreConf - .getTimeVar(conf, MetastoreConf.ConfVars.COMPACTOR_ACID_METRICS_LOGGER_FREQUENCY, timeUnit); + return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID) ? 
+ MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.COMPACTOR_ACID_METRICS_LOGGER_FREQUENCY, timeUnit) : 0; } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricService.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricService.java index d80f84219eea..23200e502357 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricService.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricService.java @@ -103,7 +103,8 @@ public class AcidMetricService implements MetastoreTaskThread { @Override public long runFrequency(TimeUnit unit) { - return MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_CHECK_INTERVAL, unit); + return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID) ? + MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_CHECK_INTERVAL, unit) : 0; } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidHouseKeeperService.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidHouseKeeperService.java index 836b85851e76..84a92e40367c 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidHouseKeeperService.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidHouseKeeperService.java @@ -75,7 +75,8 @@ public Configuration getConf() { @Override public long runFrequency(TimeUnit unit) { - return MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.ACID_HOUSEKEEPER_SERVICE_INTERVAL, unit); + return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID) ? 
+ MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.ACID_HOUSEKEEPER_SERVICE_INTERVAL, unit) : 0; } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidOpenTxnsCounterService.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidOpenTxnsCounterService.java index 89dbff3f96fb..be39e615cb69 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidOpenTxnsCounterService.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidOpenTxnsCounterService.java @@ -42,7 +42,8 @@ public class AcidOpenTxnsCounterService implements MetastoreTaskThread { @Override public long runFrequency(TimeUnit unit) { - return MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.COUNT_OPEN_TXNS_INTERVAL, unit); + return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID) ? + MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.COUNT_OPEN_TXNS_INTERVAL, unit) : 0; } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidTxnCleanerService.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidTxnCleanerService.java index 766ef7b67d8e..227d846ae272 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidTxnCleanerService.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/AcidTxnCleanerService.java @@ -53,7 +53,8 @@ public Configuration getConf() { @Override public long runFrequency(TimeUnit unit) { - return MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.ACID_TXN_CLEANER_INTERVAL, unit); + return MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID) ? 
+ MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.ACID_TXN_CLEANER_INTERVAL, unit) : 0; } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/CompactionHouseKeeperService.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/CompactionHouseKeeperService.java index 6eca48283445..653ccc50d6e7 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/CompactionHouseKeeperService.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/service/CompactionHouseKeeperService.java @@ -36,11 +36,14 @@ public CompactionHouseKeeperService() { @Override protected void initTasks(){ - tasks = ImmutableMap., String>builder() - .put(txnHandler::removeDuplicateCompletedTxnComponents, - "Cleaning duplicate COMPLETED_TXN_COMPONENTS entries") - .put(txnHandler::purgeCompactionHistory, "Cleaning obsolete compaction history entries") - .build(); + ImmutableMap.Builder, String> taskBuilder = + ImmutableMap., String>builder() + .put(txnHandler::purgeCompactionHistory, "Cleaning obsolete compaction history entries"); + if (MetastoreConf.getBoolVar(getConf(), MetastoreConf.ConfVars.METASTORE_SUPPORT_ACID)) { + taskBuilder.put(txnHandler::removeDuplicateCompletedTxnComponents, + "Cleaning duplicate COMPLETED_TXN_COMPONENTS entries"); + } + tasks = taskBuilder.build(); } @Override diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/InjectableBehaviourObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/InjectableBehaviourObjectStore.java index f056cc65e762..f05bdd4f7056 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/InjectableBehaviourObjectStore.java +++ 
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/InjectableBehaviourObjectStore.java @@ -180,13 +180,8 @@ public static void setUpdatePartColStatsBehaviour(com.google.common.base.Functio // ObjectStore methods to be overridden with injected behavior @Override - public Table getTable(String catName, String dbName, String tableName) throws MetaException { - return getTableModifier.apply(super.getTable(catName, dbName, tableName)); - } - - @Override - public Table getTable(String catName, String dbName, String tableName, String writeIdList) throws MetaException { - return getTableModifier.apply(super.getTable(catName, dbName, tableName, writeIdList)); + public Table getTable(String catName, String dbName, String tableName, String writeIdList, long tableId) throws MetaException { + return getTableModifier.apply(super.getTable(catName, dbName, tableName, writeIdList, tableId)); } @Override diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/StatisticsTestUtils.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/StatisticsTestUtils.java index a6ece7aafceb..e5e2bb5a5ba3 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/StatisticsTestUtils.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/StatisticsTestUtils.java @@ -83,6 +83,7 @@ public static ColumnStatistics createColumnStatistics(ColumnStatisticsData data, ColumnStatistics colStats = new ColumnStatistics(); ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, tbl.getDbName(), tbl.getTableName()); statsDesc.setPartName(partName); + statsDesc.setCatName(tbl.getCatName()); colStats.setStatsDesc(statsDesc); colStats.setStatsObj(Collections.singletonList(statObj)); colStats.setEngine(HIVE_ENGINE); diff --git 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index 8ebc1a7ccf9e..b6c579c9d052 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -61,12 +61,10 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.dataconnector.jdbc.AbstractJDBCConnectorProvider; import org.apache.hadoop.hive.metastore.handler.AddPartitionsHandler; -import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.MetastoreVersionInfo; import org.apache.hadoop.hive.metastore.utils.SecurityUtils; -import org.apache.orc.impl.OrcAcidUtils; import org.datanucleus.api.jdo.JDOPersistenceManager; import org.datanucleus.api.jdo.JDOPersistenceManagerFactory; import org.junit.Assert; diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestNoAcidSupport.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestNoAcidSupport.java new file mode 100644 index 000000000000..95c0ca7a57f5 --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestNoAcidSupport.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; +import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.thrift.TException; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Tests to verify metastore without acid support. 
+ */ +@Category(MetastoreUnitTest.class) +public class TestNoAcidSupport { + private static Configuration conf; + private static HiveMetaStoreClient client; + private static final String DB_NAME = "TestNoAcidSupport"; + private static final String TABLE_NAME = "t"; + + @BeforeClass + public static void beforeTests() throws Exception { + conf = MetastoreConf.newMetastoreConf(); + MetastoreConf.setBoolVar(conf, ConfVars.METASTORE_SUPPORT_ACID, false); + client = new HiveMetaStoreClient(conf); + client.dropDatabase(DB_NAME, true, true, true); + new DatabaseBuilder().setName(DB_NAME).create(client, conf); + } + + @AfterClass + public static void afterTests() throws Exception { + try { + client.dropDatabase(DB_NAME, true, true, true); + client.close(); + } catch (Throwable e) { + System.err.println(StringUtils.stringifyException(e)); + throw e; + } + } + + @After + public void afterTest() throws TException { + client.dropTable(DB_NAME, TABLE_NAME); + } + + @Test + public void testCreateManagedAcidTable() { + Exception exception = Assert.assertThrows(MetaException.class, () -> { + new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME) + .addCol("i", ColumnType.INT_TYPE_NAME) + .setType(TableType.MANAGED_TABLE.name()) + .addTableParam(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true") + .create(client, conf); + }); + Assert.assertTrue(exception.getMessage().contains("ACID tables are not permitted when the " + + ConfVars.METASTORE_SUPPORT_ACID.getHiveName() + " property is set to false")); + } + + @Test + public void testCreateManagedTranslateToExternalTable() throws Exception { + new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME) + .addCol("i", ColumnType.INT_TYPE_NAME) + .setType(TableType.MANAGED_TABLE.name()) + .create(client, conf); + Table t = client.getTable(DB_NAME, TABLE_NAME); + Assert.assertEquals(TableType.EXTERNAL_TABLE.name(), t.getTableType()); + Assert.assertTrue(Boolean.parseBoolean(t.getParameters().get(HiveMetaHook.EXTERNAL))); + 
Assert.assertTrue(Boolean.parseBoolean(t.getParameters().get(MetaStoreUtils.EXTERNAL_TABLE_PURGE))); + } + + @Test + public void testCreateExternalTable() throws Exception { + new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME) + .addCol("i", ColumnType.INT_TYPE_NAME) + .setType(TableType.EXTERNAL_TABLE.name()) + .create(client, conf); + Table t = client.getTable(DB_NAME, TABLE_NAME); + Assert.assertEquals(TableType.EXTERNAL_TABLE.name(), t.getTableType()); + Assert.assertTrue(Boolean.parseBoolean(t.getParameters().get(HiveMetaHook.EXTERNAL))); + Assert.assertNull(t.getParameters().get(MetaStoreUtils.EXTERNAL_TABLE_PURGE)); + } + + @Test + public void testAlterToManagedAcidTable() throws Exception { + new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME) + .addCol("i", ColumnType.INT_TYPE_NAME) + .setType(TableType.EXTERNAL_TABLE.name()) + .create(client, conf); + Table t = client.getTable(DB_NAME, TABLE_NAME); + Assert.assertEquals(TableType.EXTERNAL_TABLE.name(), t.getTableType()); + t.setTableType(TableType.MANAGED_TABLE.name()); + t.getParameters().put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); + Exception exception = Assert.assertThrows(MetaException.class, () -> { + client.alter_table(DB_NAME, TABLE_NAME, t); + }); + Assert.assertTrue(exception.getMessage().contains("ACID tables are not permitted when the " + + ConfVars.METASTORE_SUPPORT_ACID.getHiveName() + " property is set to false")); + } +} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java index afede2f768c0..6328c980bb29 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java @@ -23,6 +23,7 @@ import 
com.google.common.collect.ImmutableSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; import org.apache.hadoop.hive.metastore.api.Catalog; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -81,6 +82,8 @@ import org.apache.hadoop.hive.metastore.model.MNotificationLog; import org.apache.hadoop.hive.metastore.model.MNotificationNextId; import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.metastore.iface.TableStore; +import org.apache.hadoop.hive.metastore.utils.DirectSqlConfigurator; import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils; import org.apache.hadoop.hive.metastore.utils.RetryingExecutor; import org.junit.Assert; @@ -149,6 +152,9 @@ public class TestObjectStore { private static final String USER1 = "testobjectstoreuser1"; private static final String ROLE1 = "testobjectstorerole1"; private static final String ROLE2 = "testobjectstorerole2"; + private static final String SQLI_PART_NAME = "test_part_col=missing') OR 1=1 -- "; + private static final List ALL_PART_NAMES = + Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"); private static final Logger LOG = LoggerFactory.getLogger(TestObjectStore.class.getName()); private static final class LongSupplier implements Supplier { @@ -783,25 +789,46 @@ public void testDirectSQLDropPartitionsCacheInSession() createPartitionedTable(false, false, new HashSet<>()); // query the partitions with JDO List partitions; - try(AutoCloseable c = deadline()) { - partitions = objectStore.getPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, - false, true, new GetPartitionsArgs.GetPartitionsArgsBuilder().max(10).build()); + TableStore tableStore = objectStore.unwrap(TableStore.class); + try(AutoCloseable c = deadline(); + AutoCloseable d = new 
DirectSqlConfigurator(objectStore.getConf(), false)) { + partitions = tableStore.getPartitions(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1), + new GetPartitionsArgs.GetPartitionsArgsBuilder().max(10).build()); } Assert.assertEquals(3, partitions.size()); // drop partitions with directSql - try(AutoCloseable c = deadline()) { - objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, - Arrays.asList("test_part_col=a0", "test_part_col=a1"), true, false); + try(AutoCloseable c = deadline(); + AutoCloseable d = new DirectSqlConfigurator(objectStore.getConf(), true)) { + tableStore.dropPartitions(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1), + Arrays.asList("test_part_col=a0", "test_part_col=a1")); } - try (AutoCloseable c = deadline()) { + try (AutoCloseable c = deadline(); + AutoCloseable d = new DirectSqlConfigurator(objectStore.getConf(), false)) { // query the partitions with JDO, checking the cache is not causing any problem - partitions = objectStore.getPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, false, true, + partitions = tableStore.getPartitions(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1), new GetPartitionsArgs.GetPartitionsArgsBuilder().max(10).build()); } Assert.assertEquals(1, partitions.size()); } + @Test + public void testDirectSQLDropPartitionsRejectsSqlInjectionInPartName() + throws Exception { + createPartitionedTable(false, false, new HashSet<>()); + + try (DirectSqlConfigurator configurator = new DirectSqlConfigurator(conf, true)) { + objectStore.unwrap(TableStore.class).dropPartitions(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1), + Collections.singletonList(SQLI_PART_NAME)); + } + + List partitions; + try (AutoCloseable c = deadline()) { + partitions = objectStore.getPartitionsByNames(DEFAULT_CATALOG_NAME, DB1, TABLE1, ALL_PART_NAMES); + } + Assert.assertEquals(3, partitions.size()); + } + /** * Checks if the JDO cache is able to handle directSQL partition drops cross sessions. 
*/ @@ -815,27 +842,34 @@ public void testDirectSQLDropPartitionsCacheCrossSession() GetPartitionsArgs args = new GetPartitionsArgs.GetPartitionsArgsBuilder().max(10).build(); // query the partitions with JDO in the 1st session List partitions; - try (AutoCloseable c = deadline()) { - partitions = objectStore.getPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, false, true, args); + TableStore tableStore2 = objectStore2.unwrap(TableStore.class); + TableStore tableStore1 = objectStore.unwrap(TableStore.class); + TableName targetTable = new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1); + try (AutoCloseable c = deadline(); + AutoCloseable d = new DirectSqlConfigurator(conf, false)) { + partitions = tableStore1.getPartitions(targetTable, args); } Assert.assertEquals(3, partitions.size()); // query the partitions with JDO in the 2nd session - try (AutoCloseable c = deadline()) { - partitions = objectStore2.getPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, false, true, args); + try (AutoCloseable c = deadline(); + AutoCloseable d = new DirectSqlConfigurator(conf, false)) { + partitions = tableStore2.getPartitions(targetTable, args); } Assert.assertEquals(3, partitions.size()); // drop partitions with directSql in the 1st session - try (AutoCloseable c = deadline()) { - objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, - Arrays.asList("test_part_col=a0", "test_part_col=a1"), true, false); + try (AutoCloseable c = deadline(); + AutoCloseable d = new DirectSqlConfigurator(conf, true)) { + tableStore1.dropPartitions(targetTable, + Arrays.asList("test_part_col=a0", "test_part_col=a1")); } // query the partitions with JDO in the 2nd session, checking the cache is not causing any // problem - try (AutoCloseable c = deadline()) { - partitions = objectStore2.getPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, false, true, args); + try (AutoCloseable c = deadline(); + AutoCloseable d = new DirectSqlConfigurator(conf, false)) { + partitions = 
tableStore2.getPartitions(targetTable, args); } Assert.assertEquals(1, partitions.size()); } @@ -864,9 +898,9 @@ public void testDirectSQLDropPartitionsCleanup() throws Exception { checkBackendTableSize("SERDES", 4); // Table has a serde // drop the partitions - try (AutoCloseable c = deadline()) { - objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, - Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), true, false); + try (AutoCloseable c = deadline(); AutoCloseable d = new DirectSqlConfigurator(conf, true)) { + objectStore.unwrap(TableStore.class).dropPartitions(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1), + Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2")); } // Check, if every data is dropped connected to the partitions @@ -907,9 +941,9 @@ public void testDirectSQLCDsCleanup() throws Exception { checkBackendTableSize("CDS", 2); checkBackendTableSize("COLUMNS_V2", 11); // drop the partitions - try (AutoCloseable c = deadline()) { - objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, - Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), true, false); + try (AutoCloseable c = deadline(); AutoCloseable d = new DirectSqlConfigurator(conf, true)) { + objectStore.unwrap(TableStore.class).dropPartitions(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1), + Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2")); } // Checks if the data connected to the partitions is dropped checkBackendTableSize("PARTITIONS", 0); @@ -1024,6 +1058,63 @@ public void testDeletePartitionColumnStatisticsWhenEngineHasSpecialCharacter() t List.of("test_part_col=a2"), null, "special '"); } + @Test + public void testGetPartitionsByNamesRejectsSqlInjectionInPartName() throws Exception { + createPartitionedTable(true, true, new HashSet<>()); + List partitions; + try (AutoCloseable c = deadline()) { + partitions = objectStore.getPartitionsByNames(DEFAULT_CATALOG_NAME, 
DB1, TABLE1, + Collections.singletonList(SQLI_PART_NAME)); + } + Assert.assertEquals(0, partitions.size()); + try (AutoCloseable c = deadline()) { + partitions = objectStore.getPartitionsByNames(DEFAULT_CATALOG_NAME, DB1, TABLE1, ALL_PART_NAMES); + } + Assert.assertEquals(3, partitions.size()); + } + + @Test + public void testUpdatePartitionColumnStatisticsInBatchRejectsSqlInjectionInPartName() + throws Exception { + createPartitionedTable(true, true, new HashSet<>()); + Table tbl = objectStore.getTable(DEFAULT_CATALOG_NAME, DB1, TABLE1); + + List> baseline; + try (AutoCloseable c = deadline()) { + baseline = objectStore.getPartitionColumnStatistics(DEFAULT_CATALOG_NAME, DB1, TABLE1, + ALL_PART_NAMES, Collections.singletonList("test_part_col")); + } + Assert.assertEquals(1, baseline.size()); + Assert.assertEquals(3, baseline.get(0).size()); + long baselineNumNulls = baseline.get(0).get(0).getStatsObj().get(0).getStatsData() + .getLongStats().getNumNulls(); + + ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, DB1, TABLE1); + statsDesc.setCatName(DEFAULT_CATALOG_NAME); + statsDesc.setPartName(SQLI_PART_NAME); + ColumnStatisticsData injectedData = new ColStatsBuilder<>(long.class).numNulls(999).numDVs(2) + .low(3L).high(4L).build(); + ColumnStatisticsObj statsObj = new ColumnStatisticsObj("test_part_col", "int", injectedData); + ColumnStatistics maliciousStats = new ColumnStatistics(statsDesc, + Collections.singletonList(statsObj)); + maliciousStats.setEngine(ENGINE); + + Map statsMap = new HashMap<>(); + statsMap.put(SQLI_PART_NAME, maliciousStats); + objectStore.updatePartitionColumnStatisticsInBatch(statsMap, tbl, null, null, -1); + + List> after; + try (AutoCloseable c = deadline()) { + after = objectStore.getPartitionColumnStatistics(DEFAULT_CATALOG_NAME, DB1, TABLE1, + ALL_PART_NAMES, Collections.singletonList("test_part_col")); + } + Assert.assertEquals(3, after.get(0).size()); + for (ColumnStatistics cs : after.get(0)) { + 
Assert.assertEquals(baselineNumNulls, + cs.getStatsObj().get(0).getStatsData().getLongStats().getNumNulls()); + } + } + private void setAggrConf(boolean enableBitVector, boolean enableKll, int batchSize) { Configuration conf2 = MetastoreConf.newMetastoreConf(conf); MetastoreConf.setBoolVar(conf2, ConfVars.STATS_FETCH_BITVECTOR, enableBitVector); @@ -1050,10 +1141,10 @@ private void assertAggrStats(AggrStats aggrStats, ColumnStatisticsData computedS private void statsAggrResourceCleanup() throws Exception { - try (AutoCloseable c = deadline()) { - objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1, - Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2"), true, true); - objectStore.dropTable(DEFAULT_CATALOG_NAME, DB1, TABLE1); + try (AutoCloseable c = deadline(); AutoCloseable d = new DirectSqlConfigurator(conf, true)) { + objectStore.unwrap(TableStore.class).dropPartitions(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1), + Arrays.asList("test_part_col=a0", "test_part_col=a1", "test_part_col=a2")); + objectStore.unwrap(TableStore.class).dropTable(new TableName(DEFAULT_CATALOG_NAME, DB1, TABLE1)); objectStore.dropDatabase(DEFAULT_CATALOG_NAME, DB1); } } @@ -1332,7 +1423,7 @@ public void testQueryCloseOnError() throws Exception { spy.getAllFunctions(DEFAULT_CATALOG_NAME); spy.getAllTables(DEFAULT_CATALOG_NAME, DB1); spy.getPartitionCount(); - Mockito.verify(spy, Mockito.times(3)) + Mockito.verify(spy, Mockito.times(2)) .rollbackAndCleanup(Mockito.anyBoolean(), ArgumentMatchers.any()); } @@ -1880,10 +1971,10 @@ protected String describeResult() { @Override protected Object getSqlResult(ObjectStore.GetHelper ctx) throws MetaException { // drop the partitions with SQL alone - try (AutoCloseable c = deadline()) { - objectStore.dropPartitionsInternal(ctx.catName, ctx.dbName, ctx.tblName, partNames, true, - false); - Assert.assertEquals(0, objectStore.getPartitionCount()); + try (AutoCloseable c = deadline(); AutoCloseable d = new 
DirectSqlConfigurator(conf, true)) { + objectStore.unwrap(TableStore.class) + .dropPartitions(new TableName(ctx.catName, ctx.dbName, ctx.tblName), partNames); + assertEquals(0, objectStore.getPartitionCount()); } catch (Exception e) { throw new MetaException(e.getMessage()); } @@ -1893,10 +1984,10 @@ protected Object getSqlResult(ObjectStore.GetHelper ctx) throws MetaExce @Override protected Object getJdoResult(ObjectStore.GetHelper ctx) throws MetaException { // drop the partitions with JDO alone - try (AutoCloseable c = deadline()) { - Assert.assertEquals(3, objectStore.getPartitionCount()); - objectStore.dropPartitionsInternal(ctx.catName, ctx.dbName, ctx.tblName, partNames, false, - true); + try (AutoCloseable c = deadline(); AutoCloseable d = new DirectSqlConfigurator(conf, false)) { + assertEquals(3, objectStore.getPartitionCount()); + objectStore.unwrap(TableStore.class) + .dropPartitions(new TableName(ctx.catName, ctx.dbName, ctx.tblName), partNames); } catch (Exception e) { throw new MetaException(e.getMessage()); } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java index e2fd7bf9cc51..55e86e3a5323 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java @@ -54,6 +54,11 @@ import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.config.LoggerConfig; +import 
org.apache.logging.log4j.core.test.appender.ListAppender; import org.apache.thrift.TException; import org.junit.After; import org.junit.Assert; @@ -473,7 +478,7 @@ public void testPartitionDiscoveryTablePattern() throws TException, IOException } @Test - public void testPartitionDiscoveryTransactionalTable() + public void testPartitionDiscoveryTransactionalTableConcurrent() throws TException, IOException, InterruptedException, ExecutionException { String dbName = "db6"; String tableName = "tbl6"; @@ -503,47 +508,72 @@ public void testPartitionDiscoveryTransactionalTable() TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY); client.alter_table(dbName, tableName, table); - runPartitionManagementTask(conf); - partitions = client.listPartitions(dbName, tableName, (short) -1); - assertEquals(5, partitions.size()); - - // only one partition discovery task is running, there will be no skipped attempts - assertEquals(0, PartitionManagementTask.getSkippedAttempts()); - - // delete a partition from fs, and submit 3 tasks at the same time each of them trying to acquire X lock on the - // same table, only one of them will run other attempts will be skipped - boolean deleted = fs.delete(newPart1.getParent(), true); - assertTrue(deleted); - assertEquals(4, fs.listStatus(tablePath).length); - - // 3 tasks are submitted at the same time, only one will eventually lock the table and only one get to run at a time - // This is to simulate, skipping partition discovery task attempt when previous attempt is still incomplete - PartitionManagementTask partitionDiscoveryTask1 = new PartitionManagementTask(); - partitionDiscoveryTask1.setConf(conf); - PartitionManagementTask partitionDiscoveryTask2 = new PartitionManagementTask(); - partitionDiscoveryTask2.setConf(conf); - PartitionManagementTask partitionDiscoveryTask3 = new PartitionManagementTask(); - partitionDiscoveryTask3.setConf(conf); - List tasks = Lists - .newArrayList(partitionDiscoveryTask1, 
partitionDiscoveryTask2, partitionDiscoveryTask3); - ExecutorService executorService = Executors.newFixedThreadPool(3); - int successBefore = PartitionManagementTask.getCompletedAttempts(); - int skippedBefore = PartitionManagementTask.getSkippedAttempts(); - List> futures = new ArrayList<>(); - for (PartitionManagementTask task : tasks) { - futures.add(executorService.submit(task)); - } - for (Future future : futures) { - future.get(); + final String appenderName = "testPartitionDiscoveryTransactionalTableConcurrentAppender"; + LoggerContext loggerContext = (LoggerContext) LogManager.getContext(false); + LoggerConfig rootLoggerConfig = loggerContext.getConfiguration().getLoggerConfig(""); + ListAppender skipAppender = new ListAppender(appenderName); + skipAppender.start(); + rootLoggerConfig.addAppender(skipAppender, Level.INFO, null); + try { + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(5, partitions.size()); + + // only one partition discovery task is running, there will be no skipped attempts + assertEquals(0, countSkipMessages(skipAppender)); + assertEquals(1, countDiscoveryEntries(skipAppender)); + + // delete a partition from fs, and submit 3 tasks at the same time each of them trying to acquire X lock on the + // same table, only one of them will run other attempts will be skipped + boolean deleted = fs.delete(newPart1.getParent(), true); + assertTrue(deleted); + assertEquals(4, fs.listStatus(tablePath).length); + + // 3 tasks are submitted at the same time, only one will eventually lock the table and only one + // get to run at a time. 
This is to simulate, skipping partition discovery task attempt when + // previous attempt is still incomplete + PartitionManagementTask partitionDiscoveryTask1 = new PartitionManagementTask(); + partitionDiscoveryTask1.setConf(conf); + PartitionManagementTask partitionDiscoveryTask2 = new PartitionManagementTask(); + partitionDiscoveryTask2.setConf(conf); + PartitionManagementTask partitionDiscoveryTask3 = new PartitionManagementTask(); + partitionDiscoveryTask3.setConf(conf); + List tasks = Lists + .newArrayList(partitionDiscoveryTask1, partitionDiscoveryTask2, partitionDiscoveryTask3); + ExecutorService executorService = Executors.newFixedThreadPool(3); + List> futures = new ArrayList<>(); + for (PartitionManagementTask task : tasks) { + futures.add(executorService.submit(task)); + } + for (Future future : futures) { + future.get(); + } + long skips = countSkipMessages(skipAppender); + long discoveries = countDiscoveryEntries(skipAppender); + assertEquals(4, skips + discoveries); + assertTrue("at least one more task should have entered the work path during the race", discoveries >= 2); + } finally { + rootLoggerConfig.removeAppender(appenderName); + skipAppender.stop(); } - int successAfter = PartitionManagementTask.getCompletedAttempts(); - int skippedAfter = PartitionManagementTask.getSkippedAttempts(); - assertEquals(1, successAfter - successBefore); - assertEquals(2, skippedAfter - skippedBefore); partitions = client.listPartitions(dbName, tableName, (short) -1); assertEquals(4, partitions.size()); } + private static long countSkipMessages(ListAppender appender) { + return appender.getEvents().stream() + .map(e -> e.getMessage().getFormattedMessage()) + .filter(m -> m.equals("Lock is held by some other partition discovery task. 
Skipping this attempt.")) + .count(); + } + + private static long countDiscoveryEntries(ListAppender appender) { + return appender.getEvents().stream() + .map(e -> e.getMessage().getFormattedMessage()) + .filter(m -> m.equals("Found 1 candidate tables for partition discovery")) + .count(); + } + @Test public void testPartitionRetention() throws TException, IOException, InterruptedException { String dbName = "db7"; diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java index 31bc5635a40d..65e7996b6e02 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java @@ -35,10 +35,13 @@ import org.apache.commons.lang3.ClassUtils; import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.client.builder.GetPartitionsArgs; import org.apache.hadoop.hive.metastore.model.MTable; +import org.apache.hadoop.hive.metastore.metastore.iface.TableStore; +import org.apache.hadoop.hive.metastore.utils.DirectSqlConfigurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -59,52 +62,62 @@ public VerifyingObjectStore() { public List getPartitionsByFilter(String catName, String dbName, String tblName, GetPartitionsArgs args) throws MetaException, NoSuchObjectException { - List sqlResults = getPartitionsByFilterInternal( - catName, dbName, tblName, true, false, args); - List ormResults = getPartitionsByFilterInternal( - catName, dbName, tblName, false, 
true, args); - verifyLists(sqlResults, ormResults, Partition.class); - return sqlResults; + TableStore tableStore = unwrap(TableStore.class); + try (DirectSqlConfigurator configurator = new DirectSqlConfigurator(conf, false)) { + List ormResults = tableStore.getPartitionsByFilter(new TableName(catName, dbName, tblName), args); + configurator.tryDirectSql(true); + List sqlResults = tableStore.getPartitionsByFilter(new TableName(catName, dbName, tblName), args); + verifyLists(sqlResults, ormResults, Partition.class); + return sqlResults; + } } @Override public List getPartitionsByNames(String catName, String dbName, String tblName, List partNames) throws MetaException, NoSuchObjectException { GetPartitionsArgs args = new GetPartitionsArgs.GetPartitionsArgsBuilder().partNames(partNames).build(); - List sqlResults = getPartitionsByNamesInternal( - catName, dbName, tblName, true, false, args); - List ormResults = getPartitionsByNamesInternal( - catName, dbName, tblName, false, true, args); - verifyLists(sqlResults, ormResults, Partition.class); - return sqlResults; + TableStore tableStore = unwrap(TableStore.class); + try (DirectSqlConfigurator configurator = new DirectSqlConfigurator(conf, false)) { + List ormResults = tableStore.getPartitionsByNames(new TableName(catName, dbName, tblName), args); + configurator.tryDirectSql(true); + List sqlResults = tableStore.getPartitionsByNames(new TableName(catName, dbName, tblName), args); + verifyLists(sqlResults, ormResults, Partition.class); + return sqlResults; + } } @Override public boolean getPartitionsByExpr(String catName, String dbName, String tblName, List result, GetPartitionsArgs args) throws TException { List ormParts = new LinkedList<>(); - boolean sqlResult = getPartitionsByExprInternal( - catName, dbName, tblName, result, true, false, args); - boolean ormResult = getPartitionsByExprInternal( - catName, dbName, tblName, ormParts, false, true, args); - if (sqlResult != ormResult) { - String msg = "The unknown flag 
is different - SQL " + sqlResult + ", ORM " + ormResult; - LOG.error(msg); - throw new MetaException(msg); + TableStore tableStore = unwrap(TableStore.class); + try (DirectSqlConfigurator configurator = new DirectSqlConfigurator(conf, false)) { + boolean ormResult = tableStore.getPartitionsByExpr(new TableName(catName, dbName, tblName), ormParts, args); + configurator.tryDirectSql(true); + boolean sqlResult = tableStore.getPartitionsByExpr(new TableName(catName, dbName, tblName), result, args); + if (sqlResult != ormResult) { + String msg = "The unknown flag is different - SQL " + sqlResult + ", ORM " + ormResult; + LOG.error(msg); + throw new MetaException(msg); + } + verifyLists(result, ormParts, Partition.class); + return sqlResult; } - verifyLists(result, ormParts, Partition.class); - return sqlResult; } @Override public List getPartitions( String catName, String dbName, String tableName, GetPartitionsArgs args) throws MetaException, NoSuchObjectException { openTransaction(); - List sqlResults = getPartitionsInternal(catName, dbName, tableName, true, false, args); - List ormResults = getPartitionsInternal(catName, dbName, tableName, false, true, args); - verifyLists(sqlResults, ormResults, Partition.class); - commitTransaction(); - return sqlResults; + TableStore tableStore = unwrap(TableStore.class); + try (DirectSqlConfigurator configurator = new DirectSqlConfigurator(conf, false)) { + List ormResults = tableStore.getPartitions(new TableName(catName, dbName, tableName), args); + configurator.tryDirectSql(true); + List sqlResults = tableStore.getPartitions(new TableName(catName, dbName, tableName), args); + verifyLists(sqlResults, ormResults, Partition.class); + commitTransaction(); + return sqlResults; + } } @Override @@ -142,15 +155,22 @@ public List alterPartitions(String catName, String dbName, String tbl // could be different from that in the datastore. // We cannot verify the partitions by getPartitionsByNames now. 
GetPartitionsArgs args = new GetPartitionsArgs.GetPartitionsArgsBuilder().partNames(partNames).build(); - List oldParts = getPartitionsByNamesInternal( - catName, dbName, tblName, true, true, args); + List oldParts = unwrap(TableStore.class).getPartitionsByNames(new TableName(catName, dbName, tblName), args); if (oldParts.size() != partNames.size()) { throw new MetaException("Some partitions to be altered are missing"); } List tmpNewParts = new ArrayList<>(newParts); - alterPartitionsInternal(table, partNames, newParts, queryWriteIdList, true, false); - alterPartitionsInternal(table, partNames, oldParts, queryWriteIdList, false, true); - results = alterPartitionsInternal(table, partNames, tmpNewParts, queryWriteIdList, true, false); + TableStore tableStore = unwrap(TableStore.class); + try (DirectSqlConfigurator configurator = new DirectSqlConfigurator(conf, true)) { + tableStore.alterPartitions(new TableName(catName, dbName, tblName), part_vals, newParts, writeId, + queryWriteIdList); + configurator.tryDirectSql(false); + tableStore.alterPartitions(new TableName(catName, dbName, tblName), part_vals, oldParts, writeId, + queryWriteIdList); + configurator.tryDirectSql(true); + tableStore.alterPartitions(new TableName(catName, dbName, tblName), part_vals, tmpNewParts, writeId, + queryWriteIdList); + } // commit the changes success = commitTransaction(); } catch (Exception exception) { diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java index 44e00bc69a6a..dd2c2ce507f2 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/client/TestAlterPartitions.java @@ -1157,7 +1157,7 @@ public void 
testRenamePartitionNullTblName() throws Exception { } } - @Test(expected = MetaException.class) + @Test(expected = InvalidOperationException.class) public void testRenamePartitionChangeTblName() throws Exception { List> oldValues = createTable4PartColsParts(client); List oldParts = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1); @@ -1168,7 +1168,7 @@ public void testRenamePartitionChangeTblName() throws Exception { client.renamePartition(DB_NAME, TABLE_NAME, oldValues.get(3), partToRename); } - @Test(expected = MetaException.class) + @Test(expected = InvalidOperationException.class) public void testRenamePartitionChangeDbName() throws Exception { List> oldValues = createTable4PartColsParts(client); List oldParts = client.listPartitions(DB_NAME, TABLE_NAME, (short)-1); diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java index b0a692d82ca4..68d539fbb2b7 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java @@ -31,7 +31,7 @@ */ public class PostgresTPCDS extends Postgres { public PostgresTPCDS() { - super(DockerImageName.parse("zabetak/postgres-tpcds-metastore:1.3").asCompatibleSubstituteFor("postgres")); + super(DockerImageName.parse("apache/hive-postgres-tpcds-metastore:1.4").asCompatibleSubstituteFor("postgres")); container.withUsername("postgres"); } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/DirectSqlConfigurator.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/DirectSqlConfigurator.java new file mode 100644 index 000000000000..ae154d7626d2 --- 
/dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/DirectSqlConfigurator.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.utils; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; + +import static org.apache.hadoop.hive.metastore.metastore.GetHelper.getDirectSqlErrors; + +public class DirectSqlConfigurator implements AutoCloseable { + private final Configuration conf; + private final boolean origAllowSql; + private final long directSqlErrors; + + public DirectSqlConfigurator(Configuration configuration, boolean tryDirectSql) { + this.conf = configuration; + this.origAllowSql = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.TRY_DIRECT_SQL); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.TRY_DIRECT_SQL, tryDirectSql); + directSqlErrors = getDirectSqlErrors(); + } + + public void tryDirectSql(boolean tryDirectSql) { + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.TRY_DIRECT_SQL, tryDirectSql); + } + + @Override + public void close() throws MetaException { + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.TRY_DIRECT_SQL, origAllowSql); + if (directSqlErrors != getDirectSqlErrors()) { + throw new MetaException("An unexpected direct sql error raised behind," + + " please check the log to see the details"); + } + } +} diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java index 0f5d7b915168..66ef19e1d35f 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java +++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/CacheTag.java @@ -30,17 +30,19 @@ /** * 
Used for identifying the related object of the buffer stored in cache. * Comes in 3 flavours to optimize for minimal memory overhead: - * - TableCacheTag for tables without partitions: DB/table level - * - SinglePartitionCacheTag for tables with 1 partition level: DB/table/1st_partition + * - TableCacheTag for tables without partitions: catalog.DB.table level + * - SinglePartitionCacheTag for tables with 1 partition level: catalog.DB.table/1st_partition * - MultiPartitionCacheTag for tables with > 1 partition levels: - * DB/table/1st_partition/.../nth_partition . + * catalog.DB.table/1st_partition/.../nth_partition . */ public abstract class CacheTag implements Comparable { private static final String ENCODING = "UTF-8"; /** - * Prepended by DB name and '.' . + * Catalog-qualified, DB-qualified table name. Stored as {@code catalog.db.table}, e.g. + * {@code hive.salesdb.orders}. For DB-level parent tags produced by + * {@link #createParentCacheTag} this is just {@code catalog.db}. */ protected final String tableName; @@ -48,6 +50,9 @@ private CacheTag(String tableName) { this.tableName = tableName.intern(); } + /** + * Returns the full catalog-qualified, DB-qualified name, i.e. {@code catalog.db.table}. + */ public String getTableName() { return tableName; } @@ -71,8 +76,7 @@ public boolean equals(Object obj) { @Override public int hashCode() { - int res = tableName.hashCode(); - return res; + return tableName.hashCode(); } public static final CacheTag build(String tableName) { @@ -82,8 +86,16 @@ public static final CacheTag build(String tableName) { return new TableCacheTag(tableName); } - public static final CacheTag build(String tableName, LinkedHashMap partDescMap) { - if (StringUtils.isEmpty(tableName) || partDescMap == null || partDescMap.isEmpty()) { + public static final CacheTag build(String catalogName, String dbAndTableName) { + return build(catalogName + "." 
+ dbAndTableName); + } + + public static final CacheTag build(String catalogName, String dbAndTableName, LinkedHashMap partDescMap) { + return build(catalogName + "." + dbAndTableName, partDescMap); + } + + public static final CacheTag build(String fullTableName, LinkedHashMap partDescMap) { + if (StringUtils.isEmpty(fullTableName) || partDescMap == null || partDescMap.isEmpty()) { throw new IllegalArgumentException(); } @@ -95,10 +107,10 @@ public static final CacheTag build(String tableName, LinkedHashMap1 - return new MultiPartitionCacheTag(tableName, partDescs); + return new MultiPartitionCacheTag(fullTableName, partDescs); } } @@ -118,7 +130,10 @@ public static final CacheTag build(String tableName, List partDescs) { /** * Constructs a (fake) parent CacheTag instance by walking back in the hierarchy i.e. stepping * from inner to outer partition levels, then producing a CacheTag for the table and finally - * the DB. + * the DB. The catalog prefix is preserved throughout the walk. + * + *

The walk terminates at the DB level: a tag whose {@code tableName} contains exactly one + * dot (i.e. {@code catalog.db}) has no parent, so {@code null} is returned. */ public static final CacheTag createParentCacheTag(CacheTag tag) { if (tag == null) { @@ -134,20 +149,18 @@ public static final CacheTag createParentCacheTag(CacheTag tag) { } return new MultiPartitionCacheTag(multiPartitionCacheTag.tableName, subList); } else { - return new SinglePartitionCacheTag(multiPartitionCacheTag.tableName, - multiPartitionCacheTag.partitionDesc[0]); + return new SinglePartitionCacheTag( + multiPartitionCacheTag.tableName, multiPartitionCacheTag.partitionDesc[0]); } } if (tag instanceof SinglePartitionCacheTag) { return new TableCacheTag(tag.tableName); } else { - // DB level - int ix = tag.tableName.indexOf("."); - if (ix <= 0) { + if (tag.tableName.split("\\.", 3).length < 3) { return null; } - return new TableCacheTag(tag.tableName.substring(0, ix)); + return new TableCacheTag(tag.tableName.substring(0, tag.tableName.lastIndexOf('.'))); } } @@ -381,4 +394,3 @@ private static String[] decodePartDesc(String partDesc) { } } - diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java index 3753d337d8b6..2362c06b8744 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java @@ -63,13 +63,26 @@ public void fill(HiveDecimal value) { */ public void set(int elementNum, HiveDecimalWritable writable) { scratchHiveDecWritable.set(writable); - scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale); - if (!scratchHiveDecWritable.isSet()) { - noNulls = false; - isNull[elementNum] = true; - } else { - vector[elementNum] = scratchHiveDecWritable.serialize64(scale); - } + enforceScaleAndSet(elementNum); + } + + /** + * 
Set a Decimal64 field straight from the unscaled big-integer {@code bigIntegerBytes} (encoded at + * {@code valueScale}), without materializing a HiveDecimal per row. + * + * {@code valueScale} may differ from this vector's {@link #scale} under schema evolution (e.g. + * reading a DECIMAL(38,37) file column as DECIMAL(16,8)); the stored long is always at this + * vector's scale, where the value fits the 64-bit backing store and downstream consumers read it. + * + * FAST version: assumes elementNum is already adjusted for isRepeating and the isNull entry is set. + * + * @param elementNum + * @param bigIntegerBytes + * @param valueScale + */ + public void set(int elementNum, byte[] bigIntegerBytes, int valueScale) { + scratchHiveDecWritable.set(bigIntegerBytes, valueScale); + enforceScaleAndSet(elementNum); } /** @@ -87,6 +100,14 @@ public void set(int elementNum, HiveDecimalWritable writable) { */ public void set(int elementNum, HiveDecimal hiveDec) { scratchHiveDecWritable.set(hiveDec); + enforceScaleAndSet(elementNum); + } + + /** + * Apply this vector's precision/scale to the value in scratchHiveDecWritable and store it as a + * Decimal64 long, marking the entry NULL if out of range. + */ + private void enforceScaleAndSet(int elementNum) { scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale); if (!scratchHiveDecWritable.isSet()) { noNulls = false;