diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index d13b59968aff5..8ee37434861e2 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -65,11 +65,6 @@ provided - - com.fasterxml.jackson.core - jackson-core - - com.fasterxml.jackson.core jackson-databind diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 121eb0d5ff17b..f148757da5522 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -33,8 +33,7 @@ public class ClpConfig private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; - private String splitFilterConfig; - private SplitFilterProviderType splitFilterProviderType = SplitFilterProviderType.MYSQL; + private String splitMetadataConfigPath; private SplitProviderType splitProviderType = SplitProviderType.MYSQL; public boolean isPolymorphicTypeEnabled() @@ -151,27 +150,15 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) return this; } - public String getSplitFilterConfig() + public String getSplitMetadataConfigPath() { - return splitFilterConfig; + return splitMetadataConfigPath; } - @Config("clp.split-filter-config") - public ClpConfig setSplitFilterConfig(String splitFilterConfig) + @Config("clp.split-metadata-config-path") + public ClpConfig setSplitMetadataConfigPath(String splitMetadataConfigPath) { - this.splitFilterConfig = splitFilterConfig; - return this; - } - - public SplitFilterProviderType getSplitFilterProviderType() - { - return splitFilterProviderType; - } - - @Config("clp.split-filter-provider-type") - public ClpConfig setSplitFilterProviderType(SplitFilterProviderType splitFilterProviderType) - { - this.splitFilterProviderType = splitFilterProviderType; + this.splitMetadataConfigPath = splitMetadataConfigPath; return this; } @@ -192,11 +179,6 @@ public enum MetadataProviderType MYSQL } - public enum SplitFilterProviderType - { - MYSQL - } - public enum SplitProviderType { MYSQL diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java index 3b81d6bb2062f..c896dec96b862 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java @@ -16,7 +16,7 @@ import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.log.Logger; import com.facebook.presto.plugin.clp.optimization.ClpPlanOptimizerProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.connector.Connector; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; @@ -42,7 +42,7 @@ public class ClpConnector private final ClpSplitManager splitManager; private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpSplitFilterProvider splitFilterProvider; + private final ClpSplitMetadataConfig clpSplitMetadataConfig; @Inject public ClpConnector( @@ -52,7 +52,7 @@ public ClpConnector( ClpSplitManager splitManager, FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, - ClpSplitFilterProvider splitFilterProvider) + ClpSplitMetadataConfig clpSplitMetadataConfig) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); @@ -60,13 +60,13 @@ public ClpConnector( this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); - this.splitFilterProvider = requireNonNull(splitFilterProvider, "splitFilterProvider is null"); + this.clpSplitMetadataConfig = requireNonNull(clpSplitMetadataConfig); } @Override public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() { - return new ClpPlanOptimizerProvider(functionManager, functionResolution, splitFilterProvider); + return new ClpPlanOptimizerProvider(functionManager, functionResolution, clpSplitMetadataConfig); } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java index b3b802e058f58..3ef4451437cf1 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java @@ -54,6 +54,7 @@ public Connector create(String catalogName, Map config, Connecto try { Bootstrap app = new Bootstrap(new JsonModule(), new ClpModule(), binder -> { binder.bind(FunctionMetadataManager.class).toInstance(context.getFunctionMetadataManager()); + binder.bind(TypeManager.class).toInstance(context.getTypeManager()); binder.bind(NodeManager.class).toInstance(context.getNodeManager()); binder.bind(StandardFunctionResolution.class).toInstance(context.getStandardFunctionResolution()); binder.bind(TypeManager.class).toInstance(context.getTypeManager()); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java index 2530c013455cc..e2a68349be6e6 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java @@ -29,9 +29,8 @@ public enum ClpErrorCode CLP_UNSUPPORTED_TYPE(3, EXTERNAL), CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL), - CLP_SPLIT_FILTER_CONFIG_NOT_FOUND(10, USER_ERROR), - CLP_MANDATORY_SPLIT_FILTER_NOT_VALID(11, USER_ERROR), - CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE(12, EXTERNAL); + CLP_SPLIT_METADATA_CONFIG_NOT_FOUND(10, USER_ERROR), + CLP_MANDATORY_COLUMN_NOT_IN_FILTER(11, USER_ERROR); private final ErrorCode errorCode; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java index bf801d0d87242..ceb613d8f83e0 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java @@ -17,19 +17,16 @@ import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.plugin.clp.split.ClpSplitProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.PrestoException; import com.google.inject.Binder; import com.google.inject.Scopes; import static com.facebook.airlift.configuration.ConfigBinder.configBinder; import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType; -import static com.facebook.presto.plugin.clp.ClpConfig.SplitFilterProviderType; import static com.facebook.presto.plugin.clp.ClpConfig.SplitProviderType; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE; public class ClpModule @@ -42,17 +39,11 @@ protected void setup(Binder binder) binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); + binder.bind(ClpSplitMetadataConfig.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); ClpConfig config = buildConfigObject(ClpConfig.class); - if (SplitFilterProviderType.MYSQL == config.getSplitFilterProviderType()) { - binder.bind(ClpSplitFilterProvider.class).to(ClpMySqlSplitFilterProvider.class).in(Scopes.SINGLETON); - } - else { - throw new PrestoException(CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE, "Unsupported split filter provider type: " + config.getSplitFilterProviderType()); - } - if (config.getMetadataProviderType() == MetadataProviderType.MYSQL) { binder.bind(ClpMetadataProvider.class).to(ClpMySqlMetadataProvider.class).in(Scopes.SINGLETON); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java index b82932f0c30fd..40950a3d00ca0 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java @@ -14,6 +14,7 @@ package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.relation.RowExpression; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -27,14 +28,17 @@ public class ClpTableLayoutHandle { private final ClpTableHandle table; private final Optional kqlQuery; - private final Optional metadataSql; + private final Optional metadataExpression; @JsonCreator - public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional kqlQuery, @JsonProperty("metadataFilterQuery") Optional metadataSql) + public ClpTableLayoutHandle( + @JsonProperty("table") ClpTableHandle table, + @JsonProperty("kqlQuery") Optional kqlQuery, + @JsonProperty("metadataExpression") Optional metadataSqlExpression) { this.table = table; this.kqlQuery = kqlQuery; - this.metadataSql = metadataSql; + this.metadataExpression = metadataSqlExpression; } @JsonProperty @@ -50,9 +54,9 @@ public Optional getKqlQuery() } @JsonProperty - public Optional getMetadataSql() + public Optional getMetadataExpression() { - return metadataSql; + return metadataExpression; } @Override @@ -67,13 +71,13 @@ public boolean equals(Object o) ClpTableLayoutHandle that = (ClpTableLayoutHandle) o; return Objects.equals(table, that.table) && Objects.equals(kqlQuery, that.kqlQuery) && - Objects.equals(metadataSql, that.metadataSql); + Objects.equals(metadataExpression, that.metadataExpression); } @Override public int hashCode() { - return Objects.hash(table, kqlQuery, metadataSql); + return Objects.hash(table, kqlQuery, metadataExpression); } @Override @@ -82,7 +86,7 @@ public String toString() return toStringHelper(this) .add("table", table) .add("kqlQuery", kqlQuery) - .add("metadataSql", metadataSql) + .add("metadataExpression", metadataExpression) .toString(); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java index a52a24a0a15b4..fdb722b89298d 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java @@ -17,6 +17,7 @@ import com.facebook.presto.plugin.clp.ClpColumnHandle; import com.facebook.presto.plugin.clp.ClpConfig; import com.facebook.presto.plugin.clp.ClpTableHandle; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableList; @@ -28,8 +29,10 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; +import java.util.ArrayList; import java.util.List; +import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; public class ClpMySqlMetadataProvider @@ -56,9 +59,10 @@ public class ClpMySqlMetadataProvider private static final Logger log = Logger.get(ClpMySqlMetadataProvider.class); private final ClpConfig config; + private final ClpSplitMetadataConfig splitMetadataConfig; @Inject - public ClpMySqlMetadataProvider(ClpConfig config) + public ClpMySqlMetadataProvider(ClpConfig config, ClpSplitMetadataConfig splitMetadataConfig) { try { Class.forName("com.mysql.cj.jdbc.Driver"); @@ -68,6 +72,7 @@ public ClpMySqlMetadataProvider(ClpConfig config) throw new RuntimeException("MySQL JDBC driver not found", e); } this.config = config; + this.splitMetadataConfig = splitMetadataConfig; } @Override @@ -87,7 +92,16 @@ public List listColumnHandles(SchemaTableName schemaTableName) catch (SQLException e) { log.warn("Failed to load table schema for %s: %s", schemaTableName.getTableName(), e); } - return schemaTree.collectColumnHandles(); + + List metadataColumns = + splitMetadataConfig.getMetadataColumns(schemaTableName).entrySet().stream() + .map(entry -> new ClpColumnHandle(entry.getKey(), entry.getValue())) + .collect(toImmutableList()); + List dataColumns = schemaTree.collectColumnHandles(); + List allColumns = new ArrayList<>(dataColumns); + allColumns.addAll(metadataColumns); + + return allColumns; } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index 2c216614af10f..47eb74d46732c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -14,14 +14,14 @@ package com.facebook.presto.plugin.clp.optimization; import com.facebook.airlift.log.Logger; -import com.facebook.presto.plugin.clp.ClpExpression; import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.TableHandle; import com.facebook.presto.spi.VariableAllocator; import com.facebook.presto.spi.function.FunctionMetadataManager; @@ -32,16 +32,11 @@ import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; -import com.google.common.collect.ImmutableSet; -import java.util.HashSet; import java.util.Map; import java.util.Optional; -import java.util.Set; -import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; -import static java.lang.String.format; import static java.util.Objects.requireNonNull; public class ClpComputePushDown @@ -50,49 +45,33 @@ public class ClpComputePushDown private static final Logger log = Logger.get(ClpComputePushDown.class); private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpSplitFilterProvider splitFilterProvider; + private final ClpSplitMetadataConfig metadataConfig; - public ClpComputePushDown(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider) + public ClpComputePushDown( + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + ClpSplitMetadataConfig metadataConfig) { this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); - this.splitFilterProvider = requireNonNull(splitFilterProvider, "splitFilterProvider is null"); + this.metadataConfig = requireNonNull(metadataConfig, "metadataConfig is null"); } @Override public PlanNode optimize(PlanNode maxSubplan, ConnectorSession session, VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator) { Rewriter rewriter = new Rewriter(idAllocator); - PlanNode optimizedPlanNode = rewriteWith(rewriter, maxSubplan); - - // Throw exception if any required split filters are missing - if (!rewriter.tableScopeSet.isEmpty() && !rewriter.hasVisitedFilter) { - splitFilterProvider.checkContainsRequiredFilters(rewriter.tableScopeSet, ""); - } - return optimizedPlanNode; + return rewriteWith(rewriter, maxSubplan); } private class Rewriter extends ConnectorPlanRewriter { private final PlanNodeIdAllocator idAllocator; - private final Set tableScopeSet; - private boolean hasVisitedFilter; public Rewriter(PlanNodeIdAllocator idAllocator) { this.idAllocator = idAllocator; - hasVisitedFilter = false; - tableScopeSet = new HashSet<>(); - } - - @Override - public PlanNode visitTableScan(TableScanNode node, RewriteContext context) - { - TableHandle tableHandle = node.getTable(); - ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - tableScopeSet.add(format("%s.%s", CONNECTOR_NAME, clpTableHandle.getSchemaTableName())); - return super.visitTableScan(node, context); } @Override @@ -107,40 +86,27 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNode) { - hasVisitedFilter = true; - TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString(); Map assignments = tableScanNode.getAssignments(); - + SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); ClpExpression clpExpression = filterNode.getPredicate().accept( new ClpFilterToKqlConverter( functionResolution, functionManager, assignments, - splitFilterProvider.getColumnNames(tableScope)), + metadataConfig.getMetadataColumns(schemaTableName).keySet(), + metadataConfig.getDataColumnsWithRangeBounds(schemaTableName)), null); Optional kqlQuery = clpExpression.getPushDownExpression(); - Optional metadataSqlQuery = clpExpression.getMetadataSqlQuery(); + Optional metadataExpression = clpExpression.getMetadataExpression(); Optional remainingPredicate = clpExpression.getRemainingExpression(); - // Perform required metadata filter checks before handling the KQL query (if kqlQuery - // isn't present, we'll return early, skipping subsequent checks). - splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), metadataSqlQuery.orElse("")); - boolean hasMetadataFilter = metadataSqlQuery.isPresent() && !metadataSqlQuery.get().isEmpty(); - if (hasMetadataFilter) { - metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get())); - log.debug("Metadata SQL query: %s", metadataSqlQuery.get()); - } - - if (kqlQuery.isPresent() || hasMetadataFilter) { - if (kqlQuery.isPresent()) { - log.debug("KQL query: %s", kqlQuery.get()); - } + if (kqlQuery.isPresent() || metadataExpression.isPresent()) { + kqlQuery.ifPresent(s -> log.debug("KQL query: %s", s)); - ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery); + ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataExpression); TableHandle newTableHandle = new TableHandle( tableHandle.getConnectorId(), clpTableHandle, diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java similarity index 79% rename from presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java index e970f9848a9cf..547f602071f2c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.plugin.clp; +package com.facebook.presto.plugin.clp.optimization; import com.facebook.presto.spi.relation.RowExpression; @@ -31,17 +31,17 @@ public class ClpExpression // Optional KQL query or column name representing the fully or partially translatable part of the expression. private final Optional pushDownExpression; - // Optional SQL string extracted from the query plan, which is only made of up of columns in + // Optional SQL expression extracted from the query plan, which is only made of up of columns in // CLP's metadata database. - private final Optional metadataSqlQuery; + private final Optional metadataExpression; // The remaining (non-translatable) portion of the RowExpression, if any. private final Optional remainingExpression; - public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpression remainingExpression) + public ClpExpression(String pushDownExpression, RowExpression metadataExpression, RowExpression remainingExpression) { this.pushDownExpression = Optional.ofNullable(pushDownExpression); - this.metadataSqlQuery = Optional.ofNullable(metadataSqlQuery); + this.metadataExpression = Optional.ofNullable(metadataExpression); this.remainingExpression = Optional.ofNullable(remainingExpression); } @@ -68,11 +68,11 @@ public ClpExpression(String pushDownExpression) * metadata SQL string. * * @param pushDownExpression - * @param metadataSqlQuery + * @param metadataExpression */ - public ClpExpression(String pushDownExpression, String metadataSqlQuery) + public ClpExpression(String pushDownExpression, RowExpression metadataExpression) { - this(pushDownExpression, metadataSqlQuery, null); + this(pushDownExpression, metadataExpression, null); } /** @@ -90,9 +90,9 @@ public Optional getPushDownExpression() return pushDownExpression; } - public Optional getMetadataSqlQuery() + public Optional getMetadataExpression() { - return metadataSqlQuery; + return metadataExpression; } public Optional getRemainingExpression() diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index b27a61ef0d65a..b1c93946889d8 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -15,11 +15,11 @@ import com.facebook.presto.common.function.OperatorType; import com.facebook.presto.common.type.DecimalType; +import com.facebook.presto.common.type.Decimals; import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.plugin.clp.ClpColumnHandle; -import com.facebook.presto.plugin.clp.ClpExpression; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.FunctionHandle; @@ -32,9 +32,12 @@ import com.facebook.presto.spi.relation.RowExpressionVisitor; import com.facebook.presto.spi.relation.SpecialFormExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import io.airlift.slice.Slice; +import java.math.BigDecimal; +import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -62,6 +65,7 @@ import static com.facebook.presto.common.type.TinyintType.TINYINT; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; +import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.OR; import static java.lang.Integer.parseInt; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -109,17 +113,20 @@ public class ClpFilterToKqlConverter private final FunctionMetadataManager functionMetadataManager; private final Map assignments; private final Set metadataFilterColumns; + private final Set dataColumnsWithRangeBounds; public ClpFilterToKqlConverter( StandardFunctionResolution standardFunctionResolution, FunctionMetadataManager functionMetadataManager, Map assignments, - Set metadataFilterColumns) + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) { this.standardFunctionResolution = requireNonNull(standardFunctionResolution, "standardFunctionResolution is null"); this.functionMetadataManager = requireNonNull(functionMetadataManager, "function metadata manager is null"); this.assignments = requireNonNull(assignments, "assignments is null"); this.metadataFilterColumns = requireNonNull(metadataFilterColumns, "metadataFilterColumns is null"); + this.dataColumnsWithRangeBounds = requireNonNull(dataColumnsWithRangeBounds, "dataColumnsWithRangeBounds is null"); } @Override @@ -200,10 +207,25 @@ public ClpExpression visitExpression(RowExpression node, Void context) */ private String getLiteralString(ConstantExpression literal) { - if (literal.getValue() instanceof Slice) { - return ((Slice) literal.getValue()).toStringUtf8(); + Object value = literal.getValue(); + Type type = literal.getType(); + if (value instanceof Slice) { + if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType) type; + BigInteger unscaled = Decimals.decodeUnscaledValue((Slice) value); + BigDecimal decimalValue = new BigDecimal(unscaled, decimalType.getScale()); + return decimalValue.toPlainString(); + } + return ((Slice) value).toStringUtf8(); + } + + if (type instanceof DecimalType && value instanceof Long) { + DecimalType decimalType = (DecimalType) type; + BigDecimal decimalValue = new BigDecimal(BigInteger.valueOf((Long) value), decimalType.getScale()); + return decimalValue.toPlainString(); } - return literal.toString(); + + return value.toString(); } /** @@ -241,30 +263,81 @@ private ClpExpression handleBetween(CallExpression node) "BETWEEN operator must have exactly three arguments. Received: " + node); } - RowExpression first = arguments.get(0); - RowExpression second = arguments.get(1); - RowExpression third = arguments.get(2); - if (!isClpCompatibleNumericType(first.getType()) - || !isClpCompatibleNumericType(second.getType()) - || !isClpCompatibleNumericType(third.getType())) { + RowExpression lhs = arguments.get(0); + RowExpression lower = arguments.get(1); + RowExpression upper = arguments.get(2); + + Optional variableOpt = lhs.accept(this, null).getPushDownExpression(); + if (!variableOpt.isPresent()) { return new ClpExpression(node); } - Optional variableOpt = first.accept(this, null).getPushDownExpression(); - if (!variableOpt.isPresent() - || !(second instanceof ConstantExpression) - || !(third instanceof ConstantExpression)) { + String variable = variableOpt.get(); + boolean isMetadataColumn = metadataFilterColumns.contains(variable); + + // Type validation + boolean numericCompatible = + isClpCompatibleNumericType(lhs.getType()) + && isClpCompatibleNumericType(lower.getType()) + && isClpCompatibleNumericType(upper.getType()); + + if (!numericCompatible) { + if (isMetadataColumn) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata BETWEEN requires numeric-compatible types. Received: " + node); + } + // Non-metadata columns just fallback (cannot push down) return new ClpExpression(node); } - String variable = variableOpt.get(); - String lowerBound = getLiteralString((ConstantExpression) second); - String upperBound = getLiteralString((ConstantExpression) third); - String kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound); - String metadataSqlQuery = metadataFilterColumns.contains(variable) - ? String.format("\"%s\" >= %s AND \"%s\" <= %s", variable, lowerBound, variable, upperBound) - : null; - return new ClpExpression(kql, metadataSqlQuery); + // Metadata columns must have constant bounds + if (isMetadataColumn && + (!(lower instanceof ConstantExpression) || !(upper instanceof ConstantExpression))) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata BETWEEN requires constant bounds. Received: " + node); + } + + // Pushdown only if both bounds are constants + if (!(lower instanceof ConstantExpression) || !(upper instanceof ConstantExpression)) { + return new ClpExpression(node); + } + + RowExpression metadataExpr = null; + if (isMetadataColumn || dataColumnsWithRangeBounds.contains(variable)) { + VariableReferenceExpression varExpr = + new VariableReferenceExpression(lhs.getSourceLocation(), variable, lhs.getType()); + ConstantExpression lowerConst = (ConstantExpression) lower; + ConstantExpression upperConst = (ConstantExpression) upper; + + // (var >= lower) AND (var <= upper) + metadataExpr = new SpecialFormExpression( + AND, + BOOLEAN, + ImmutableList.of( + new CallExpression( + GREATER_THAN_OR_EQUAL.name(), + standardFunctionResolution.comparisonFunction( + GREATER_THAN_OR_EQUAL, varExpr.getType(), lowerConst.getType()), + BOOLEAN, + ImmutableList.of(varExpr, lowerConst)), + new CallExpression( + LESS_THAN_OR_EQUAL.name(), + standardFunctionResolution.comparisonFunction( + LESS_THAN_OR_EQUAL, varExpr.getType(), upperConst.getType()), + BOOLEAN, + ImmutableList.of(varExpr, upperConst)))); + } + + String kql = null; + if (!isMetadataColumn) { + String lowerBound = getLiteralString((ConstantExpression) lower); + String upperBound = getLiteralString((ConstantExpression) upper); + kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound); + } + + return new ClpExpression(kql, metadataExpr); } /** @@ -283,18 +356,27 @@ private ClpExpression handleNot(CallExpression node) "NOT operator must have exactly one argument. Received: " + node); } - RowExpression input = node.getArguments().get(0); - ClpExpression expression = input.accept(this, null); - if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { + ClpExpression input = node.getArguments().get(0).accept(this, null); + if ((input.getRemainingExpression().isPresent() || !input.getPushDownExpression().isPresent()) + && !input.getMetadataExpression().isPresent()) { return new ClpExpression(node); } - String notPushDownExpression = "NOT " + expression.getPushDownExpression().get(); - if (expression.getMetadataSqlQuery().isPresent()) { - return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSqlQuery()); + + String kql = null; + if (input.getPushDownExpression().isPresent()) { + kql = "NOT " + input.getPushDownExpression().get(); } - else { - return new ClpExpression(notPushDownExpression); + + RowExpression metadataExpr = null; + if (input.getMetadataExpression().isPresent()) { + metadataExpr = new CallExpression( + standardFunctionResolution.notFunction().getName(), + standardFunctionResolution.notFunction(), + BOOLEAN, + ImmutableList.of(input.getMetadataExpression().get())); } + + return new ClpExpression(kql, metadataExpr); } /** @@ -321,6 +403,11 @@ private ClpExpression handleLike(CallExpression node) } String variableName = variable.getPushDownExpression().get(); + if (metadataFilterColumns.contains(variableName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for LIKE predicate" + node); + } RowExpression argument = node.getArguments().get(1); String pattern; @@ -334,7 +421,9 @@ else if (argument instanceof CallExpression) { return new ClpExpression(node); } if (callExpression.getArguments().size() != 1) { - throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "CAST function must have exactly one argument. Received: " + callExpression); + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "CAST function must have exactly one argument. Received: " + callExpression); } if (!(callExpression.getArguments().get(0) instanceof ConstantExpression)) { return new ClpExpression(node); @@ -396,8 +485,9 @@ private ClpExpression handleLogicalBinary(OperatorType operator, CallExpression return buildClpExpression( leftPushDownExpression.get(), // variable rightPushDownExpression.get(), // literal + right, // constant operator, - rightType, + leftType, node); } else if (leftIsConstant) { @@ -405,8 +495,9 @@ else if (leftIsConstant) { return buildClpExpression( rightPushDownExpression.get(), // variable leftPushDownExpression.get(), // literal + left, // constant newOperator, - leftType, + rightType, node); } // fallback @@ -428,49 +519,57 @@ else if (leftIsConstant) { * * @param variableName name of the variable * @param literalString string representation of the literal + * @param constant the original ConstantExpression of literalString * @param operator the comparison operator - * @param literalType the type of the literal - * @param originalNode the original RowExpression node + * @param variableType the type of the variable + * @param originalNode the original CallExpression node * @return a ClpExpression containing either the equivalent KQL query, or the original * expression if it couldn't be translated */ private ClpExpression buildClpExpression( String variableName, String literalString, + RowExpression constant, OperatorType operator, - Type literalType, - RowExpression originalNode) + Type variableType, + CallExpression originalNode) { - String metadataSqlQuery = null; - if (operator.equals(EQUAL)) { - if (literalType instanceof VarcharType) { - return new ClpExpression(format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); + boolean isVarchar = constant.getType() instanceof VarcharType; + boolean isMetadataColumn = metadataFilterColumns.contains(variableName); + boolean isDataColumnsWithRangeBounds = dataColumnsWithRangeBounds.contains(variableName); + String formattedLiteral = isVarchar + ? "\"" + escapeKqlSpecialCharsForStringValue(literalString) + "\"" + : literalString; + + String pushDownExpression = null; + CallExpression metadataExpression = null; + if (!isMetadataColumn) { + if (operator.equals(EQUAL)) { + pushDownExpression = format("%s: %s", variableName, formattedLiteral); } - else { - if (metadataFilterColumns.contains(variableName)) { - metadataSqlQuery = format("\"%s\" = %s", variableName, literalString); - } - return new ClpExpression(format("%s: %s", variableName, literalString), metadataSqlQuery); + else if (operator.equals(NOT_EQUAL)) { + pushDownExpression = format("NOT %s: %s", variableName, formattedLiteral); } - } - else if (operator.equals(NOT_EQUAL)) { - if (literalType instanceof VarcharType) { - return new ClpExpression(format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); + else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !isVarchar) { + pushDownExpression = format("%s %s %s", variableName, operator.getOperator(), literalString); } - else { - if (metadataFilterColumns.contains(variableName)) { - metadataSqlQuery = format("NOT \"%s\" = %s", variableName, literalString); - } - return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSqlQuery); + + if (pushDownExpression == null) { + return new ClpExpression(originalNode); } } - else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) { - if (metadataFilterColumns.contains(variableName)) { - metadataSqlQuery = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString); - } - return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSqlQuery); + + if (isMetadataColumn || isDataColumnsWithRangeBounds) { + metadataExpression = new CallExpression( + operator.name(), + originalNode.getFunctionHandle(), + BOOLEAN, + ImmutableList.of( + new VariableReferenceExpression(Optional.empty(), variableName, variableType), + constant)); } - return new ClpExpression(originalNode); + + return new ClpExpression(pushDownExpression, metadataExpression); } /** @@ -531,6 +630,11 @@ private Optional parseSubstringCall(CallExpression callExpression) } String varName = variable.getPushDownExpression().get(); + if (metadataFilterColumns.contains(varName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for substr call" + callExpression); + } RowExpression startExpression = callExpression.getArguments().get(1); RowExpression lengthExpression = null; if (argCount == 3) { @@ -671,49 +775,47 @@ private Optional parseLengthLiteral(RowExpression lengthExpression, Str */ private ClpExpression handleAnd(SpecialFormExpression node) { - StringBuilder metadataQueryBuilder = new StringBuilder(); - metadataQueryBuilder.append("("); - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); + List pushdownKql = new ArrayList<>(); List remainingExpressions = new ArrayList<>(); - boolean hasMetadataSql = false; - boolean hasPushDownExpression = false; + List metadataExpressions = new ArrayList<>(); + for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); - if (expression.getPushDownExpression().isPresent()) { - hasPushDownExpression = true; - queryBuilder.append(expression.getPushDownExpression().get()); - queryBuilder.append(" AND "); - if (expression.getMetadataSqlQuery().isPresent()) { - hasMetadataSql = true; - metadataQueryBuilder.append(expression.getMetadataSqlQuery().get()); - metadataQueryBuilder.append(" AND "); - } - } - if (expression.getRemainingExpression().isPresent()) { - remainingExpressions.add(expression.getRemainingExpression().get()); - } + expression.getPushDownExpression().ifPresent(pushdownKql::add); + expression.getMetadataExpression().ifPresent(metadataExpressions::add); + expression.getRemainingExpression().ifPresent(remainingExpressions::add); } - if (!hasPushDownExpression) { - return new ClpExpression(node); + + String combinedKql = null; + if (!pushdownKql.isEmpty()) { + combinedKql = "(" + String.join(" AND ", pushdownKql) + ")"; } - else if (!remainingExpressions.isEmpty()) { - if (remainingExpressions.size() == 1) { - return new ClpExpression( - queryBuilder.substring(0, queryBuilder.length() - 5) + ")", - hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, - remainingExpressions.get(0)); - } - else { - return new ClpExpression( - queryBuilder.substring(0, queryBuilder.length() - 5) + ")", - hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, - new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions)); - } + RowExpression combinedMetadataExpression = null; + + if (metadataExpressions.size() == 1) { + combinedMetadataExpression = metadataExpressions.get(0); + } + else if (metadataExpressions.size() > 1) { + combinedMetadataExpression = new SpecialFormExpression( + node.getSourceLocation(), + AND, + BOOLEAN, + metadataExpressions); + } + + RowExpression combinedRemainingExpression = null; + if (remainingExpressions.size() == 1) { + combinedRemainingExpression = remainingExpressions.get(0); + } + else if (remainingExpressions.size() > 1) { + combinedRemainingExpression = new SpecialFormExpression( + node.getSourceLocation(), + AND, + BOOLEAN, + remainingExpressions); } - // Remove the last " AND " from the query - return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")", - hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null); + + return new ClpExpression(combinedKql, combinedMetadataExpression, combinedRemainingExpression); } /** @@ -730,37 +832,55 @@ else if (!remainingExpressions.isEmpty()) { */ private ClpExpression handleOr(SpecialFormExpression node) { - StringBuilder metadataQueryBuilder = new StringBuilder(); - metadataQueryBuilder.append("("); - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); - boolean allPushedDown = true; - boolean hasAllMetadataSql = true; + List pushdownKql = new ArrayList<>(); + List metadataExpressions = new ArrayList<>(); + + boolean hasUnpushable = false; + for (RowExpression argument : node.getArguments()) { - ClpExpression expression = argument.accept(this, null); - // Note: It is possible in the future that an expression cannot be pushed down as a KQL query, but can be - // pushed down as a metadata SQL query. - if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { - allPushedDown = false; - continue; + ClpExpression expr = argument.accept(this, null); + + boolean hasRemaining = expr.getRemainingExpression().isPresent(); + boolean hasKql = expr.getPushDownExpression().isPresent(); + boolean hasMeta = expr.getMetadataExpression().isPresent(); + + // If this arg cannot be pushed down at all, bail early + if (hasRemaining || (!hasKql && !hasMeta)) { + hasUnpushable = true; + break; } - queryBuilder.append(expression.getPushDownExpression().get()); - queryBuilder.append(" OR "); - if (hasAllMetadataSql && expression.getMetadataSqlQuery().isPresent()) { - metadataQueryBuilder.append(expression.getMetadataSqlQuery().get()); - metadataQueryBuilder.append(" OR "); + + if (hasKql) { + pushdownKql.add(expr.getPushDownExpression().get()); } - else { - hasAllMetadataSql = false; + + if (hasMeta) { + metadataExpressions.add(expr.getMetadataExpression().get()); } } - if (allPushedDown) { - // Remove the last " OR " from the query - return new ClpExpression( - queryBuilder.substring(0, queryBuilder.length() - 4) + ")", - hasAllMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null); + + if (hasUnpushable) { + return new ClpExpression(node); + } + + String combinedKql = null; + if (!pushdownKql.isEmpty()) { + combinedKql = "(" + String.join(" OR ", pushdownKql) + ")"; } - return new ClpExpression(node); + + // Only use metadata if every argument has metadata and none had KQL + RowExpression combinedMetadata = null; + if (metadataExpressions.size() == node.getArguments().size() && pushdownKql.isEmpty()) { + combinedMetadata = (metadataExpressions.size() == 1) + ? metadataExpressions.get(0) + : new SpecialFormExpression( + node.getSourceLocation(), + OR, + BOOLEAN, + metadataExpressions); + } + + return new ClpExpression(combinedKql, combinedMetadata); } /** @@ -779,6 +899,11 @@ private ClpExpression handleIn(SpecialFormExpression node) return new ClpExpression(node); } String variableName = variable.getPushDownExpression().get(); + if (metadataFilterColumns.contains(variableName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for IN predicate" + node); + } StringBuilder queryBuilder = new StringBuilder(); queryBuilder.append("("); for (RowExpression argument : node.getArguments().subList(1, node.getArguments().size())) { @@ -823,6 +948,11 @@ private ClpExpression handleIsNull(SpecialFormExpression node) } String variableName = expression.getPushDownExpression().get(); + if (metadataFilterColumns.contains(variableName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for IN predicate" + node); + } return new ClpExpression(format("NOT %s: *", variableName)); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java index b536c95ad216a..e58276c948175 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java @@ -13,7 +13,7 @@ */ package com.facebook.presto.plugin.clp.optimization; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; import com.facebook.presto.spi.function.FunctionMetadataManager; @@ -29,14 +29,17 @@ public class ClpPlanOptimizerProvider { private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpSplitFilterProvider splitFilterProvider; + private final ClpSplitMetadataConfig clpSplitMetadataConfig; @Inject - public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider) + public ClpPlanOptimizerProvider( + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + ClpSplitMetadataConfig clpSplitMetadataConfig) { this.functionManager = functionManager; this.functionResolution = functionResolution; - this.splitFilterProvider = splitFilterProvider; + this.clpSplitMetadataConfig = clpSplitMetadataConfig; } @Override @@ -48,6 +51,6 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpComputePushDown(functionManager, functionResolution, splitFilterProvider)); + return ImmutableSet.of(new ClpComputePushDown(functionManager, functionResolution, clpSplitMetadataConfig)); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java new file mode 100644 index 0000000000000..f1c14415ae3a7 --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -0,0 +1,247 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split; + +import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.common.type.DecimalType; +import com.facebook.presto.common.type.Decimals; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.function.FunctionHandle; +import com.facebook.presto.spi.function.FunctionMetadata; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.ConstantExpression; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.RowExpressionVisitor; +import com.facebook.presto.spi.relation.SpecialFormExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import io.airlift.slice.Slice; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import static com.facebook.presto.common.function.OperatorType.IS_DISTINCT_FROM; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_COLUMN_NOT_IN_FILTER; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +/** + * A converter that converts Presto {@link RowExpression} trees representing metadata predicates + * into SQL filter strings that can be pushed down to MySQL for split-level metadata filtering. + *

+ * The converter: + *
    + *
  • Handles standard logical and comparison operators (AND, OR, =, >, >=, <, <=, IS NULL).
  • + *
  • Supports range-bound rewriting for data columns that have metadata columns representing + * lower and upper bounds.
  • + *
  • Tracks required columns and throws an exception if any are missing in the filter + * expression.
  • + *
+ */ +public class ClpMySqlSplitMetadataExpressionConverter + implements RowExpressionVisitor +{ + private final FunctionMetadataManager functionManager; + private final StandardFunctionResolution functionResolution; + private final Map exposedToOriginal; + private final Map> dataToMetadataBounds; + private final Set requiredColumns; + private final Set seenRequired = new HashSet<>(); + + public ClpMySqlSplitMetadataExpressionConverter( + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + Map exposedToOriginal, + Map> dataToMetadataBounds, + Set requiredColumns) + { + this.functionManager = requireNonNull(functionManager, "functionManager is null"); + this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); + this.exposedToOriginal = exposedToOriginal; + this.dataToMetadataBounds = dataToMetadataBounds; + this.requiredColumns = requiredColumns; + } + + /** + * Converts the given {@link RowExpression} into an equivalent SQL WHERE clause string. + *

+ * After conversion, validates that all required columns were referenced in the expression. If + * any required columns are missing, a {@link PrestoException} is thrown. + * + * @param expression the row expression to convert + * @return a SQL string representing the equivalent predicate + * @throws PrestoException if required columns are missing from the expression + */ + public String transform(RowExpression expression) + { + seenRequired.clear(); + String sql = expression.accept(this, null); + Set missing = new HashSet<>(requiredColumns); + missing.removeAll(seenRequired); + if (!missing.isEmpty()) { + throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "Missing required filter columns: " + missing); + } + return sql; + } + + @Override + public String visitCall(CallExpression node, Void context) + { + FunctionHandle functionHandle = node.getFunctionHandle(); + if (functionResolution.isNotFunction(functionHandle)) { + return format("NOT (%s)", node.getArguments().get(0).accept(this, null)); + } + + FunctionMetadata functionMetadata = functionManager.getFunctionMetadata(node.getFunctionHandle()); + Optional operatorTypeOptional = functionMetadata.getOperatorType(); + if (operatorTypeOptional.isPresent()) { + OperatorType operatorType = operatorTypeOptional.get(); + if (operatorType == OperatorType.NEGATION) { + String value = node.getArguments().get(0).accept(this, null); + return "-" + value; + } + + if (operatorType.isComparisonOperator() && operatorType != IS_DISTINCT_FROM) { + String variableName = node.getArguments().get(0).accept(this, null); + String literalString = node.getArguments().get(1).accept(this, null); + + String rewritten = rewriteComparisonWithBounds(variableName, operatorType, literalString); + if (rewritten != null) { + return rewritten; + } + + return format("%s %s %s", variableName, operatorType.getOperator(), literalString); + } + } + + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query: " + node); + } + + @Override + public String visitSpecialForm(SpecialFormExpression node, Void context) + { + switch (node.getForm()) { + case AND: + case OR: + String op = node.getForm() == SpecialFormExpression.Form.AND ? "AND" : "OR"; + return node.getArguments().stream() + .map(arg -> "(" + arg.accept(this, context) + ")") + .collect(Collectors.joining(" " + op + " ")); + case IS_NULL: + return "(" + node.getArguments().get(0).accept(this, context) + " IS NULL)"; + default: + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query: " + node); + } + } + + @Override + public String visitConstant(ConstantExpression node, Void context) + { + Object value = node.getValue(); + Type type = node.getType(); + if (value instanceof Slice) { + if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType) type; + BigInteger unscaled = Decimals.decodeUnscaledValue((Slice) value); + BigDecimal decimalValue = new BigDecimal(unscaled, decimalType.getScale()); + return decimalValue.toPlainString(); + } + return "'" + ((Slice) value).toStringUtf8().replace("'", "''") + "'"; + } + + if (type instanceof DecimalType && value instanceof Long) { + DecimalType decimalType = (DecimalType) type; + BigDecimal decimalValue = new BigDecimal(BigInteger.valueOf((Long) value), decimalType.getScale()); + return decimalValue.toPlainString(); + } + + return value.toString(); + } + + @Override + public String visitVariableReference(VariableReferenceExpression node, Void context) + { + String exposed = node.getName(); + seenRequired.add(exposed); + return exposedToOriginal.getOrDefault(exposed, exposed); + } + + /** + * Rewrites a comparison operator involving a data column into an equivalent expression using + * its associated range-bound metadata columns (if configured). + *

+ * Examples: + *
    + *
  • col >= 5upper_col >= 5
  • + *
  • col <= 5lower_col <= 5
  • + *
  • col = 5(lower_col <= 5) AND (upper_col >= 5)
  • + *
+ * Returns null if no rewrite is applicable. + * + * @param variableName the name of the column being compared + * @param operator the comparison operator + * @param literal the literal value as a SQL string + * @return a rewritten SQL expression string, or null if no rewrite applies + */ + private String rewriteComparisonWithBounds(String variableName, OperatorType operator, String literal) + { + String original = exposedToOriginal.getOrDefault(variableName, variableName); + Map bounds = dataToMetadataBounds.get(original); + if (bounds == null) { + return null; + } + + String lower = bounds.get("lower"); + String upper = bounds.get("upper"); + + switch (operator) { + case GREATER_THAN: + case GREATER_THAN_OR_EQUAL: + if (upper != null) { + return format("%s %s %s", upper, operator.getOperator(), literal); + } + break; + case LESS_THAN: + case LESS_THAN_OR_EQUAL: + if (lower != null) { + return format("%s %s %s", lower, operator.getOperator(), literal); + } + break; + case EQUAL: + if (lower != null && upper != null) { + return format("(%s <= %s) AND (%s >= %s)", lower, literal, upper, literal); + } + else if (lower != null) { + return format("%s <= %s", lower, literal); + } + else if (upper != null) { + return format("%s >= %s", upper, literal); + } + break; + default: + break; + } + + return null; + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java index 6b54218509c7f..d0905611a2ada 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java @@ -18,6 +18,10 @@ import com.facebook.presto.plugin.clp.ClpSplit; import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; import com.google.common.collect.ImmutableList; import javax.inject.Inject; @@ -29,6 +33,7 @@ import java.sql.SQLException; import java.util.List; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_COLUMN_NOT_IN_FILTER; import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE; import static java.lang.String.format; @@ -47,9 +52,16 @@ public class ClpMySqlSplitProvider private static final Logger log = Logger.get(ClpMySqlSplitProvider.class); private final ClpConfig config; + private final FunctionMetadataManager functionManager; + private final StandardFunctionResolution functionResolution; + private final ClpSplitMetadataConfig metadataConfig; @Inject - public ClpMySqlSplitProvider(ClpConfig config) + public ClpMySqlSplitProvider( + ClpConfig config, + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + ClpSplitMetadataConfig metadataConfig) { try { Class.forName("com.mysql.cj.jdbc.Driver"); @@ -59,6 +71,9 @@ public ClpMySqlSplitProvider(ClpConfig config) throw new RuntimeException("MySQL JDBC driver not found", e); } this.config = config; + this.functionManager = functionManager; + this.functionResolution = functionResolution; + this.metadataConfig = metadataConfig; } @Override @@ -70,10 +85,21 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) String tableName = clpTableHandle.getSchemaTableName().getTableName(); String archivePathQuery = format(SQL_SELECT_ARCHIVES_TEMPLATE, config.getMetadataTablePrefix(), tableName); - if (clpTableLayoutHandle.getMetadataSql().isPresent()) { - String metadataFilterQuery = clpTableLayoutHandle.getMetadataSql().get(); + SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); + if (clpTableLayoutHandle.getMetadataExpression().isPresent()) { + ClpMySqlSplitMetadataExpressionConverter converter = + new ClpMySqlSplitMetadataExpressionConverter( + functionManager, + functionResolution, + metadataConfig.getExposedToOriginalMapping(schemaTableName), + metadataConfig.getDataColumnRangeMapping(schemaTableName), + metadataConfig.getRequiredColumns(schemaTableName)); + String metadataFilterQuery = converter.transform(clpTableLayoutHandle.getMetadataExpression().get()); archivePathQuery += " AND (" + metadataFilterQuery + ")"; } + else if (!metadataConfig.getRequiredColumns(schemaTableName).isEmpty()) { + throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "No required columns specified in the filter"); + } log.debug("Query for archive: %s", archivePathQuery); try (Connection connection = getConnection()) { diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java new file mode 100644 index 0000000000000..c0223fe5a8017 --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -0,0 +1,277 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split; + +import com.facebook.presto.common.type.Type; +import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.common.type.TypeSignature; +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import javax.inject.Inject; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_METADATA_CONFIG_NOT_FOUND; +import static java.util.Objects.requireNonNull; + +/** + * A class that loads and manages split-level metadata configuration. + *

+ * The configuration file defines metadata columns and filtering rules that can be applied in query + * planning to prune irrelevant splits based on metadata (e.g., timestamps, partitions, etc.). + *

+ * The configuration supports a hierarchical namespace structure: + *
    + *
  • Global defaults under an empty string key ("").
  • + *
  • Schema-level overrides under the schema name (e.g., "logs").
  • + *
  • Table-level overrides under the full name (e.g., "logs.events"}).
  • + *
+ *

+ * Configurations from broader scopes are merged with more specific ones, with table-level + * definitions overriding schema-level and global definitions. + */ +public class ClpSplitMetadataConfig +{ + private final Map tableConfigs = new HashMap<>(); + private final TypeManager typeManager; + + /** + * Represents a metadata column entry defined in the split metadata configuration file. Each + * {@link MetaColumn} corresponds to one metadata column that can be exposed in query filters. + */ + public static class MetaColumn + { + public final String name; + public final String type; + public final String exposedAs; + public final String description; + public final String asRangeBoundOf; // optional + public final String boundType; // "lower" or "upper" + + public MetaColumn(String name, JsonNode node) + { + this.name = name; + this.type = node.path("type").asText(); + this.exposedAs = node.path("exposedAs").asText(name); + this.description = node.path("description").asText(null); + JsonNode filter = node.path("filter"); + this.asRangeBoundOf = filter.path("asRangeBoundOf").isMissingNode() ? null : filter.path("asRangeBoundOf").asText(); + this.boundType = filter.path("boundType").isMissingNode() ? null : filter.path("boundType").asText(); + } + } + + /** + * Represents a rule that defines how a metadata column or a data column should be used in + * filtering. A rule may indicate that a column is required for query pruning and include an + * explanation of why it is necessary. + */ + public static class FilterRule + { + public final String column; + public final boolean required; + public final String reason; + + public FilterRule(JsonNode node) + { + this.column = node.path("column").asText(); + this.required = node.path("required").asBoolean(false); + this.reason = node.path("reason").asText(null); + } + } + + public static class TableConfig + { + public final Map metaColumns = new HashMap<>(); + public final List filterRules = new ArrayList<>(); + } + + @Inject + public ClpSplitMetadataConfig(ClpConfig config, TypeManager typeManager) + { + requireNonNull(config, "config is null"); + this.typeManager = requireNonNull(typeManager, "typeManager is null"); + + if (null == config.getSplitMetadataConfigPath()) { + return; + } + + ObjectMapper mapper = new ObjectMapper(); + JsonNode root; + try { + root = mapper.readTree(Files.readAllBytes(Paths.get(config.getSplitMetadataConfigPath()))); + } + catch (IOException e) { + throw new PrestoException(CLP_SPLIT_METADATA_CONFIG_NOT_FOUND, "Failed to open split metadata config file", e); + } + + for (Iterator it = root.fieldNames(); it.hasNext(); ) { + String namespace = it.next(); + JsonNode tableNode = root.get(namespace); + + TableConfig cfg = new TableConfig(); + if (tableNode.has("metaColumns")) { + for (Iterator m = tableNode.get("metaColumns").fieldNames(); m.hasNext(); ) { + String colName = m.next(); + cfg.metaColumns.put(colName, new MetaColumn(colName, tableNode.get("metaColumns").get(colName))); + } + } + if (tableNode.has("filterRules")) { + for (JsonNode rule : tableNode.get("filterRules")) { + cfg.filterRules.add(new FilterRule(rule)); + } + } + tableConfigs.put(namespace, cfg); + } + } + + /** + * Returns the mapping of exposed metadata column names to their Presto {@link Type}s for the + * given table. + * + * @param name the {@link SchemaTableName} of the target table + * @return a map from exposed column name → type + */ + public Map getMetadataColumns(SchemaTableName name) + { + TableConfig cfg = getTableConfig(name); + Map result = new LinkedHashMap<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + result.put(c.exposedAs, typeManager.getType(TypeSignature.parseTypeSignature(c.type))); + } + return result; + } + + /** + * Returns the set of metadata columns marked as required in the configuration. + * + * @param name the {@link SchemaTableName} of the target table + * @return a set of column names that are required for filtering + */ + public Set getRequiredColumns(SchemaTableName name) + { + TableConfig cfg = getTableConfig(name); + Set requiredColumns = new LinkedHashSet<>(); + for (FilterRule rule : cfg.filterRules) { + if (rule.required) { + requiredColumns.add(rule.column); + } + } + return requiredColumns; + } + + /** + * Returns a mapping of exposed metadata column names to their original internal names. + * + * @param name the {@link SchemaTableName} of the target table + * @return a map from exposed column name → original column name + */ + public Map getExposedToOriginalMapping(SchemaTableName name) + { + TableConfig cfg = getTableConfig(name); + Map mapping = new HashMap<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + mapping.put(c.exposedAs, c.name); + } + return mapping; + } + + /** + * Returns the set of data column names that have associated range bounds defined in the + * metadata configuration. + * + * @param name the {@link SchemaTableName} of the target table + * @return a set of data column names that have range bounds + */ + public Set getDataColumnsWithRangeBounds(SchemaTableName name) + { + TableConfig cfg = getTableConfig(name); + Set result = new LinkedHashSet<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + if (c.asRangeBoundOf != null) { + result.add(c.asRangeBoundOf); + } + } + return result; + } + + /** + * Returns a mapping from data column names to their associated range bound metadata columns. + *

+ * Each entry maps a data column name to another map with keys {@code "lower"} and/or + * {@code "upper"}, representing the metadata column names that define those bounds. + * + * @param name the {@link SchemaTableName} of the target table + * @return a nested mapping from data column name → ("lower"/"upper" → metadata column name) + */ + public Map> getDataColumnRangeMapping(SchemaTableName name) + { + TableConfig cfg = getTableConfig(name); + Map> mapping = new HashMap<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + if (c.asRangeBoundOf != null && c.boundType != null) { + mapping.computeIfAbsent(c.asRangeBoundOf, k -> new HashMap<>()) + .put(c.boundType, c.name); + } + } + return mapping; + } + + /** + * Merges and returns the effective {@link TableConfig} for the given table, taking into account + * the hierarchical configuration structure: global → schema → table. + * + * @param name the {@link SchemaTableName} of the target table + * @return the merged table configuration + */ + private TableConfig getTableConfig(SchemaTableName name) + { + TableConfig merged = new TableConfig(); + + List namespaces = new ArrayList<>(); + namespaces.add(""); + namespaces.add(name.getSchemaName()); + namespaces.add(name.getSchemaName() + "." + name.getTableName()); + + for (String ns : namespaces) { + TableConfig cfg = tableConfigs.get(ns); + if (cfg != null) { + merged.metaColumns.putAll(cfg.metaColumns); + + for (FilterRule rule : cfg.filterRules) { + boolean exists = merged.filterRules.stream() + .anyMatch(r -> r.column.equals(rule.column)); + if (!exists) { + merged.filterRules.add(rule); + } + } + } + } + + return merged; + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java deleted file mode 100644 index 31d24fd4df71c..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.ImmutableMap; -import com.google.inject.Inject; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static java.lang.String.format; - -/** - * Implementation for the CLP package's MySQL metadata database. - */ -public class ClpMySqlSplitFilterProvider - extends ClpSplitFilterProvider -{ - @Inject - public ClpMySqlSplitFilterProvider(ClpConfig config) - { - super(config); - } - - /** - * Performs regex-based replacements to rewrite {@code pushDownExpression} according to the - * {@code "rangeMapping"} field in {@link ClpMySqlCustomSplitFilterOptions}. For example: - *

    - *
  • {@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}
  • - *
  • {@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}
  • - *
  • {@code "msg.timestamp" = 4567} → - * {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}
  • - *
- * - * @param scope the filter's scope - * @param pushDownExpression the expression to be rewritten - * @return the rewritten expression - */ - @Override - public String remapSplitFilterPushDownExpression(String scope, String pushDownExpression) - { - String[] splitScope = scope.split("\\."); - - Map mappings = new HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0]))); - - if (1 < splitScope.length) { - mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1]))); - } - - if (3 == splitScope.length) { - mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope))); - } - - String remappedSql = pushDownExpression; - for (Map.Entry entry : mappings.entrySet()) { - String key = entry.getKey(); - ClpMySqlCustomSplitFilterOptions.RangeMapping value = entry.getValue(); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(>=?)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), - format("%s $2 $3", value.upperBound)); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(<=?)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), - format("%s $2 $3", value.lowerBound)); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(=)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), - format("(%s <= $3 AND %s >= $3)", value.lowerBound, value.upperBound)); - } - return remappedSql; - } - - @Override - protected Class getCustomSplitFilterOptionsClass() - { - return ClpMySqlCustomSplitFilterOptions.class; - } - - private Map getAllMappingsFromFilters(List filters) - { - return null != filters - ? filters.stream() - .filter(filter -> - filter.customOptions instanceof ClpMySqlCustomSplitFilterOptions && - ((ClpMySqlCustomSplitFilterOptions) filter.customOptions).rangeMapping != null) - .collect(toImmutableMap( - filter -> filter.columnName, - filter -> ((ClpMySqlCustomSplitFilterOptions) filter.customOptions).rangeMapping)) - : ImmutableMap.of(); - } - - /** - * Custom options: - *
    - *
  • {@code rangeMapping} (optional): an object with the following properties: - *
      - *
    • {@code lowerBound}: The numeric metadata column that represents the lower bound - * of values in a split for the numeric data column.
    • - *
    • {@code upperBound}: The numeric metadata column that represents the upper bound - * of values in a split for the numeric data column.
    • - *
    - *
  • - *
- */ - protected static class ClpMySqlCustomSplitFilterOptions - implements CustomSplitFilterOptions - { - @JsonProperty("rangeMapping") - public RangeMapping rangeMapping; - - public static class RangeMapping - { - @JsonProperty("lowerBound") - public String lowerBound; - - @JsonProperty("upperBound") - public String upperBound; - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (!(o instanceof RangeMapping)) { - return false; - } - RangeMapping that = (RangeMapping) o; - return Objects.equals(lowerBound, that.lowerBound) && - Objects.equals(upperBound, that.upperBound); - } - - @Override - public int hashCode() - { - return Objects.hash(lowerBound, upperBound); - } - } - } -} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java deleted file mode 100644 index 0d7e37a803515..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.fasterxml.jackson.annotation.JsonProperty; - -/** - * Options for a how a column in a Presto query should be pushed down into a query against CLP's - * metadata database (during split pruning): - *
    - *
  • {@code columnName}: The column's name in the Presto query.
  • - * - *
  • {@code customOptions}: Options specific to the current - * {@link ClpSplitFilterProvider}.
  • - * - *
  • {@code required} (optional, defaults to {@code false}): Whether the filter must be - * present in the generated metadata query. If a required filter is missing or cannot be added to - * the metadata query, the original query will be rejected.
  • - *
- */ -public class ClpSplitFilterConfig -{ - @JsonProperty("columnName") - public String columnName; - - @JsonProperty("customOptions") - public CustomSplitFilterOptions customOptions; - - @JsonProperty("required") - public boolean required; - - public interface CustomSplitFilterOptions - {} -} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java deleted file mode 100644 index 3464c14883e26..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import java.io.IOException; - -import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; - -/** - * Uses the given {@link CustomSplitFilterOptions} implementation to deserialize the - * {@code "customOptions"} field in a {@link ClpSplitFilterConfig}. The implementation is determined - * by the implemented {@link ClpSplitFilterProvider}. - */ -public class ClpSplitFilterConfigCustomOptionsDeserializer - extends JsonDeserializer -{ - private final Class actualCustomSplitFilterOptionsClass; - - public ClpSplitFilterConfigCustomOptionsDeserializer(Class actualCustomSplitFilterOptionsClass) - { - this.actualCustomSplitFilterOptionsClass = actualCustomSplitFilterOptionsClass; - } - - @Override - public CustomSplitFilterOptions deserialize(JsonParser p, DeserializationContext ctxt) throws IOException - { - ObjectNode node = p.getCodec().readTree(p); - ObjectMapper mapper = (ObjectMapper) p.getCodec(); - - return mapper.treeToValue(node, actualCustomSplitFilterOptionsClass); - } -} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java deleted file mode 100644 index 0609843aaf22f..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import com.facebook.presto.spi.PrestoException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; - -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Function; - -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_SPLIT_FILTER_NOT_VALID; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_FILTER_CONFIG_NOT_FOUND; -import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static java.util.Objects.requireNonNull; - -/** - * Loads and manages {@link ClpSplitFilterConfig}s from a config file. - *

- * The config file is specified by the {@code clp.split-filter-config} property. - *

- * Filter configs can be declared at either a catalog, schema, or table scope. Filter configs under - * a particular scope will apply to all child scopes (e.g., schema-level filter configs will apply - * to all tables within that schema). - *

- * Implementations of this class can customize filter configs through the {@code "customOptions"} - * field within each {@link ClpSplitFilterConfig}. - */ -public abstract class ClpSplitFilterProvider -{ - protected final Map> filterMap; - - public ClpSplitFilterProvider(ClpConfig config) - { - requireNonNull(config, "config is null"); - - if (null == config.getSplitFilterConfig()) { - filterMap = ImmutableMap.of(); - return; - } - ObjectMapper mapper = new ObjectMapper(); - SimpleModule module = new SimpleModule(); - module.addDeserializer( - CustomSplitFilterOptions.class, - new ClpSplitFilterConfigCustomOptionsDeserializer(getCustomSplitFilterOptionsClass())); - mapper.registerModule(module); - try { - filterMap = mapper.readValue( - new File(config.getSplitFilterConfig()), - new TypeReference>>() {}); - } - catch (IOException e) { - throw new PrestoException(CLP_SPLIT_FILTER_CONFIG_NOT_FOUND, "Failed to open split filter config file", e); - } - } - - /** - * Rewrites {@code pushDownExpression} to remap filter conditions based on the - * {@code "customOptions"} for the given scope. - *

- * {@code scope} follows the format {@code catalog[.schema][.table]}, and determines which - * filter mappings to apply, since mappings from more specific scopes (e.g., table-level) - * override or supplement those from broader scopes (e.g., catalog-level). For each scope - * (catalog, schema, table), this method collects all mappings defined in - * {@code "customOptions"}. - * - * @param scope the scope of the filter - * @param pushDownExpression the expression to be rewritten - * @return the rewritten expression - */ - public abstract String remapSplitFilterPushDownExpression(String scope, String pushDownExpression); - - /** - * Checks for the given table, if {@code splitFilterPushDownExpression} contains all required - * fields. - * - * @param tableScopeSet the set of scopes of the tables that are being queried - * @param splitFilterPushDownExpression the expression to be checked - */ - public void checkContainsRequiredFilters(Set tableScopeSet, String splitFilterPushDownExpression) - { - boolean hasRequiredSplitFilterColumns = true; - ImmutableList.Builder notFoundListBuilder = ImmutableList.builder(); - for (String tableScope : tableScopeSet) { - for (String columnName : getRequiredColumnNames(tableScope)) { - if (!splitFilterPushDownExpression.contains(columnName)) { - hasRequiredSplitFilterColumns = false; - notFoundListBuilder.add(columnName); - } - } - } - if (!hasRequiredSplitFilterColumns) { - throw new PrestoException( - CLP_MANDATORY_SPLIT_FILTER_NOT_VALID, - notFoundListBuilder.build() + " is a mandatory split filter column but not valid"); - } - } - - public Set getColumnNames(String scope) - { - return collectColumnNamesFromScopes(scope, this::getAllColumnNamesFromFilters); - } - - /** - * Returns the implemented {@link CustomSplitFilterOptions} class. To respect our code style, we - * recommend implementing a {@code protected static class} as an inner class in the implemented - * {@link ClpSplitFilterProvider} class. - * - * @return the implemented {@link CustomSplitFilterOptions} class - */ - protected abstract Class getCustomSplitFilterOptionsClass(); - - private Set getRequiredColumnNames(String scope) - { - return collectColumnNamesFromScopes(scope, this::getRequiredColumnNamesFromFilters); - } - - private Set collectColumnNamesFromScopes(String scope, Function, Set> extractor) - { - String[] splitScope = scope.split("\\."); - ImmutableSet.Builder builder = ImmutableSet.builder(); - - builder.addAll(extractor.apply(filterMap.get(splitScope[0]))); - - if (splitScope.length > 1) { - builder.addAll(extractor.apply(filterMap.get(splitScope[0] + "." + splitScope[1]))); - } - - if (splitScope.length == 3) { - builder.addAll(extractor.apply(filterMap.get(scope))); - } - - return builder.build(); - } - - private Set getAllColumnNamesFromFilters(List filters) - { - return null != filters ? filters.stream() - .map(filter -> filter.columnName) - .collect(toImmutableSet()) : ImmutableSet.of(); - } - - private Set getRequiredColumnNamesFromFilters(List filters) - { - return null != filters ? filters.stream() - .filter(filter -> filter.required) - .map(filter -> filter.columnName) - .collect(toImmutableSet()) : ImmutableSet.of(); - } -} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java deleted file mode 100644 index d1d0ee6964c8e..0000000000000 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp; - -import com.facebook.airlift.log.Logger; -import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; -import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; -import com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType; -import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; -import org.apache.commons.io.FileUtils; -import org.apache.commons.math3.util.Pair; - -import java.io.File; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.List; -import java.util.Map; - -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.COLUMN_METADATA_TABLE_COLUMN_NAME; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.COLUMN_METADATA_TABLE_COLUMN_TYPE; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_COLUMN_ARCHIVE_STORAGE_DIRECTORY; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_COLUMN_NAME; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_SUFFIX; -import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_COLUMN_ID; -import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_SUFFIX; -import static java.lang.String.format; -import static java.util.UUID.randomUUID; -import static org.testng.Assert.fail; - -public final class ClpMetadataDbSetUp -{ - public static final String METADATA_DB_PASSWORD = ""; - public static final String METADATA_DB_TABLE_PREFIX = "clp_"; - public static final String METADATA_DB_URL_TEMPLATE = "jdbc:h2:file:%s;MODE=MySQL;DATABASE_TO_UPPER=FALSE"; - public static final String METADATA_DB_USER = "sa"; - public static final String ARCHIVES_STORAGE_DIRECTORY_BASE = "/tmp/archives/"; - - private static final Logger log = Logger.get(ClpMetadataDbSetUp.class); - private static final String DATASETS_TABLE_NAME = METADATA_DB_TABLE_PREFIX + DATASETS_TABLE_SUFFIX; - private static final String ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP = "begin_timestamp"; - private static final String ARCHIVES_TABLE_COLUMN_PAGINATION_ID = "pagination_id"; - private static final String ARCHIVES_TABLE_COLUMN_END_TIMESTAMP = "end_timestamp"; - - private ClpMetadataDbSetUp() - { - throw new UnsupportedOperationException("This is a utility class and cannot be instantiated"); - } - - public static DbHandle getDbHandle(String dbName) - { - return new DbHandle(format("/tmp/presto-clp-test-%s/%s", randomUUID(), dbName)); - } - - public static ClpMetadata setupMetadata(DbHandle dbHandle, Map>> clpFields) - { - final String metadataDbUrl = format(METADATA_DB_URL_TEMPLATE, dbHandle.dbPath); - final String columnMetadataTableSuffix = "_column_metadata"; - - try (Connection conn = DriverManager.getConnection(metadataDbUrl, METADATA_DB_USER, METADATA_DB_PASSWORD); Statement stmt = conn.createStatement()) { - createDatasetsTable(stmt); - - for (Map.Entry>> entry : clpFields.entrySet()) { - String tableName = entry.getKey(); - String columnMetadataTableName = METADATA_DB_TABLE_PREFIX + tableName + columnMetadataTableSuffix; - String createColumnMetadataSQL = format( - "CREATE TABLE IF NOT EXISTS %s (" + - " %s VARCHAR(512) NOT NULL," + - " %s TINYINT NOT NULL," + - " PRIMARY KEY (%s, %s))", - columnMetadataTableName, - COLUMN_METADATA_TABLE_COLUMN_NAME, - COLUMN_METADATA_TABLE_COLUMN_TYPE, - COLUMN_METADATA_TABLE_COLUMN_NAME, - COLUMN_METADATA_TABLE_COLUMN_TYPE); - String insertColumnMetadataSQL = format( - "INSERT INTO %s (%s, %s) VALUES (?, ?)", - columnMetadataTableName, - COLUMN_METADATA_TABLE_COLUMN_NAME, - COLUMN_METADATA_TABLE_COLUMN_TYPE); - - stmt.execute(createColumnMetadataSQL); - updateDatasetsTable(conn, tableName); - - try (PreparedStatement pstmt = conn.prepareStatement(insertColumnMetadataSQL)) { - for (Pair record : entry.getValue()) { - pstmt.setString(1, record.getFirst()); - pstmt.setByte(2, record.getSecond().getType()); - pstmt.addBatch(); - } - pstmt.executeBatch(); - } - } - } - catch (SQLException e) { - fail(e.getMessage()); - } - - ClpConfig config = new ClpConfig() - .setPolymorphicTypeEnabled(true) - .setMetadataDbUrl(metadataDbUrl) - .setMetadataDbUser(METADATA_DB_USER) - .setMetadataDbPassword(METADATA_DB_PASSWORD) - .setMetadataTablePrefix(METADATA_DB_TABLE_PREFIX); - ClpMetadataProvider metadataProvider = new ClpMySqlMetadataProvider(config); - return new ClpMetadata(config, metadataProvider); - } - - public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map> splits) - { - final String metadataDbUrl = format(METADATA_DB_URL_TEMPLATE, dbHandle.dbPath); - final String archiveTableFormat = METADATA_DB_TABLE_PREFIX + "%s" + ARCHIVES_TABLE_SUFFIX; - - try (Connection conn = DriverManager.getConnection(metadataDbUrl, METADATA_DB_USER, METADATA_DB_PASSWORD); Statement stmt = conn.createStatement()) { - createDatasetsTable(stmt); - - // Create and populate archive tables - for (Map.Entry> tableSplits : splits.entrySet()) { - String tableName = tableSplits.getKey(); - updateDatasetsTable(conn, tableName); - - String archiveTableName = format(archiveTableFormat, tableSplits.getKey()); - String createArchiveTableSQL = format( - "CREATE TABLE IF NOT EXISTS %s (" + - "%s BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, " + - "%s VARCHAR(64) NOT NULL, " + - "%s BIGINT, " + - "%s BIGINT)", - archiveTableName, - ARCHIVES_TABLE_COLUMN_PAGINATION_ID, - ARCHIVES_TABLE_COLUMN_ID, - ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP, - ARCHIVES_TABLE_COLUMN_END_TIMESTAMP); - - stmt.execute(createArchiveTableSQL); - - String insertArchiveTableSQL = format( - "INSERT INTO %s (%s, %s, %s) VALUES (?, ?, ?)", - archiveTableName, - ARCHIVES_TABLE_COLUMN_ID, - ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP, - ARCHIVES_TABLE_COLUMN_END_TIMESTAMP); - try (PreparedStatement pstmt = conn.prepareStatement(insertArchiveTableSQL)) { - for (ArchivesTableRow split : tableSplits.getValue()) { - pstmt.setString(1, split.id); - pstmt.setLong(2, split.beginTimestamp); - pstmt.setLong(3, split.endTimestamp); - pstmt.addBatch(); - } - pstmt.executeBatch(); - } - } - } - catch (SQLException e) { - fail(e.getMessage()); - } - - return new ClpMySqlSplitProvider( - new ClpConfig() - .setPolymorphicTypeEnabled(true) - .setMetadataDbUrl(metadataDbUrl) - .setMetadataDbUser(METADATA_DB_USER) - .setMetadataDbPassword(METADATA_DB_PASSWORD) - .setMetadataTablePrefix(METADATA_DB_TABLE_PREFIX)); - } - - public static void tearDown(DbHandle dbHandle) - { - File dir = new File(dbHandle.dbPath).getParentFile(); - if (dir.exists()) { - try { - FileUtils.deleteDirectory(dir); - log.info("Deleted database dir" + dir.getAbsolutePath()); - } - catch (IOException e) { - log.warn("Failed to delete directory " + dir + ": " + e.getMessage()); - } - } - } - - private static void createDatasetsTable(Statement stmt) - throws SQLException - { - final String createDatasetsTableSql = format( - "CREATE TABLE IF NOT EXISTS %s (%s VARCHAR(255) PRIMARY KEY, %s VARCHAR(4096) NOT NULL)", - DATASETS_TABLE_NAME, - DATASETS_TABLE_COLUMN_NAME, - DATASETS_TABLE_COLUMN_ARCHIVE_STORAGE_DIRECTORY); - stmt.execute(createDatasetsTableSql); - } - - private static void updateDatasetsTable(Connection conn, String tableName) - throws SQLException - { - final String insertDatasetsTableSql = format( - "INSERT INTO %s (%s, %s) VALUES (?, ?)", - DATASETS_TABLE_NAME, - DATASETS_TABLE_COLUMN_NAME, - DATASETS_TABLE_COLUMN_ARCHIVE_STORAGE_DIRECTORY); - try (PreparedStatement pstmt = conn.prepareStatement(insertDatasetsTableSql)) { - pstmt.setString(1, tableName); - pstmt.setString(2, ARCHIVES_STORAGE_DIRECTORY_BASE + tableName); - pstmt.executeUpdate(); - } - } - - static final class DbHandle - { - private final String dbPath; - - DbHandle(String dbPath) - { - this.dbPath = dbPath; - } - - public String getDbPath() - { - return dbPath; - } - } - - static final class ArchivesTableRow - { - private final String id; - private final long beginTimestamp; - private final long endTimestamp; - - ArchivesTableRow(String id, long beginTimestamp, long endTimestamp) - { - this.id = id; - this.beginTimestamp = beginTimestamp; - this.endTimestamp = endTimestamp; - } - - public String getId() - { - return id; - } - - public long getBeginTimestamp() - { - return beginTimestamp; - } - - public long getEndTimestamp() - { - return endTimestamp; - } - } -} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java index 413c9cd773a6d..29146e107bb2d 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java @@ -13,10 +13,13 @@ */ package com.facebook.presto.plugin.clp; +import com.facebook.presto.plugin.clp.optimization.ClpExpression; import com.facebook.presto.plugin.clp.optimization.ClpFilterToKqlConverter; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.planner.TypeProvider; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.testng.annotations.Test; @@ -25,6 +28,7 @@ import java.util.Optional; import java.util.Set; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; @@ -227,48 +231,62 @@ public void testComplexPushDown() } @Test - public void testMetadataSqlGeneration() + public void testMetadataExpressionGeneration() { SessionHolder sessionHolder = new SessionHolder(); Set testMetadataFilterColumns = ImmutableSet.of("fare"); + Set testDataColumnsWithRangeBounds = ImmutableSet.of("city.Region.Id"); // Normal case testPushDown( sessionHolder, "(fare > 0 AND city.Name like 'b%')", - "(fare > 0 AND city.Name: \"b*\")", - "(\"fare\" > 0)", - testMetadataFilterColumns); + "(city.Name: \"b*\")", + "fare > 0", + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); // With BETWEEN testPushDown( sessionHolder, "((fare BETWEEN 0 AND 5) AND city.Name like 'b%')", - "(fare >= 0 AND fare <= 5 AND city.Name: \"b*\")", - "(\"fare\" >= 0 AND \"fare\" <= 5)", - testMetadataFilterColumns); + "(city.Name: \"b*\")", + "fare >= 0 AND fare <= 5", + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); // The cases of that the metadata filter column exist but cannot be push down testPushDown( sessionHolder, "(fare > 0 OR city.Name like 'b%')", - "(fare > 0 OR city.Name: \"b*\")", + "(city.Name: \"b*\")", null, - testMetadataFilterColumns); + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); testPushDown( sessionHolder, "(fare > 0 AND city.Name like 'b%') OR city.Region.Id = 1", - "((fare > 0 AND city.Name: \"b*\") OR city.Region.Id: 1)", + "((city.Name: \"b*\") OR city.Region.Id: 1)", null, - testMetadataFilterColumns); + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); // Complicated case testPushDown( sessionHolder, "fare = 0 AND (city.Name like 'b%' OR city.Region.Id = 1)", - "(fare: 0 AND (city.Name: \"b*\" OR city.Region.Id: 1))", - "(\"fare\" = 0)", - testMetadataFilterColumns); + "((city.Name: \"b*\" OR city.Region.Id: 1))", + "fare = 0", + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); + testPushDown( + sessionHolder, + "city.Region.Id = 1 AND (city.Name like 'b%' OR fare = 0)", + "(city.Region.Id: 1 AND (city.Name: \"b*\"))", + "\"city.Region.Id\" = 1", + TypeProvider.viewOf(ImmutableMap.of("city.Region.Id", BIGINT)), + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); } @Test @@ -306,27 +324,55 @@ public void testClpWildcardUdf() private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedRemaining) { - ClpExpression clpExpression = tryPushDown(sql, sessionHolder, ImmutableSet.of()); + ClpExpression clpExpression = tryPushDown(sql, sessionHolder, ImmutableSet.of(), ImmutableSet.of()); testFilter(clpExpression, expectedKql, expectedRemaining, sessionHolder); } - private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedMetadataSqlQuery, Set metadataFilterColumns) + private void testPushDown( + SessionHolder sessionHolder, + String sql, + String expectedKql, + String expectedMetadataSqlQuery, + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) + { + testPushDown( + sessionHolder, + sql, + expectedKql, + expectedMetadataSqlQuery, + typeProvider, + metadataFilterColumns, + dataColumnsWithRangeBounds); + } + + private void testPushDown( + SessionHolder sessionHolder, + String sql, + String expectedKql, + String expectedMetadataSqlQuery, + TypeProvider typeProviderForMetadataExpression, + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) { - ClpExpression clpExpression = tryPushDown(sql, sessionHolder, metadataFilterColumns); + ClpExpression clpExpression = tryPushDown(sql, sessionHolder, metadataFilterColumns, dataColumnsWithRangeBounds); testFilter(clpExpression, expectedKql, null, sessionHolder); if (expectedMetadataSqlQuery != null) { - assertTrue(clpExpression.getMetadataSqlQuery().isPresent()); - assertEquals(clpExpression.getMetadataSqlQuery().get(), expectedMetadataSqlQuery); + assertTrue(clpExpression.getMetadataExpression().isPresent()); + assertEquals( + clpExpression.getMetadataExpression().get(), + getRowExpression(expectedMetadataSqlQuery, typeProviderForMetadataExpression, sessionHolder)); } else { - assertFalse(clpExpression.getMetadataSqlQuery().isPresent()); + assertFalse(clpExpression.getMetadataExpression().isPresent()); } } private ClpExpression tryPushDown( String sqlExpression, SessionHolder sessionHolder, - Set metadataFilterColumns) + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) { RowExpression pushDownExpression = getRowExpression(sqlExpression, sessionHolder); Map assignments = new HashMap<>(variableToColumnHandleMap); @@ -335,7 +381,8 @@ private ClpExpression tryPushDown( standardFunctionResolution, functionAndTypeManager, assignments, - metadataFilterColumns), + metadataFilterColumns, + dataColumnsWithRangeBounds), null); } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java index 9350729aec2d7..681d70ab1f1a6 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java @@ -19,6 +19,7 @@ import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; import com.facebook.presto.plugin.clp.mockdb.ClpMockMetadataDatabase; import com.facebook.presto.plugin.clp.mockdb.table.ColumnMetadataTableRows; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorTableMetadata; import com.facebook.presto.spi.SchemaTableName; @@ -29,6 +30,10 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.io.FileNotFoundException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; import java.util.HashSet; import java.util.Optional; @@ -36,6 +41,7 @@ import static com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Boolean; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.ClpString; @@ -54,7 +60,7 @@ public class TestClpMetadata private ClpMetadata metadata; @BeforeMethod - public void setUp() + public void setUp() throws FileNotFoundException, URISyntaxException { mockMetadataDatabase = ClpMockMetadataDatabase .builder() @@ -83,7 +89,15 @@ public void setUp() .setMetadataDbUser(mockMetadataDatabase.getUsername()) .setMetadataDbPassword(mockMetadataDatabase.getPassword()) .setMetadataTablePrefix(mockMetadataDatabase.getTablePrefix()); - ClpMetadataProvider metadataProvider = new ClpMySqlMetadataProvider(config); + + URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); + if (resource == null) { + throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); + } + + config.setSplitMetadataConfigPath(Paths.get(resource.toURI()).toAbsolutePath().toString()); + ClpMetadataProvider metadataProvider = + new ClpMySqlMetadataProvider(config, new ClpSplitMetadataConfig(config, createTestFunctionAndTypeManager())); metadata = new ClpMetadata(config, metadataProvider); } @@ -115,6 +129,7 @@ public void testGetTableMetadata() ClpTableHandle clpTableHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME)); ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); ImmutableSet columnMetadata = ImmutableSet.builder() + // data columns .add(ColumnMetadata.builder() .setName("a_bigint") .setType(BIGINT) @@ -150,6 +165,17 @@ public void testGetTableMetadata() RowType.field("h", new ArrayType(VARCHAR)))))))) .setNullable(true) .build()) + // metadata columns + .add(ColumnMetadata.builder() + .setName("level") + .setType(BIGINT) + .setNullable(true) + .build()) + .add(ColumnMetadata.builder() + .setName("author") + .setType(VARCHAR) + .setNullable(true) + .build()) .build(); assertEquals(columnMetadata, ImmutableSet.copyOf(tableMetadata.getColumns())); } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMySqlSplitMetadataExpressionConverter.java new file mode 100644 index 0000000000000..784d0938cdc38 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMySqlSplitMetadataExpressionConverter.java @@ -0,0 +1,156 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp; + +import com.facebook.presto.plugin.clp.split.ClpMySqlSplitMetadataExpressionConverter; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.sql.planner.TypeProvider; +import com.google.common.collect.ImmutableMap; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; + +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static java.lang.String.format; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + +@Test(singleThreaded = true) +public class TestClpMySqlSplitMetadataExpressionConverter + extends TestClpQueryBase +{ + private TypeProvider typeProvider; + private ClpSplitMetadataConfig splitMetadataConfig; + private ClpMySqlSplitMetadataExpressionConverter converter; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); + if (resource == null) { + throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); + } + + typeProvider = TypeProvider.viewOf( + ImmutableMap.of("msg.timestamp", BIGINT, "begin_timestamp", BIGINT, "end_timestamp", BIGINT)); + + ClpConfig config = new ClpConfig(); + config.setSplitMetadataConfigPath(Paths.get(resource.toURI()).toAbsolutePath().toString()); + splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + + SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + converter = new ClpMySqlSplitMetadataExpressionConverter( + functionAndTypeManager, + standardFunctionResolution, + splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), + splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), + splitMetadataConfig.getRequiredColumns(schemaTableName)); + } + + @Test + public void checkRequiredColumns() + { + SessionHolder sessionHolder = new SessionHolder(); + SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + + ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( + functionAndTypeManager, + standardFunctionResolution, + splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), + splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), + splitMetadataConfig.getRequiredColumns(schemaTableName)); + + TypeProvider typeProvider = TypeProvider.viewOf(ImmutableMap.of("level", BIGINT, "msg.timestamp", BIGINT)); + assertThrows(PrestoException.class, () + -> converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", typeProvider, sessionHolder))); + + converter.transform( + getRowExpression("(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)", typeProvider, sessionHolder)); + } + + @Test + public void remapSplitFilterPushDownExpression() + { + // Integer + testRange(1234, 5678); + testRange(-5678, -1234); + + // Decimal + testRange(1234.001, 5678.999); + testRange(-5678.999, -1234.001); + + // Scientific + testRange("1.234E3", "5.678e3", 1234.0, 5678.0); + testRange("-1.234e-3", "-5.678E-3", -0.001234, -0.005678); + } + + private void testRange(T lowerBound, T upperBound) + { + testRange(lowerBound, upperBound, lowerBound, upperBound); + } + + private void testRange(T lowerBound, T upperBound, T expectedLowerBound, T expectedUpperBound) + { + SessionHolder sessionHolder = new SessionHolder(); + + String remappedSql1 = converter.transform( + getRowExpression( + format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql1, + format("(end_timestamp > %s) AND (begin_timestamp < %s)", expectedLowerBound, expectedUpperBound)); + + String remappedSql2 = converter.transform( + getRowExpression( + format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql2, + format("(end_timestamp >= %s) AND (begin_timestamp <= %s)", expectedLowerBound, expectedUpperBound)); + + String remappedSql3 = converter.transform( + getRowExpression( + format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql3, + format("(end_timestamp > %s) AND (begin_timestamp <= %s)", expectedLowerBound, expectedUpperBound)); + + String remappedSql4 = converter.transform( + getRowExpression( + format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql4, + format("(end_timestamp >= %s) AND (begin_timestamp < %s)", expectedLowerBound, expectedUpperBound)); + + String remappedSql5 = converter.transform( + getRowExpression(format("(\"msg.timestamp\" = %s)", lowerBound), typeProvider, sessionHolder)); + assertEquals( + remappedSql5, + format("(begin_timestamp <= %s) AND (end_timestamp >= %s)", expectedLowerBound, expectedLowerBound)); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java index ee259d67e17c9..adebc714b91e6 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java @@ -115,6 +115,11 @@ protected RowExpression toRowExpression(Expression expression, TypeProvider type return SqlToRowExpressionTranslator.translate(expression, expressionTypes, ImmutableMap.of(), functionAndTypeManager, session); } + protected RowExpression getRowExpression(String sqlExpression, TypeProvider typeProvider, SessionHolder sessionHolder) + { + return toRowExpression(expression(sqlExpression), typeProvider, sessionHolder.getSession()); + } + protected RowExpression getRowExpression(String sqlExpression, SessionHolder sessionHolder) { return toRowExpression(expression(sqlExpression), typeProvider, sessionHolder.getSession()); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java index 60ce10b871cb5..4ee73f9680de6 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java @@ -16,8 +16,10 @@ import com.facebook.presto.plugin.clp.mockdb.ClpMockMetadataDatabase; import com.facebook.presto.plugin.clp.mockdb.table.ArchivesTableRows; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.plugin.clp.split.ClpSplitProvider; import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.sql.planner.TypeProvider; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.testng.annotations.AfterMethod; @@ -28,17 +30,19 @@ import java.util.Map; import java.util.Optional; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; import static org.testng.Assert.assertEquals; @Test(singleThreaded = true) public class TestClpSplit + extends TestClpQueryBase { private ClpMockMetadataDatabase mockMetadataDatabase; private ClpSplitProvider clpSplitProvider; + private TypeProvider typeProvider; private Map tableSplits; @BeforeMethod @@ -80,7 +84,12 @@ public void setUp() .setMetadataDbUser(mockMetadataDatabase.getUsername()) .setMetadataDbPassword(mockMetadataDatabase.getPassword()) .setMetadataTablePrefix(mockMetadataDatabase.getTablePrefix()); - clpSplitProvider = new ClpMySqlSplitProvider(config); + clpSplitProvider = new ClpMySqlSplitProvider( + config, + functionAndTypeManager, + standardFunctionResolution, + new ClpSplitMetadataConfig(config, functionAndTypeManager)); + typeProvider = TypeProvider.viewOf(ImmutableMap.of("begin_timestamp", BIGINT, "end_timestamp", BIGINT)); } @AfterMethod @@ -135,12 +144,13 @@ private void compareListSplitsResult( Optional metadataSql, List expectedSplitIndexes) { + final String storageBase = "/tmp/archives/"; String tableName = entry.getKey(); - String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName; + String tablePath = storageBase + tableName; ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle( new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, tableName), tablePath), Optional.empty(), - metadataSql); + metadataSql.map(sql -> getRowExpression(sql, typeProvider, new SessionHolder()))); List expectedSplitPaths = expectedSplitIndexes.stream() .map(expectedSplitIndex -> format("%s/%s", tablePath, entry.getValue().getIds().get(expectedSplitIndex))) .collect(toImmutableList()); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfig.java new file mode 100644 index 0000000000000..7f8d2dd483411 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfig.java @@ -0,0 +1,121 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp; + +import com.facebook.presto.common.type.Type; +import com.facebook.presto.plugin.clp.split.ClpMySqlSplitMetadataExpressionConverter; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.sql.planner.TypeProvider; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; +import java.util.Map; +import java.util.Set; + +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static java.util.UUID.randomUUID; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; +import static org.testng.Assert.assertTrue; + +@Test(singleThreaded = true) +public class TestClpSplitMetadataConfig + extends TestClpQueryBase +{ + private String splitMetadataConfigPath; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); + if (resource == null) { + throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); + } + + splitMetadataConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + } + + @Test + public void checkRequiredColumns() + { + SessionHolder sessionHolder = new SessionHolder(); + ClpConfig config = new ClpConfig(); + config.setSplitMetadataConfigPath(splitMetadataConfigPath); + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + + ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( + functionAndTypeManager, + standardFunctionResolution, + splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), + splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), + splitMetadataConfig.getRequiredColumns(schemaTableName)); + + TypeProvider typeProvider = TypeProvider.viewOf(ImmutableMap.of("level", BIGINT, "msg.timestamp", BIGINT)); + assertThrows(PrestoException.class, () + -> converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", typeProvider, sessionHolder))); + + converter.transform( + getRowExpression("(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)", typeProvider, sessionHolder)); + } + + @Test + public void getMetadataColumnNames() + { + ClpConfig config = new ClpConfig(); + config.setSplitMetadataConfigPath(splitMetadataConfigPath); + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + + Map columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("other", "test")); + assertEquals(ImmutableMap.of("level", BIGINT), columns); + columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "table_2")); + assertEquals(ImmutableMap.of("level", BIGINT, "author", VARCHAR), columns); + columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "table_1")); + assertEquals(columns, ImmutableMap.of( + "level", BIGINT, + "author", VARCHAR, + "begin_timestamp", BIGINT, + "end_timestamp", BIGINT, + "file_name", VARCHAR)); + Set dataColumnsWithRangeBounds = + splitMetadataConfig.getDataColumnsWithRangeBounds(new SchemaTableName("default", "table_1")); + assertEquals(dataColumnsWithRangeBounds, ImmutableSet.of("msg.timestamp")); + } + + @Test + public void handleEmptyAndInvalidSplitMetadataConfig() + { + ClpConfig config = new ClpConfig(); + + // Empty config + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + assertTrue(splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "clp")).isEmpty()); + assertTrue(splitMetadataConfig.getMetadataColumns(new SchemaTableName("abc", "xyz")).isEmpty()); + assertTrue(splitMetadataConfig.getMetadataColumns(new SchemaTableName("abc.opq", "xyz")).isEmpty()); + + // Invalid config + config.setSplitMetadataConfigPath(randomUUID().toString()); + assertThrows(PrestoException.class, () -> new ClpSplitMetadataConfig(config, functionAndTypeManager)); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java index 3b83d1c95d10b..ddfacbc285f8e 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java @@ -21,10 +21,11 @@ import com.facebook.presto.cost.StatsProvider; import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.metadata.Metadata; +import com.facebook.presto.plugin.clp.mockdb.ClpMockMetadataDatabase; +import com.facebook.presto.plugin.clp.mockdb.table.ColumnMetadataTableRows; import com.facebook.presto.plugin.clp.optimization.ClpComputePushDown; import com.facebook.presto.plugin.clp.optimization.ClpUdfRewriter; -import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.VariableAllocator; @@ -45,7 +46,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import org.apache.commons.math3.util.Pair; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -60,13 +60,6 @@ import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.metadata.FunctionExtractor.extractFunctions; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_PASSWORD; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_TABLE_PREFIX; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_URL_TEMPLATE; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_USER; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.getDbHandle; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupMetadata; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Boolean; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.ClpString; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Float; @@ -80,7 +73,6 @@ import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.node; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.project; import static com.facebook.presto.testing.TestingSession.testSessionBuilder; -import static java.lang.String.format; @Test(singleThreaded = true) public class TestClpUdfRewriter @@ -91,44 +83,49 @@ public class TestClpUdfRewriter .setSchema(ClpMetadata.DEFAULT_SCHEMA_NAME) .build(); - private ClpMetadataDbSetUp.DbHandle dbHandle; + private ClpMockMetadataDatabase mockMetadataDatabase; ClpTableHandle table; private LocalQueryRunner localQueryRunner; private FunctionAndTypeManager functionAndTypeManager; private FunctionResolution functionResolution; - private ClpSplitFilterProvider splitFilterProvider; + private ClpSplitMetadataConfig splitMetadataConfig; private PlanNodeIdAllocator planNodeIdAllocator; private VariableAllocator variableAllocator; @BeforeMethod public void setUp() { - dbHandle = getDbHandle("metadata_query_testdb"); final String tableName = "test"; - final String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName; - table = new ClpTableHandle(new SchemaTableName("default", tableName), tablePath); - - setupMetadata(dbHandle, - ImmutableMap.of( - tableName, + table = new ClpTableHandle(new SchemaTableName("default", tableName), "test"); + mockMetadataDatabase = ClpMockMetadataDatabase.builder().build(); + mockMetadataDatabase.addTableToDatasetsTableIfNotExist(ImmutableList.of(tableName)); + mockMetadataDatabase.addColumnMetadata(ImmutableMap.of( + tableName, + new ColumnMetadataTableRows( + ImmutableList.of( + "city.Name", + "city.Region.Id", + "city.Region.Name", + "fare", + "isHoliday"), ImmutableList.of( - new Pair<>("city.Name", ClpString), - new Pair<>("city.Region.Id", Integer), - new Pair<>("city.Region.Name", VarString), - new Pair<>("fare", Float), - new Pair<>("isHoliday", Boolean)))); + ClpString, + Integer, + VarString, + Float, + Boolean)))); localQueryRunner = new LocalQueryRunner(defaultSession); localQueryRunner.createCatalog("clp", new ClpConnectorFactory(), ImmutableMap.of( - "clp.metadata-db-url", format(METADATA_DB_URL_TEMPLATE, dbHandle.getDbPath()), - "clp.metadata-db-user", METADATA_DB_USER, - "clp.metadata-db-password", METADATA_DB_PASSWORD, - "clp.metadata-table-prefix", METADATA_DB_TABLE_PREFIX)); + "clp.metadata-db-url", mockMetadataDatabase.getUrl(), + "clp.metadata-db-user", mockMetadataDatabase.getUsername(), + "clp.metadata-db-password", mockMetadataDatabase.getPassword(), + "clp.metadata-table-prefix", mockMetadataDatabase.getTablePrefix())); localQueryRunner.getMetadata().registerBuiltInFunctions(extractFunctions(new ClpPlugin().getFunctions())); functionAndTypeManager = localQueryRunner.getMetadata().getFunctionAndTypeManager(); functionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); - splitFilterProvider = new ClpMySqlSplitFilterProvider(new ClpConfig()); + splitMetadataConfig = new ClpSplitMetadataConfig(new ClpConfig(), functionAndTypeManager); planNodeIdAllocator = new PlanNodeIdAllocator(); variableAllocator = new VariableAllocator(); } @@ -137,7 +134,9 @@ public void setUp() public void tearDown() { localQueryRunner.close(); - ClpMetadataDbSetUp.tearDown(dbHandle); + if (null != mockMetadataDatabase) { + mockMetadataDatabase.teardown(); + } } @Test @@ -153,7 +152,7 @@ public void testClpGetScanFilter() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( @@ -192,7 +191,7 @@ public void testClpGetScanProject() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( @@ -241,7 +240,7 @@ public void testClpGetScanProjectFilter() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( @@ -278,7 +277,7 @@ public void testClpGetJsonString() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java deleted file mode 100644 index 27d9c86ee0092..0000000000000 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.file.Paths; - -import static java.lang.String.format; -import static org.testng.Assert.assertEquals; - -@Test(singleThreaded = true) -public class TestClpMySqlSplitFilterConfig -{ - private String filterConfigPath; - - @BeforeMethod - public void setUp() throws IOException, URISyntaxException - { - URL resource = getClass().getClassLoader().getResource("test-mysql-split-filter.json"); - if (resource == null) { - throw new FileNotFoundException("test-mysql-split-filter.json not found in resources"); - } - - filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); - } - - @Test - public void remapSplitFilterPushDownExpression() - { - ClpConfig config = new ClpConfig(); - config.setSplitFilterConfig(filterConfigPath); - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); - - // Integer - testRange(1234, 5678, filterProvider); - testRange(-5678, -1234, filterProvider); - - // Decimal - testRange(1234.001, 5678.999, filterProvider); - testRange(-5678.999, -1234.001, filterProvider); - - // Scientific - testRange("1.234E3", "5.678e3", filterProvider); - testRange("-1.234e-3", "-5.678E-3", filterProvider); - } - - private void testRange(T lowerBound, T upperBound, ClpMySqlSplitFilterProvider filterProvider) - { - String splitFilterSql1 = format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound); - String remappedSql1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql1); - assertEquals(remappedSql1, format("(end_timestamp > %s AND begin_timestamp < %s)", lowerBound, upperBound)); - - String splitFilterSql2 = format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound); - String remappedSql2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql2); - assertEquals(remappedSql2, format("(end_timestamp >= %s AND begin_timestamp <= %s)", lowerBound, upperBound)); - - String splitFilterSql3 = format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound); - String remappedSql3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql3); - assertEquals(remappedSql3, format("(end_timestamp > %s AND begin_timestamp <= %s)", lowerBound, upperBound)); - - String splitFilterSql4 = format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound); - String remappedSql4 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql4); - assertEquals(remappedSql4, format("(end_timestamp >= %s AND begin_timestamp < %s)", lowerBound, upperBound)); - - String splitFilterSql5 = format("(\"msg.timestamp\" = %s)", lowerBound); - String remappedSql5 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql5); - assertEquals(remappedSql5, format("((begin_timestamp <= %s AND end_timestamp >= %s))", lowerBound, lowerBound)); - } -} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java deleted file mode 100644 index 7a4058f617d0c..0000000000000 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import com.facebook.presto.spi.PrestoException; -import com.facebook.presto.spi.SchemaTableName; -import com.google.common.collect.ImmutableSet; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.file.Paths; -import java.util.Set; - -import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; -import static java.lang.String.format; -import static java.util.UUID.randomUUID; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertThrows; -import static org.testng.Assert.assertTrue; - -@Test(singleThreaded = true) -public class TestClpSplitFilterConfigCommon -{ - private String filterConfigPath; - - @BeforeMethod - public void setUp() throws IOException, URISyntaxException - { - URL resource = getClass().getClassLoader().getResource("test-mysql-split-filter.json"); - if (resource == null) { - throw new FileNotFoundException("test-mysql-split-filter.json not found in resources"); - } - - filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); - } - - @Test - public void checkRequiredFilters() - { - ClpConfig config = new ClpConfig(); - config.setSplitFilterConfig(filterConfigPath); - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); - Set testTableScopeSet = ImmutableSet.of(format("%s.%s", CONNECTOR_NAME, new SchemaTableName("default", "table_1"))); - assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters( - testTableScopeSet, - "(\"level\" >= 1 AND \"level\" <= 3)")); - filterProvider.checkContainsRequiredFilters( - testTableScopeSet, - "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)"); - } - - @Test - public void getFilterNames() - { - ClpConfig config = new ClpConfig(); - config.setSplitFilterConfig(filterConfigPath); - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); - Set catalogFilterNames = filterProvider.getColumnNames("clp"); - assertEquals(ImmutableSet.of("level"), catalogFilterNames); - Set schemaFilterNames = filterProvider.getColumnNames("clp.default"); - assertEquals(ImmutableSet.of("level", "author"), schemaFilterNames); - Set tableFilterNames = filterProvider.getColumnNames("clp.default.table_1"); - assertEquals(ImmutableSet.of("level", "author", "msg.timestamp", "file_name"), tableFilterNames); - } - - @Test - public void handleEmptyAndInvalidSplitFilterConfig() - { - ClpConfig config = new ClpConfig(); - - // Empty config - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); - assertTrue(filterProvider.getColumnNames("clp").isEmpty()); - assertTrue(filterProvider.getColumnNames("abc.xyz").isEmpty()); - assertTrue(filterProvider.getColumnNames("abc.opq.xyz").isEmpty()); - - // Invalid config - config.setSplitFilterConfig(randomUUID().toString()); - assertThrows(PrestoException.class, () -> new ClpMySqlSplitFilterProvider(config)); - } -} diff --git a/presto-clp/src/test/resources/test-mysql-split-filter.json b/presto-clp/src/test/resources/test-mysql-split-filter.json deleted file mode 100644 index ea5be310aff63..0000000000000 --- a/presto-clp/src/test/resources/test-mysql-split-filter.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "clp": [ - { - "columnName": "level" - } - ], - "clp.default": [ - { - "columnName": "author" - } - ], - "clp.default.table_1": [ - { - "columnName": "msg.timestamp", - "customOptions": { - "rangeMapping": { - "lowerBound": "begin_timestamp", - "upperBound": "end_timestamp" - } - }, - "required": true - }, - { - "columnName": "file_name" - } - ] -} diff --git a/presto-clp/src/test/resources/test-mysql-split-metadata.json b/presto-clp/src/test/resources/test-mysql-split-metadata.json new file mode 100644 index 0000000000000..94c1e234ff717 --- /dev/null +++ b/presto-clp/src/test/resources/test-mysql-split-metadata.json @@ -0,0 +1,44 @@ +{ + "": { + "metaColumns": { + "level": { + "type": "BIGINT" + } + } + }, + "default": { + "metaColumns": { + "author": { + "type": "VARCHAR" + } + } + }, + "default.table_1": { + "metaColumns": { + "begin_timestamp": { + "type": "BIGINT", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "lower" + } + }, + "end_timestamp": { + "type": "BIGINT", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "upper" + } + }, + "file_name": { + "type": "VARCHAR" + } + }, + "filterRules": [ + { + "column": "msg.timestamp", + "required": true, + "reason": "Full scan would be too expensive without timestamp filtering." + } + ] + } +} diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index fc1631c3766f8..ab1e41930deb6 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -78,8 +78,8 @@ Property Name Description ``clp.metadata-refresh-interval`` Specifies how frequently metadata is refreshed from the source, in 60 seconds. Set this to a lower value for frequently changing datasets or to a higher value to reduce load. -``clp.split-filter-config`` The absolute path to an optional split filter config file. See the - :ref:`Split Filter Config File` section for +``clp.split-metadata-config-path`` The absolute path to an optional split metadata config file. See the + :ref:`Split Metadata Config File` section for details. ``clp.split-filter-provider-type`` Specifies the split filter provider type. Currently, the only supported ``mysql`` type is a MySQL database, which is also used by the CLP package to @@ -103,120 +103,176 @@ If you prefer to use a different source--or the same source with a custom implem implementations of the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector accordingly. -.. _split-filter-config-file: +.. _split-metadata-config-file: -*************************** -Split Filter Config File -*************************** +************************** +Split Metadata Config File +************************** -The split filter config file allows you to configure the set of columns that can be used to filter out irrelevant -splits (CLP archives) when querying CLP's metadata database. This can significantly improve performance by reducing the -amount of data that needs to be scanned. For a given query, the connector will translate any supported filter predicates -that involve the configured columns into a query against CLP's metadata database. +The split metadata config file allows you to configure the set of metadata columns that can be used in ``WHERE`` clause +to filter out irrelevant splits (CLP archives) when querying CLP's metadata database. This can significantly improve +performance by reducing the amount of data that needs to be scanned. For a given query, the connector will translate +any supported filter predicates that involve the configured columns into a query against CLP's metadata database. -The configuration is a JSON object where each key under the root represents a :ref:`scope` and each scope maps -to an array of :ref:`filter configs`. +Structure Overview +================== -.. _scopes: +The configuration is a JSON object where each top-level key represents a namespace. Each namespace defines metadata and +filtering behavior for one or more tables. -Scopes -====== +A *namespace* can be one of the following: -A *scope* can be one of the following: +- An empty string ``""`` (applies globally to all schemas and tables) +- A schema name (applies to all tables in the schema) +- A schema and table name delimited by ``"."`` (applies only to that table) -- A catalog name -- A fully-qualified schema name -- A fully-qualified table name +When resolving configuration for a specific table: -Filter configs under a particular scope will apply to all child scopes. For example, filter configs at the schema level -will apply to all tables within that schema. +1. The connector merges definitions from matching namespaces in this order: -.. _filter-configs: + - ``""`` (global) + - ``"schema"`` + - ``"schema.table"`` -Filter Configs -============== +2. Later (more specific) namespaces **override** or **extend** parent configurations. -Each filter config indicates how a *data column*---i.e., a column in the Presto table---should be mapped to one or -more *metadata columns*---i.e., columns in CLP's metadata database. +3. Duplicate filter rules are deduplicated by ``column`` name. -For example, an integer data column (e.g., ``timestamp``), may be remapped to a pair of metadata columns that represent -the range of possible values (e.g., ``begin_timestamp`` and ``end_timestamp``) of the data column within a split. +Namespace Configuration Format +============================== -Each filter config has the following options: +Each namespace maps to an object containing the following fields: -- ``columnName``: The data column's name. +- ``metaColumns``: an object defining metadata columns for this namespace. +- ``filterRules``: an array specifying filters that must or can be used in queries. -- ``customOptions`` *(optional)*: Custom options for a split filter provider. Options for the default split filter - provider (``ClpMySqlSplitFilterProvider``) are :ref:`below`. +Meta Columns +------------ -- ``required`` *(optional, defaults to false)*: Whether the filter **must** be present in the generated metadata query. - If a required filter is missing or cannot be added to the metadata query, the original query will be rejected. +Each entry under ``metaColumns`` describes a single metadata column. Metadata columns represent attributes of data +splits, such as file path, partition date, or timestamp range. The key of each entry is the column name used to filter +splits (e.g. in a metadata filter SQL query). However, we can expose a different name to Presto. -.. _clp-mysql-split-filter-provider-config: +Supported fields: -ClpMySqlSplitFilterProvider-Specific Filter Config ------------------------------------------------ +- ``type``: string, the Presto type of the metadata column (e.g., ``VARCHAR``, ``DATE``, ``BIGINT``) +- ``exposedAs``: string, optional, the name exposed to queries (defaults to the original column name) +- ``description``: string, optional, human-readable description +- ``filter.asRangeBoundOf``: string, optional, the data column name this metadata column bounds +- ``filter.boundType``: string, optional, must be ``"lower"`` or ``"upper"``, indicates bound type -For ``ClpMySqlSplitFilterProvider``, the ``customOptions`` option of the filter config has the following sub-options: +Example:: -- ``rangeMapping`` *(optional)*: an object with the following properties: + "metaColumns": { + "partition_date": { + "type": "DATE", + "exposedAs": "partition_date", + "description": "Logical partition of the data file" + }, + "$ir_path": { + "type": "STRING", + "exposedAs": "tpath", + "description": "Internal file path" + }, + "begin_timestamp": { + "type": "LONG", + "description": "Start of the timestamp range for the file", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "lower" + } + }, + "end_timestamp": { + "type": "LONG", + "description": "End of the timestamp range for the file", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "upper" + } + } + } + +Filter Rules +------------ - .. note:: This option is only valid if the column has a numeric type. +Each entry in ``filterRules`` specifies either a data column that has a range mapping to a metadata column or a metadata +column that must appear in query filters. These rules ensure that queries contain sufficient constraints for efficient +split selection. - - ``lowerBound``: The metadata column that represents the lower bound of values in a split for the data column. - - ``upperBound``: The metadata column that represents the upper bound of values in a split for the data column. +Supported fields: -Filter Config Example ---------------------- +- ``column``: string, the name of the data column or metadata column (as seen by Presto) +- ``required``: boolean, optional, whether this column **must** appear in query filters (default: ``false``) +- ``reason``: string, optional, explanation of why the column is required -The code block shows an example filter config file: +Example:: + + "filterRules": [ + { + "column": "msg.timestamp", + "required": true, + "reason": "Full scan would be too expensive without timestamp filtering." + } + ] + +Complete Example +---------------- + +The code block shows an example of split metadata config file: .. code-block:: json { - "clp": [ - { - "columnName": "level" - } - ], - "clp.default": [ - { - "columnName": "author" - } - ], - "clp.default.table_1": [ - { - "columnName": "msg.timestamp", - "customOptions": { - "rangeMapping": { - "lowerBound": "begin_timestamp", - "upperBound": "end_timestamp" - } + "": { + "metaColumns": { + "global_meta": { + "type": "STRING", + "description": "Global metadata column" + } + }, + "filterRules": [] + }, + "default": { + "metaColumns": { + "partition_date": { + "type": "DATE", + "exposedAs": "partition_date", + "description": "Logical partition of the data file" + } + }, + }, + "default.table_1": { + "metaColumns": { + "begin_timestamp": { + "type": "LONG", + "filter": { "asRangeBoundOf": "msg.timestamp", "boundType": "lower" } }, - "required": true + "end_timestamp": { + "type": "LONG", + "filter": { "asRangeBoundOf": "msg.timestamp", "boundType": "upper" } + } }, - { - "columnName": "file_name" - } - ] + "filterRules": [ + { + "column": "msg.timestamp", + "required": true, + "reason": "Full scan would be too expensive without timestamp filtering." + } + ] + } } -- The first key-value pair adds the following filter configs for all schemas and tables under the ``clp`` catalog: - - - The column ``level`` is used as-is without remapping. - -- The second key-value pair adds the following filter configs for all tables under the ``clp.default`` schema: - - - The column ``author`` is used as-is without remapping. - -- The third key-value pair adds two filter configs for the table ``clp.default.table_1``: +- The empty string namespace ``""`` defines **global metadata** that applies to all schemas and tables. In this case, + ``global_meta`` column is available everywhere. - - The column ``msg.timestamp`` is remapped via a ``rangeMapping`` to the metadata columns ``begin_timestamp`` and - ``end_timestamp``, and is required to exist in every query. - - The column ``file_name`` is used as-is without remapping. +- The schema namespace ``"default"`` defines metadata columns and filters for **all tables in the `default` schema**. + - ``partition_date`` is exposed as ``partition_date`` for query usage. -If you prefer to use a different format for ``customOptions``, you can provide your own implementation of the -``ClpSplitFilterProvider`` interface, and configure the connector accordingly. +- The table-specific namespace ``"default.table_1"`` defines metadata and filter rules for a **single table**: + - ``begin_timestamp`` and ``end_timestamp`` map to the data column ``msg.timestamp`` as lower and upper bounds. + This enables range-based filtering on timestamp values when generating split queries. + - The filter rule for ``msg.timestamp`` is marked as **required**, ensuring that queries without timestamp constraints + are rejected. Supported SQL Expressions =========================