From 68dde54da379f4481645f62869e4da009d65022c Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 27 Oct 2025 10:02:30 -0400 Subject: [PATCH 01/26] refactor progress --- .../clp/split/ClpSplitMetadataConfig.java | 202 ++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java new file mode 100644 index 0000000000000..f06a4b0ee383a --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -0,0 +1,202 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split; + +import com.facebook.presto.common.type.Type; +import com.facebook.presto.plugin.clp.ClpConfig; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static java.util.Objects.requireNonNull; + +public class ClpSplitMetadataConfig +{ + /** Metadata column definition */ + public static class MetaColumn { + public String type; // STRING, DATE, BIGINT + public String exposedAs; // optional logical name + public Filter filter; // optional filter config + public String name; // original column name, assigned during load + + public static class Filter { + public Boolean allowDirectFilter; // default true + public String asRangeBoundOf; // optional: maps to data column + public String boundType; // lower/upper + } + + public String getExposedName() { + return exposedAs != null ? exposedAs : name; + } + } + + /** Filter rule (required filters, etc.) */ + public static class FilterRule { + public String column; + public Boolean required; // default false + public String reason; + } + + /** Per-dataset configuration */ + public static class TableConfig { + public String inherits; + public Map metaColumns = new HashMap<>(); + public List filterRules = new ArrayList<>(); + } + + private final Map rawConfigs = new HashMap<>(); + private final Map resolvedConfigs = new HashMap<>(); + + public ClpSplitMetadataConfig(ClpConfig config) throws Exception { + requireNonNull(config, "config is null"); + + if (null == config.getSplitFilterConfig()) { + return; + } + + ObjectMapper mapper = new ObjectMapper(); + Map map = mapper.readValue(new File(config.getSplitFilterConfig()), new TypeReference<>() {}); + rawConfigs.putAll(map); + validateInheritance(); + } + + /** Detect cycles in inheritance */ + private void validateInheritance() { + for (String key : rawConfigs.keySet()) { + Set visited = new HashSet<>(); + String current = key; + while (rawConfigs.get(current).inherits != null) { + current = rawConfigs.get(current).inherits; + if (visited.contains(current)) { + throw new RuntimeException("Cycle detected in inheritance: " + key); + } + visited.add(current); + } + } + } + + /** Resolve dataset config with nested inheritance */ + public TableConfig getConfig(String dataset) { + if (resolvedConfigs.containsKey(dataset)) { + return resolvedConfigs.get(dataset); + } + + TableConfig config = rawConfigs.get(dataset); + if (config == null) { + // fallback to parent/grandparent by dataset name + int lastDot = dataset.lastIndexOf('.'); + if (lastDot > 0) { + return getConfig(dataset.substring(0, lastDot)); + } + throw new RuntimeException("Dataset config not found: " + dataset); + } + + TableConfig merged = new TableConfig(); + if (config.inherits != null) { + TableConfig parent = getConfig(config.inherits); + merged.metaColumns.putAll(parent.metaColumns); + merged.filterRules.addAll(parent.filterRules); + } + + // Override child metaColumns and filterRules + merged.metaColumns.putAll(config.metaColumns); + merged.filterRules.addAll(config.filterRules); + + // Set default allowDirectFilter = true if not specified + merged.metaColumns.values().forEach(mc -> { + mc.name = mc.name == null ? getKeyForMeta(mc, config) : mc.name; + if (mc.filter == null) mc.filter = new MetaColumn.Filter(); + if (mc.filter.allowDirectFilter == null) mc.filter.allowDirectFilter = true; + }); + + resolvedConfigs.put(dataset, merged); + return merged; + } + + // Helper to assign original name + private String getKeyForMeta(MetaColumn mc, TableConfig datasetConfig) { + for (Map.Entry e : datasetConfig.metaColumns.entrySet()) { + if (e.getValue() == mc) return e.getKey(); + } + return null; + } + + /** Convert metadata columns to Presto types with exposed names */ + public Map resolveMetadataSchema(String dataset) { + TableConfig config = getConfig(dataset); + Map schema = new LinkedHashMap<>(); + for (MetaColumn mc : config.metaColumns.values()) { + String prestoType; + switch (mc.type.toUpperCase()) { + case "STRING": prestoType = "VARCHAR"; break; + case "DATE": prestoType = "DATE"; break; + case "BIGINT": prestoType = "BIGINT"; break; + default: prestoType = "VARCHAR"; break; + } + schema.put(mc.getExposedName(), prestoType); + } + return schema; + } + + /** Generate metadata filter SQL */ + public String generateMetadataFilterSql(String dataset, Map filters) { + TableConfig config = getConfig(dataset); + List conditions = new ArrayList<>(); + Set requiredColumns = new HashSet<>(); + for (FilterRule rule : config.filterRules) { + if (Boolean.TRUE.equals(rule.required)) requiredColumns.add(rule.column); + } + + for (Map.Entry e : filters.entrySet()) { + String col = e.getKey(); + Object val = e.getValue(); + + // Translate msg.timestamp -> begin_timestamp / end_timestamp + MetaColumn begin = null, end = null; + for (MetaColumn mc : config.metaColumns.values()) { + if (mc.filter != null && col.equals(mc.filter.asRangeBoundOf)) { + if ("lower".equals(mc.filter.boundType)) begin = mc; + if ("upper".equals(mc.filter.boundType)) end = mc; + } + } + if (begin != null && end != null && val instanceof List && ((List) val).size() == 2) { + Object lowerVal = ((List) val).get(0); + Object upperVal = ((List) val).get(1); + conditions.add(begin.name + " >= " + lowerVal); + conditions.add(end.name + " <= " + upperVal); + requiredColumns.remove(col); + } else { + // direct filter on metadata + if (config.metaColumns.containsKey(col)) { + conditions.add(col + " = '" + val + "'"); + requiredColumns.remove(col); + } + } + } + + if (!requiredColumns.isEmpty()) { + throw new RuntimeException("Required filters missing: " + requiredColumns); + } + + return String.join(" AND ", conditions); + } +} From aa85805fe0e9975ebfdf3bad12704c1d355617d3 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 3 Nov 2025 15:32:36 -0500 Subject: [PATCH 02/26] progress --- .../presto/plugin/clp/ClpExpression.java | 8 ++--- .../optimization/ClpFilterToKqlConverter.java | 31 ++++++++++++++++--- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java index e970f9848a9cf..cb5779cece24f 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java @@ -31,9 +31,9 @@ public class ClpExpression // Optional KQL query or column name representing the fully or partially translatable part of the expression. private final Optional pushDownExpression; - // Optional SQL string extracted from the query plan, which is only made of up of columns in + // Optional SQL expression extracted from the query plan, which is only made of up of columns in // CLP's metadata database. - private final Optional metadataSqlQuery; + private final Optional metadataExpression; // The remaining (non-translatable) portion of the RowExpression, if any. private final Optional remainingExpression; @@ -41,7 +41,7 @@ public class ClpExpression public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpression remainingExpression) { this.pushDownExpression = Optional.ofNullable(pushDownExpression); - this.metadataSqlQuery = Optional.ofNullable(metadataSqlQuery); + this.metadataExpression = Optional.ofNullable(metadataSqlQuery); this.remainingExpression = Optional.ofNullable(remainingExpression); } @@ -92,7 +92,7 @@ public Optional getPushDownExpression() public Optional getMetadataSqlQuery() { - return metadataSqlQuery; + return metadataExpression; } public Optional getRemainingExpression() diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index f99369b4a48bd..9371585fa1795 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -32,6 +32,7 @@ import com.facebook.presto.spi.relation.RowExpressionVisitor; import com.facebook.presto.spi.relation.SpecialFormExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import io.airlift.slice.Slice; @@ -259,10 +260,32 @@ private ClpExpression handleBetween(CallExpression node) String lowerBound = getLiteralString((ConstantExpression) second); String upperBound = getLiteralString((ConstantExpression) third); String kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound); - String metadataSqlQuery = metadataFilterColumns.contains(variable) - ? String.format("\"%s\" >= %s AND \"%s\" <= %s", variable, lowerBound, variable, upperBound) - : null; - return new ClpExpression(kql, metadataSqlQuery); + if (metadataFilterColumns.contains(variable)) { + VariableReferenceExpression varExpr = + new VariableReferenceExpression(first.getSourceLocation(), variable, first.getType()); + ConstantExpression lower = (ConstantExpression) second; + ConstantExpression upper = (ConstantExpression) third; + + // Build expression: (var >= lower) AND (var <= upper) + RowExpression lowerBoundExpr = new CallExpression( + GREATER_THAN_OR_EQUAL.name(), + standardFunctionResolution.comparisonFunction(GREATER_THAN_OR_EQUAL, varExpr.getType(), lower.getType()), + BOOLEAN, + ImmutableList.of(varExpr, lower)); + RowExpression upperBoundExpr = new CallExpression( + LESS_THAN_OR_EQUAL.name(), + standardFunctionResolution.comparisonFunction(LESS_THAN_OR_EQUAL, varExpr.getType(), upper.getType()), + BOOLEAN, + ImmutableList.of(varExpr, upper)); + RowExpression metadataExpr = new SpecialFormExpression( + AND, + BOOLEAN, + lowerBoundExpr, + upperBoundExpr); + + return new ClpExpression(kql, metadataExpr); + } + return new ClpExpression(kql); } /** From 71647677a419bee031194abcc1987e4b41aeaac3 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 4 Nov 2025 12:46:21 -0500 Subject: [PATCH 03/26] finish refactoring metadata expression generation --- presto-clp/pom.xml | 12 + .../facebook/presto/plugin/clp/ClpConfig.java | 18 -- .../facebook/presto/plugin/clp/ClpModule.java | 13 +- .../clp/optimization/ClpComputePushDown.java | 15 +- .../clp/{ => optimization}/ClpExpression.java | 16 +- .../optimization/ClpFilterToKqlConverter.java | 161 ++++++------ .../clp/split/ClpSplitMetadataConfig.java | 241 ++++++++---------- .../filter/ClpMySqlSplitFilterProvider.java | 155 ----------- .../split/filter/ClpSplitFilterConfig.java | 45 ---- ...FilterConfigCustomOptionsDeserializer.java | 49 ---- .../split/filter/ClpSplitFilterProvider.java | 171 ------------- .../presto/plugin/clp/TestClpFilterToKql.java | 7 +- 12 files changed, 219 insertions(+), 684 deletions(-) rename presto-clp/src/main/java/com/facebook/presto/plugin/clp/{ => optimization}/ClpExpression.java (83%) delete mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java delete mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java delete mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java delete mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index d13b59968aff5..54d9f8bc7d9d4 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -11,6 +11,18 @@ presto-clp Presto - CLP Connector + + + + org.apache.maven.plugins + maven-compiler-plugin + + 11 + 11 + + + + presto-plugin diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 121eb0d5ff17b..1e628c201a58b 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -34,7 +34,6 @@ public class ClpConfig private long metadataExpireInterval = 600; private String splitFilterConfig; - private SplitFilterProviderType splitFilterProviderType = SplitFilterProviderType.MYSQL; private SplitProviderType splitProviderType = SplitProviderType.MYSQL; public boolean isPolymorphicTypeEnabled() @@ -163,18 +162,6 @@ public ClpConfig setSplitFilterConfig(String splitFilterConfig) return this; } - public SplitFilterProviderType getSplitFilterProviderType() - { - return splitFilterProviderType; - } - - @Config("clp.split-filter-provider-type") - public ClpConfig setSplitFilterProviderType(SplitFilterProviderType splitFilterProviderType) - { - this.splitFilterProviderType = splitFilterProviderType; - return this; - } - public SplitProviderType getSplitProviderType() { return splitProviderType; @@ -192,11 +179,6 @@ public enum MetadataProviderType MYSQL } - public enum SplitFilterProviderType - { - MYSQL - } - public enum SplitProviderType { MYSQL diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java index bf801d0d87242..ceb613d8f83e0 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java @@ -17,19 +17,16 @@ import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.plugin.clp.split.ClpSplitProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.PrestoException; import com.google.inject.Binder; import com.google.inject.Scopes; import static com.facebook.airlift.configuration.ConfigBinder.configBinder; import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType; -import static com.facebook.presto.plugin.clp.ClpConfig.SplitFilterProviderType; import static com.facebook.presto.plugin.clp.ClpConfig.SplitProviderType; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_METADATA_SOURCE; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_SPLIT_SOURCE; public class ClpModule @@ -42,17 +39,11 @@ protected void setup(Binder binder) binder.bind(ClpMetadata.class).in(Scopes.SINGLETON); binder.bind(ClpRecordSetProvider.class).in(Scopes.SINGLETON); binder.bind(ClpSplitManager.class).in(Scopes.SINGLETON); + binder.bind(ClpSplitMetadataConfig.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(ClpConfig.class); ClpConfig config = buildConfigObject(ClpConfig.class); - if (SplitFilterProviderType.MYSQL == config.getSplitFilterProviderType()) { - binder.bind(ClpSplitFilterProvider.class).to(ClpMySqlSplitFilterProvider.class).in(Scopes.SINGLETON); - } - else { - throw new PrestoException(CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE, "Unsupported split filter provider type: " + config.getSplitFilterProviderType()); - } - if (config.getMetadataProviderType() == MetadataProviderType.MYSQL) { binder.bind(ClpMetadataProvider.class).to(ClpMySqlMetadataProvider.class).in(Scopes.SINGLETON); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index 2c216614af10f..9bbcebd20949c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -14,9 +14,9 @@ package com.facebook.presto.plugin.clp.optimization; import com.facebook.airlift.log.Logger; -import com.facebook.presto.plugin.clp.ClpExpression; import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorPlanOptimizer; @@ -50,13 +50,13 @@ public class ClpComputePushDown private static final Logger log = Logger.get(ClpComputePushDown.class); private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpSplitFilterProvider splitFilterProvider; + private final ClpSplitMetadataConfig metadataConfig; - public ClpComputePushDown(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider) + public ClpComputePushDown(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitMetadataConfig metadataConfig) { this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); - this.splitFilterProvider = requireNonNull(splitFilterProvider, "splitFilterProvider is null"); + this.metadataConfig = requireNonNull(metadataConfig, "metadataConfig is null"); } @Override @@ -112,7 +112,6 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString(); Map assignments = tableScanNode.getAssignments(); ClpExpression clpExpression = filterNode.getPredicate().accept( @@ -120,16 +119,16 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod functionResolution, functionManager, assignments, - splitFilterProvider.getColumnNames(tableScope)), + metadataConfig.getMetadataColumns(clpTableHandle.getSchemaTableName()).keySet()), null); Optional kqlQuery = clpExpression.getPushDownExpression(); - Optional metadataSqlQuery = clpExpression.getMetadataSqlQuery(); + Optional metadataExpression = clpExpression.getMetadataExpression(); Optional remainingPredicate = clpExpression.getRemainingExpression(); // Perform required metadata filter checks before handling the KQL query (if kqlQuery // isn't present, we'll return early, skipping subsequent checks). splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), metadataSqlQuery.orElse("")); - boolean hasMetadataFilter = metadataSqlQuery.isPresent() && !metadataSqlQuery.get().isEmpty(); + boolean hasMetadataFilter = metadataSqlQuery.isPresent(); if (hasMetadataFilter) { metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get())); log.debug("Metadata SQL query: %s", metadataSqlQuery.get()); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java similarity index 83% rename from presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java index cb5779cece24f..547f602071f2c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java @@ -11,7 +11,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.plugin.clp; +package com.facebook.presto.plugin.clp.optimization; import com.facebook.presto.spi.relation.RowExpression; @@ -33,15 +33,15 @@ public class ClpExpression // Optional SQL expression extracted from the query plan, which is only made of up of columns in // CLP's metadata database. - private final Optional metadataExpression; + private final Optional metadataExpression; // The remaining (non-translatable) portion of the RowExpression, if any. private final Optional remainingExpression; - public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpression remainingExpression) + public ClpExpression(String pushDownExpression, RowExpression metadataExpression, RowExpression remainingExpression) { this.pushDownExpression = Optional.ofNullable(pushDownExpression); - this.metadataExpression = Optional.ofNullable(metadataSqlQuery); + this.metadataExpression = Optional.ofNullable(metadataExpression); this.remainingExpression = Optional.ofNullable(remainingExpression); } @@ -68,11 +68,11 @@ public ClpExpression(String pushDownExpression) * metadata SQL string. * * @param pushDownExpression - * @param metadataSqlQuery + * @param metadataExpression */ - public ClpExpression(String pushDownExpression, String metadataSqlQuery) + public ClpExpression(String pushDownExpression, RowExpression metadataExpression) { - this(pushDownExpression, metadataSqlQuery, null); + this(pushDownExpression, metadataExpression, null); } /** @@ -90,7 +90,7 @@ public Optional getPushDownExpression() return pushDownExpression; } - public Optional getMetadataSqlQuery() + public Optional getMetadataExpression() { return metadataExpression; } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index 9371585fa1795..d07e7aedc86ff 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -19,7 +19,6 @@ import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.plugin.clp.ClpColumnHandle; -import com.facebook.presto.plugin.clp.ClpExpression; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.FunctionHandle; @@ -61,6 +60,7 @@ import static com.facebook.presto.common.type.TinyintType.TINYINT; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND; +import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.OR; import static java.lang.Integer.parseInt; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -310,8 +310,8 @@ private ClpExpression handleNot(CallExpression node) return new ClpExpression(node); } String notPushDownExpression = "NOT " + expression.getPushDownExpression().get(); - if (expression.getMetadataSqlQuery().isPresent()) { - return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSqlQuery()); + if (expression.getMetadataExpression().isPresent()) { + return new ClpExpression(notPushDownExpression, node); } else { return new ClpExpression(notPushDownExpression); @@ -462,35 +462,27 @@ private ClpExpression buildClpExpression( Type literalType, RowExpression originalNode) { - String metadataSqlQuery = null; + boolean isVarchar = literalType instanceof VarcharType; + boolean isMetadataColumn = metadataFilterColumns.contains(variableName); + String formattedLiteral = isVarchar + ? "\"" + escapeKqlSpecialCharsForStringValue(literalString) + "\"" + : literalString; + + String expression = null; if (operator.equals(EQUAL)) { - if (literalType instanceof VarcharType) { - return new ClpExpression(format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); - } - else { - if (metadataFilterColumns.contains(variableName)) { - metadataSqlQuery = format("\"%s\" = %s", variableName, literalString); - } - return new ClpExpression(format("%s: %s", variableName, literalString), metadataSqlQuery); - } + expression = format("%s: %s", variableName, formattedLiteral); } else if (operator.equals(NOT_EQUAL)) { - if (literalType instanceof VarcharType) { - return new ClpExpression(format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); - } - else { - if (metadataFilterColumns.contains(variableName)) { - metadataSqlQuery = format("NOT \"%s\" = %s", variableName, literalString); - } - return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSqlQuery); - } + expression = format("NOT %s: %s", variableName, formattedLiteral); } - else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) { - if (metadataFilterColumns.contains(variableName)) { - metadataSqlQuery = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString); - } - return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSqlQuery); + else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !isVarchar) { + expression = format("%s %s %s", variableName, operator.getOperator(), literalString); + } + + if (expression != null) { + return new ClpExpression(expression, isMetadataColumn ? originalNode : null); } + return new ClpExpression(originalNode); } @@ -688,49 +680,49 @@ private Optional parseLengthLiteral(RowExpression lengthExpression, Str */ private ClpExpression handleAnd(SpecialFormExpression node) { - StringBuilder metadataQueryBuilder = new StringBuilder(); - metadataQueryBuilder.append("("); - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); + List pushdownKql = new ArrayList<>(); List remainingExpressions = new ArrayList<>(); - boolean hasMetadataSql = false; - boolean hasPushDownExpression = false; + List metadataExpressions = new ArrayList<>(); + for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); if (expression.getPushDownExpression().isPresent()) { - hasPushDownExpression = true; - queryBuilder.append(expression.getPushDownExpression().get()); - queryBuilder.append(" AND "); - if (expression.getMetadataSqlQuery().isPresent()) { - hasMetadataSql = true; - metadataQueryBuilder.append(expression.getMetadataSqlQuery().get()); - metadataQueryBuilder.append(" AND "); - } - } - if (expression.getRemainingExpression().isPresent()) { - remainingExpressions.add(expression.getRemainingExpression().get()); + pushdownKql.add(expression.getPushDownExpression().get()); + expression.getMetadataExpression().ifPresent(metadataExpressions::add); } + expression.getRemainingExpression().ifPresent(remainingExpressions::add); } - if (!hasPushDownExpression) { + if (pushdownKql.isEmpty()) { return new ClpExpression(node); } - else if (!remainingExpressions.isEmpty()) { - if (remainingExpressions.size() == 1) { - return new ClpExpression( - queryBuilder.substring(0, queryBuilder.length() - 5) + ")", - hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, - remainingExpressions.get(0)); - } - else { - return new ClpExpression( - queryBuilder.substring(0, queryBuilder.length() - 5) + ")", - hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, - new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions)); - } + + String combinedKql = "(" + String.join(" AND ", pushdownKql) + ")"; + RowExpression combinedMetadataExpression = null; + + if (metadataExpressions.size() == 1) { + combinedMetadataExpression = metadataExpressions.get(0); } - // Remove the last " AND " from the query - return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")", - hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null); + else if (metadataExpressions.size() > 1) { + combinedMetadataExpression = new SpecialFormExpression( + node.getSourceLocation(), + AND, + BOOLEAN, + metadataExpressions); + } + + RowExpression combinedRemainingExpression = null; + if (remainingExpressions.size() == 1) { + combinedRemainingExpression = remainingExpressions.get(0); + } + else if (remainingExpressions.size() > 1) { + combinedRemainingExpression = new SpecialFormExpression( + node.getSourceLocation(), + AND, + BOOLEAN, + remainingExpressions); + } + + return new ClpExpression(combinedKql, combinedMetadataExpression, combinedRemainingExpression); } /** @@ -747,12 +739,11 @@ else if (!remainingExpressions.isEmpty()) { */ private ClpExpression handleOr(SpecialFormExpression node) { - StringBuilder metadataQueryBuilder = new StringBuilder(); - metadataQueryBuilder.append("("); - StringBuilder queryBuilder = new StringBuilder(); - queryBuilder.append("("); + List pushdownKql = new ArrayList<>(); + List metadataExpressions = new ArrayList<>(); + boolean allPushedDown = true; - boolean hasAllMetadataSql = true; + boolean allHaveMetadataExpression = true; for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); // Note: It is possible in the future that an expression cannot be pushed down as a KQL query, but can be @@ -761,23 +752,37 @@ private ClpExpression handleOr(SpecialFormExpression node) allPushedDown = false; continue; } - queryBuilder.append(expression.getPushDownExpression().get()); - queryBuilder.append(" OR "); - if (hasAllMetadataSql && expression.getMetadataSqlQuery().isPresent()) { - metadataQueryBuilder.append(expression.getMetadataSqlQuery().get()); - metadataQueryBuilder.append(" OR "); + + pushdownKql.add(expression.getPushDownExpression().get()); + if (allHaveMetadataExpression && expression.getMetadataExpression().isPresent()) { + metadataExpressions.add(expression.getMetadataExpression().get()); } else { - hasAllMetadataSql = false; + allHaveMetadataExpression = false; } } - if (allPushedDown) { - // Remove the last " OR " from the query - return new ClpExpression( - queryBuilder.substring(0, queryBuilder.length() - 4) + ")", - hasAllMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null); + + if (!allPushedDown) { + return new ClpExpression(node); } - return new ClpExpression(node); + + String combinedKql = "(" + String.join(" OR ", pushdownKql) + ")"; + + RowExpression combinedMetadataExpression = null; + if (allHaveMetadataExpression && !metadataExpressions.isEmpty()) { + if (metadataExpressions.size() == 1) { + combinedMetadataExpression = metadataExpressions.get(0); + } + else { + combinedMetadataExpression = new SpecialFormExpression( + node.getSourceLocation(), + OR, + BOOLEAN, + metadataExpressions); + } + } + + return new ClpExpression(combinedKql, combinedMetadataExpression); } /** diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index f06a4b0ee383a..ff6cc329a9b84 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -13,190 +13,155 @@ */ package com.facebook.presto.plugin.clp.split; -import com.facebook.presto.common.type.Type; import com.facebook.presto.plugin.clp.ClpConfig; -import com.fasterxml.jackson.core.type.TypeReference; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; -import java.io.File; +import javax.inject.Inject; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_FILTER_CONFIG_NOT_FOUND; import static java.util.Objects.requireNonNull; public class ClpSplitMetadataConfig { - /** Metadata column definition */ - public static class MetaColumn { - public String type; // STRING, DATE, BIGINT - public String exposedAs; // optional logical name - public Filter filter; // optional filter config - public String name; // original column name, assigned during load - - public static class Filter { - public Boolean allowDirectFilter; // default true - public String asRangeBoundOf; // optional: maps to data column - public String boundType; // lower/upper - } + private final Map tableConfigs = new HashMap<>(); - public String getExposedName() { - return exposedAs != null ? exposedAs : name; + public static class MetaColumn { + public final String name; + public final String type; + public final String exposedAs; + public final String description; + public final String asRangeBoundOf; // optional + public final String boundType; // "lower" or "upper" + + public MetaColumn(String name, JsonNode node) { + this.name = name; + this.type = node.path("type").asText(); + this.exposedAs = node.path("exposedAs").asText(name); + this.description = node.path("description").asText(null); + JsonNode filter = node.path("filter"); + this.asRangeBoundOf = filter.path("asRangeBoundOf").isMissingNode() ? null : filter.path("asRangeBoundOf").asText(); + this.boundType = filter.path("boundType").isMissingNode() ? null : filter.path("boundType").asText(); } } - /** Filter rule (required filters, etc.) */ public static class FilterRule { - public String column; - public Boolean required; // default false - public String reason; + public final String column; + public final boolean required; + public final String reason; + + public FilterRule(JsonNode node) { + this.column = node.path("column").asText(); + this.required = node.path("required").asBoolean(false); + this.reason = node.path("reason").asText(null); + } } - /** Per-dataset configuration */ public static class TableConfig { - public String inherits; - public Map metaColumns = new HashMap<>(); - public List filterRules = new ArrayList<>(); + public final Map metaColumns = new HashMap<>(); + public final List filterRules = new ArrayList<>(); } - private final Map rawConfigs = new HashMap<>(); - private final Map resolvedConfigs = new HashMap<>(); - - public ClpSplitMetadataConfig(ClpConfig config) throws Exception { + @Inject + public ClpSplitMetadataConfig(ClpConfig config) { requireNonNull(config, "config is null"); - if (null == config.getSplitFilterConfig()) { - return; + ObjectMapper mapper = new ObjectMapper(); + JsonNode root; + try { + root = mapper.readTree(Files.readAllBytes(Paths.get(config.getSplitFilterConfig()))); + } + catch (IOException e) { + throw new PrestoException(CLP_SPLIT_FILTER_CONFIG_NOT_FOUND, "Failed to open split filter config file", e); } - ObjectMapper mapper = new ObjectMapper(); - Map map = mapper.readValue(new File(config.getSplitFilterConfig()), new TypeReference<>() {}); - rawConfigs.putAll(map); - validateInheritance(); - } + for (Iterator it = root.fieldNames(); it.hasNext();) { + String namespace = it.next(); + JsonNode tableNode = root.get(namespace); - /** Detect cycles in inheritance */ - private void validateInheritance() { - for (String key : rawConfigs.keySet()) { - Set visited = new HashSet<>(); - String current = key; - while (rawConfigs.get(current).inherits != null) { - current = rawConfigs.get(current).inherits; - if (visited.contains(current)) { - throw new RuntimeException("Cycle detected in inheritance: " + key); + TableConfig cfg = new TableConfig(); + if (tableNode.has("metaColumns")) { + for (Iterator m = tableNode.get("metaColumns").fieldNames(); m.hasNext();) { + String colName = m.next(); + cfg.metaColumns.put(colName, new MetaColumn(colName, tableNode.get("metaColumns").get(colName))); } - visited.add(current); } - } - } - - /** Resolve dataset config with nested inheritance */ - public TableConfig getConfig(String dataset) { - if (resolvedConfigs.containsKey(dataset)) { - return resolvedConfigs.get(dataset); - } - - TableConfig config = rawConfigs.get(dataset); - if (config == null) { - // fallback to parent/grandparent by dataset name - int lastDot = dataset.lastIndexOf('.'); - if (lastDot > 0) { - return getConfig(dataset.substring(0, lastDot)); + if (tableNode.has("filterRules")) { + for (JsonNode rule : tableNode.get("filterRules")) { + cfg.filterRules.add(new FilterRule(rule)); + } } - throw new RuntimeException("Dataset config not found: " + dataset); + tableConfigs.put(namespace, cfg); } + } - TableConfig merged = new TableConfig(); - if (config.inherits != null) { - TableConfig parent = getConfig(config.inherits); - merged.metaColumns.putAll(parent.metaColumns); - merged.filterRules.addAll(parent.filterRules); + public Map getMetadataColumns(SchemaTableName name) { + TableConfig cfg = getTableConfig(name); + Map result = new LinkedHashMap<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + result.put(c.exposedAs, c.type); } - - // Override child metaColumns and filterRules - merged.metaColumns.putAll(config.metaColumns); - merged.filterRules.addAll(config.filterRules); - - // Set default allowDirectFilter = true if not specified - merged.metaColumns.values().forEach(mc -> { - mc.name = mc.name == null ? getKeyForMeta(mc, config) : mc.name; - if (mc.filter == null) mc.filter = new MetaColumn.Filter(); - if (mc.filter.allowDirectFilter == null) mc.filter.allowDirectFilter = true; - }); - - resolvedConfigs.put(dataset, merged); - return merged; + return result; } - // Helper to assign original name - private String getKeyForMeta(MetaColumn mc, TableConfig datasetConfig) { - for (Map.Entry e : datasetConfig.metaColumns.entrySet()) { - if (e.getValue() == mc) return e.getKey(); + public Map getExposedToOriginalMapping(SchemaTableName name) { + TableConfig cfg = getTableConfig(name); + Map mapping = new HashMap<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + mapping.put(c.exposedAs, c.name); } - return null; + return mapping; } - /** Convert metadata columns to Presto types with exposed names */ - public Map resolveMetadataSchema(String dataset) { - TableConfig config = getConfig(dataset); - Map schema = new LinkedHashMap<>(); - for (MetaColumn mc : config.metaColumns.values()) { - String prestoType; - switch (mc.type.toUpperCase()) { - case "STRING": prestoType = "VARCHAR"; break; - case "DATE": prestoType = "DATE"; break; - case "BIGINT": prestoType = "BIGINT"; break; - default: prestoType = "VARCHAR"; break; + + public Map> getDataColumnRangeMapping(SchemaTableName name) { + TableConfig cfg = getTableConfig(name); + Map> mapping = new HashMap<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + if (c.asRangeBoundOf != null && c.boundType != null) { + mapping + .computeIfAbsent(c.asRangeBoundOf, k -> new HashMap<>()) + .put(c.boundType, c.name); } - schema.put(mc.getExposedName(), prestoType); } - return schema; + return mapping; } - /** Generate metadata filter SQL */ - public String generateMetadataFilterSql(String dataset, Map filters) { - TableConfig config = getConfig(dataset); - List conditions = new ArrayList<>(); - Set requiredColumns = new HashSet<>(); - for (FilterRule rule : config.filterRules) { - if (Boolean.TRUE.equals(rule.required)) requiredColumns.add(rule.column); - } - - for (Map.Entry e : filters.entrySet()) { - String col = e.getKey(); - Object val = e.getValue(); + private TableConfig getTableConfig(SchemaTableName name) { + TableConfig merged = new TableConfig(); - // Translate msg.timestamp -> begin_timestamp / end_timestamp - MetaColumn begin = null, end = null; - for (MetaColumn mc : config.metaColumns.values()) { - if (mc.filter != null && col.equals(mc.filter.asRangeBoundOf)) { - if ("lower".equals(mc.filter.boundType)) begin = mc; - if ("upper".equals(mc.filter.boundType)) end = mc; - } - } - if (begin != null && end != null && val instanceof List && ((List) val).size() == 2) { - Object lowerVal = ((List) val).get(0); - Object upperVal = ((List) val).get(1); - conditions.add(begin.name + " >= " + lowerVal); - conditions.add(end.name + " <= " + upperVal); - requiredColumns.remove(col); - } else { - // direct filter on metadata - if (config.metaColumns.containsKey(col)) { - conditions.add(col + " = '" + val + "'"); - requiredColumns.remove(col); + List namespaces = new ArrayList<>(); + namespaces.add(""); + namespaces.add(name.getSchemaName()); + namespaces.add(name.getSchemaName() + "." + name.getTableName()); + + for (String ns : namespaces) { + TableConfig cfg = tableConfigs.get(ns); + if (cfg != null) { + merged.metaColumns.putAll(cfg.metaColumns); + + for (FilterRule rule : cfg.filterRules) { + boolean exists = merged.filterRules.stream() + .anyMatch(r -> r.column.equals(rule.column)); + if (!exists) { + merged.filterRules.add(rule); + } } } } - if (!requiredColumns.isEmpty()) { - throw new RuntimeException("Required filters missing: " + requiredColumns); - } - - return String.join(" AND ", conditions); + return merged; } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java deleted file mode 100644 index 31d24fd4df71c..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.ImmutableMap; -import com.google.inject.Inject; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static java.lang.String.format; - -/** - * Implementation for the CLP package's MySQL metadata database. - */ -public class ClpMySqlSplitFilterProvider - extends ClpSplitFilterProvider -{ - @Inject - public ClpMySqlSplitFilterProvider(ClpConfig config) - { - super(config); - } - - /** - * Performs regex-based replacements to rewrite {@code pushDownExpression} according to the - * {@code "rangeMapping"} field in {@link ClpMySqlCustomSplitFilterOptions}. For example: - *
    - *
  • {@code "msg.timestamp" >= 1234} → {@code end_timestamp >= 1234}
  • - *
  • {@code "msg.timestamp" <= 5678} → {@code begin_timestamp <= 5678}
  • - *
  • {@code "msg.timestamp" = 4567} → - * {@code (begin_timestamp <= 4567 AND end_timestamp >= 4567)}
  • - *
- * - * @param scope the filter's scope - * @param pushDownExpression the expression to be rewritten - * @return the rewritten expression - */ - @Override - public String remapSplitFilterPushDownExpression(String scope, String pushDownExpression) - { - String[] splitScope = scope.split("\\."); - - Map mappings = new HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0]))); - - if (1 < splitScope.length) { - mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1]))); - } - - if (3 == splitScope.length) { - mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope))); - } - - String remappedSql = pushDownExpression; - for (Map.Entry entry : mappings.entrySet()) { - String key = entry.getKey(); - ClpMySqlCustomSplitFilterOptions.RangeMapping value = entry.getValue(); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(>=?)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), - format("%s $2 $3", value.upperBound)); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(<=?)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), - format("%s $2 $3", value.lowerBound)); - remappedSql = remappedSql.replaceAll( - format("\"(%s)\"\\s(=)\\s(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)", key), - format("(%s <= $3 AND %s >= $3)", value.lowerBound, value.upperBound)); - } - return remappedSql; - } - - @Override - protected Class getCustomSplitFilterOptionsClass() - { - return ClpMySqlCustomSplitFilterOptions.class; - } - - private Map getAllMappingsFromFilters(List filters) - { - return null != filters - ? filters.stream() - .filter(filter -> - filter.customOptions instanceof ClpMySqlCustomSplitFilterOptions && - ((ClpMySqlCustomSplitFilterOptions) filter.customOptions).rangeMapping != null) - .collect(toImmutableMap( - filter -> filter.columnName, - filter -> ((ClpMySqlCustomSplitFilterOptions) filter.customOptions).rangeMapping)) - : ImmutableMap.of(); - } - - /** - * Custom options: - *
    - *
  • {@code rangeMapping} (optional): an object with the following properties: - *
      - *
    • {@code lowerBound}: The numeric metadata column that represents the lower bound - * of values in a split for the numeric data column.
    • - *
    • {@code upperBound}: The numeric metadata column that represents the upper bound - * of values in a split for the numeric data column.
    • - *
    - *
  • - *
- */ - protected static class ClpMySqlCustomSplitFilterOptions - implements CustomSplitFilterOptions - { - @JsonProperty("rangeMapping") - public RangeMapping rangeMapping; - - public static class RangeMapping - { - @JsonProperty("lowerBound") - public String lowerBound; - - @JsonProperty("upperBound") - public String upperBound; - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (!(o instanceof RangeMapping)) { - return false; - } - RangeMapping that = (RangeMapping) o; - return Objects.equals(lowerBound, that.lowerBound) && - Objects.equals(upperBound, that.upperBound); - } - - @Override - public int hashCode() - { - return Objects.hash(lowerBound, upperBound); - } - } - } -} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java deleted file mode 100644 index 0d7e37a803515..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfig.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.fasterxml.jackson.annotation.JsonProperty; - -/** - * Options for a how a column in a Presto query should be pushed down into a query against CLP's - * metadata database (during split pruning): - *
    - *
  • {@code columnName}: The column's name in the Presto query.
  • - * - *
  • {@code customOptions}: Options specific to the current - * {@link ClpSplitFilterProvider}.
  • - * - *
  • {@code required} (optional, defaults to {@code false}): Whether the filter must be - * present in the generated metadata query. If a required filter is missing or cannot be added to - * the metadata query, the original query will be rejected.
  • - *
- */ -public class ClpSplitFilterConfig -{ - @JsonProperty("columnName") - public String columnName; - - @JsonProperty("customOptions") - public CustomSplitFilterOptions customOptions; - - @JsonProperty("required") - public boolean required; - - public interface CustomSplitFilterOptions - {} -} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java deleted file mode 100644 index 3464c14883e26..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterConfigCustomOptionsDeserializer.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import java.io.IOException; - -import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; - -/** - * Uses the given {@link CustomSplitFilterOptions} implementation to deserialize the - * {@code "customOptions"} field in a {@link ClpSplitFilterConfig}. The implementation is determined - * by the implemented {@link ClpSplitFilterProvider}. - */ -public class ClpSplitFilterConfigCustomOptionsDeserializer - extends JsonDeserializer -{ - private final Class actualCustomSplitFilterOptionsClass; - - public ClpSplitFilterConfigCustomOptionsDeserializer(Class actualCustomSplitFilterOptionsClass) - { - this.actualCustomSplitFilterOptionsClass = actualCustomSplitFilterOptionsClass; - } - - @Override - public CustomSplitFilterOptions deserialize(JsonParser p, DeserializationContext ctxt) throws IOException - { - ObjectNode node = p.getCodec().readTree(p); - ObjectMapper mapper = (ObjectMapper) p.getCodec(); - - return mapper.treeToValue(node, actualCustomSplitFilterOptionsClass); - } -} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java deleted file mode 100644 index 0609843aaf22f..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import com.facebook.presto.spi.PrestoException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; - -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Function; - -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_SPLIT_FILTER_NOT_VALID; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_FILTER_CONFIG_NOT_FOUND; -import static com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterConfig.CustomSplitFilterOptions; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static java.util.Objects.requireNonNull; - -/** - * Loads and manages {@link ClpSplitFilterConfig}s from a config file. - *

- * The config file is specified by the {@code clp.split-filter-config} property. - *

- * Filter configs can be declared at either a catalog, schema, or table scope. Filter configs under - * a particular scope will apply to all child scopes (e.g., schema-level filter configs will apply - * to all tables within that schema). - *

- * Implementations of this class can customize filter configs through the {@code "customOptions"} - * field within each {@link ClpSplitFilterConfig}. - */ -public abstract class ClpSplitFilterProvider -{ - protected final Map> filterMap; - - public ClpSplitFilterProvider(ClpConfig config) - { - requireNonNull(config, "config is null"); - - if (null == config.getSplitFilterConfig()) { - filterMap = ImmutableMap.of(); - return; - } - ObjectMapper mapper = new ObjectMapper(); - SimpleModule module = new SimpleModule(); - module.addDeserializer( - CustomSplitFilterOptions.class, - new ClpSplitFilterConfigCustomOptionsDeserializer(getCustomSplitFilterOptionsClass())); - mapper.registerModule(module); - try { - filterMap = mapper.readValue( - new File(config.getSplitFilterConfig()), - new TypeReference>>() {}); - } - catch (IOException e) { - throw new PrestoException(CLP_SPLIT_FILTER_CONFIG_NOT_FOUND, "Failed to open split filter config file", e); - } - } - - /** - * Rewrites {@code pushDownExpression} to remap filter conditions based on the - * {@code "customOptions"} for the given scope. - *

- * {@code scope} follows the format {@code catalog[.schema][.table]}, and determines which - * filter mappings to apply, since mappings from more specific scopes (e.g., table-level) - * override or supplement those from broader scopes (e.g., catalog-level). For each scope - * (catalog, schema, table), this method collects all mappings defined in - * {@code "customOptions"}. - * - * @param scope the scope of the filter - * @param pushDownExpression the expression to be rewritten - * @return the rewritten expression - */ - public abstract String remapSplitFilterPushDownExpression(String scope, String pushDownExpression); - - /** - * Checks for the given table, if {@code splitFilterPushDownExpression} contains all required - * fields. - * - * @param tableScopeSet the set of scopes of the tables that are being queried - * @param splitFilterPushDownExpression the expression to be checked - */ - public void checkContainsRequiredFilters(Set tableScopeSet, String splitFilterPushDownExpression) - { - boolean hasRequiredSplitFilterColumns = true; - ImmutableList.Builder notFoundListBuilder = ImmutableList.builder(); - for (String tableScope : tableScopeSet) { - for (String columnName : getRequiredColumnNames(tableScope)) { - if (!splitFilterPushDownExpression.contains(columnName)) { - hasRequiredSplitFilterColumns = false; - notFoundListBuilder.add(columnName); - } - } - } - if (!hasRequiredSplitFilterColumns) { - throw new PrestoException( - CLP_MANDATORY_SPLIT_FILTER_NOT_VALID, - notFoundListBuilder.build() + " is a mandatory split filter column but not valid"); - } - } - - public Set getColumnNames(String scope) - { - return collectColumnNamesFromScopes(scope, this::getAllColumnNamesFromFilters); - } - - /** - * Returns the implemented {@link CustomSplitFilterOptions} class. To respect our code style, we - * recommend implementing a {@code protected static class} as an inner class in the implemented - * {@link ClpSplitFilterProvider} class. - * - * @return the implemented {@link CustomSplitFilterOptions} class - */ - protected abstract Class getCustomSplitFilterOptionsClass(); - - private Set getRequiredColumnNames(String scope) - { - return collectColumnNamesFromScopes(scope, this::getRequiredColumnNamesFromFilters); - } - - private Set collectColumnNamesFromScopes(String scope, Function, Set> extractor) - { - String[] splitScope = scope.split("\\."); - ImmutableSet.Builder builder = ImmutableSet.builder(); - - builder.addAll(extractor.apply(filterMap.get(splitScope[0]))); - - if (splitScope.length > 1) { - builder.addAll(extractor.apply(filterMap.get(splitScope[0] + "." + splitScope[1]))); - } - - if (splitScope.length == 3) { - builder.addAll(extractor.apply(filterMap.get(scope))); - } - - return builder.build(); - } - - private Set getAllColumnNamesFromFilters(List filters) - { - return null != filters ? filters.stream() - .map(filter -> filter.columnName) - .collect(toImmutableSet()) : ImmutableSet.of(); - } - - private Set getRequiredColumnNamesFromFilters(List filters) - { - return null != filters ? filters.stream() - .filter(filter -> filter.required) - .map(filter -> filter.columnName) - .collect(toImmutableSet()) : ImmutableSet.of(); - } -} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java index 413c9cd773a6d..36ea4d785577b 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.plugin.clp; +import com.facebook.presto.plugin.clp.optimization.ClpExpression; import com.facebook.presto.plugin.clp.optimization.ClpFilterToKqlConverter; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.relation.RowExpression; @@ -315,11 +316,11 @@ private void testPushDown(SessionHolder sessionHolder, String sql, String expect ClpExpression clpExpression = tryPushDown(sql, sessionHolder, metadataFilterColumns); testFilter(clpExpression, expectedKql, null, sessionHolder); if (expectedMetadataSqlQuery != null) { - assertTrue(clpExpression.getMetadataSqlQuery().isPresent()); - assertEquals(clpExpression.getMetadataSqlQuery().get(), expectedMetadataSqlQuery); + assertTrue(clpExpression.getMetadataExpression().isPresent()); + assertEquals(clpExpression.getMetadataExpression().get(), expectedMetadataSqlQuery); } else { - assertFalse(clpExpression.getMetadataSqlQuery().isPresent()); + assertFalse(clpExpression.getMetadataExpression().isPresent()); } } From 8e077fcb0e7614f6594e6bd30e88f50841fadee9 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 4 Nov 2025 15:54:52 -0500 Subject: [PATCH 04/26] progress --- .../clp/optimization/ClpComputePushDown.java | 27 ++-- ...ClpSplitMetadataExpressionTransformer.java | 135 ++++++++++++++++++ .../clp/split/ClpSplitMetadataConfig.java | 17 +++ 3 files changed, 163 insertions(+), 16 deletions(-) create mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index 9bbcebd20949c..ef2dae7c9efc9 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -17,11 +17,11 @@ import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.TableHandle; import com.facebook.presto.spi.VariableAllocator; import com.facebook.presto.spi.function.FunctionMetadataManager; @@ -40,6 +40,7 @@ import java.util.Set; import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_SPLIT_FILTER_NOT_VALID; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -52,7 +53,10 @@ public class ClpComputePushDown private final StandardFunctionResolution functionResolution; private final ClpSplitMetadataConfig metadataConfig; - public ClpComputePushDown(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitMetadataConfig metadataConfig) + public ClpComputePushDown( + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + ClpSplitMetadataConfig metadataConfig) { this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); @@ -63,27 +67,17 @@ public ClpComputePushDown(FunctionMetadataManager functionManager, StandardFunct public PlanNode optimize(PlanNode maxSubplan, ConnectorSession session, VariableAllocator variableAllocator, PlanNodeIdAllocator idAllocator) { Rewriter rewriter = new Rewriter(idAllocator); - PlanNode optimizedPlanNode = rewriteWith(rewriter, maxSubplan); - - // Throw exception if any required split filters are missing - if (!rewriter.tableScopeSet.isEmpty() && !rewriter.hasVisitedFilter) { - splitFilterProvider.checkContainsRequiredFilters(rewriter.tableScopeSet, ""); - } - return optimizedPlanNode; + return rewriteWith(rewriter, maxSubplan); } private class Rewriter extends ConnectorPlanRewriter { private final PlanNodeIdAllocator idAllocator; - private final Set tableScopeSet; - private boolean hasVisitedFilter; public Rewriter(PlanNodeIdAllocator idAllocator) { this.idAllocator = idAllocator; - hasVisitedFilter = false; - tableScopeSet = new HashSet<>(); } @Override @@ -91,7 +85,10 @@ public PlanNode visitTableScan(TableScanNode node, RewriteContext context) { TableHandle tableHandle = node.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - tableScopeSet.add(format("%s.%s", CONNECTOR_NAME, clpTableHandle.getSchemaTableName())); + if (!metadataConfig.getFilterRules(clpTableHandle.getSchemaTableName()).isEmpty()) { + throw new PrestoException(CLP_MANDATORY_SPLIT_FILTER_NOT_VALID, "required filters must be specified"); + } + return super.visitTableScan(node, context); } @@ -107,8 +104,6 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context) private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNode) { - hasVisitedFilter = true; - TableHandle tableHandle = tableScanNode.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java new file mode 100644 index 0000000000000..e9fddd9b2ced5 --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java @@ -0,0 +1,135 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.optimization; + +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.ConstantExpression; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.RowExpressionVisitor; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class ClpSplitMetadataExpressionTransformer + implements RowExpressionVisitor +{ + private final Map exposedToOriginal; + private final Map> dataToMetadataBounds; + private final Set requiredColumns; + private final Set presentColumns = new HashSet<>(); + + public ClpSplitMetadataExpressionTransformer( + Map exposedToOriginal, + Map> dataToMetadataBounds, + Set requiredColumns) + { + this.exposedToOriginal = exposedToOriginal; + this.dataToMetadataBounds = dataToMetadataBounds; + this.requiredColumns = requiredColumns; + } + + public RowExpression rewrite(RowExpression expr) + { + return expr.accept(this, null); + } + + public boolean containsAllRequiredColumns() + { + return presentColumns.containsAll(requiredColumns); + } + + @Override + public RowExpression visitCall(CallExpression call, Void context) + { + List rewrittenArgs = call.getArguments().stream() + .map(arg -> arg.accept(this, context)) + .collect(Collectors.toList()); + return new CallExpression(call.getSourceLocation(), call.getDisplayName(), call.getFunctionHandle(), call.getType(), rewrittenArgs, call.getFunctionMetadata()); + } + + @Override + public RowExpression visitConstant(ConstantExpression constant, Void context) + { + return constant; + } + + @Override + public RowExpression visitInputReference(InputReferenceExpression reference, Void context) + { + return reference; + } + + @Override + public RowExpression visitFieldReference(FieldReferenceExpression reference, Void context) + { + // Replace column names if applicable + String originalName = reference.getField().toString(); + presentColumns.add(originalName); + + // Step 1: replace exposed name -> original + String mappedName = exposedToOriginal.getOrDefault(originalName, originalName); + + // Step 2: remap data column -> metadata column (range-based) + Map bounds = dataToMetadataBounds.get(mappedName); + if (bounds != null) { + // by convention: replace with begin_timestamp or end_timestamp depending on predicate direction + // this part might require operator context; here we can only do simple rename + // we could wrap it in metadata-specific placeholder for later replacement + } + + return new FieldReferenceExpression(mappedName, reference.getType()); + } + + @Override + public RowExpression visitSpecialForm(SpecialFormExpression specialForm, Void context) + { + List args = specialForm.getArguments().stream() + .map(arg -> arg.accept(this, context)) + .collect(Collectors.toList()); + return new SpecialFormExpression( + specialForm.getForm(), + specialForm.getType(), + args, + specialForm.getSourceLocation()); + } + + @Override + public RowExpression visitLambda(LambdaDefinitionExpression lambda, Void context) + { + return lambda; + } + + @Override + public RowExpression visitVariableReference(VariableReferenceExpression reference, Void context) + { + String name = reference.getName(); + presentColumns.add(name); + + // Step 1: replace exposed name → original + String mappedName = exposedToOriginal.getOrDefault(name, name); + + // Step 2: remap data column → metadata columns (range mapping) + Map bounds = dataToMetadataBounds.get(mappedName); + if (bounds != null && bounds.size() == 1) { + // Simple direct remap (not range) + mappedName = bounds.values().iterator().next(); + } + + return new VariableReferenceExpression( + reference.getSourceLocation(), + mappedName, + reference.getType()); + } +} \ No newline at end of file diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index ff6cc329a9b84..b8cd1932beeac 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -28,8 +28,10 @@ import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_FILTER_CONFIG_NOT_FOUND; import static java.util.Objects.requireNonNull; @@ -116,6 +118,11 @@ public Map getMetadataColumns(SchemaTableName name) { return result; } + public List getFilterRules(SchemaTableName name) { + TableConfig cfg = getTableConfig(name); + return cfg.filterRules; + } + public Map getExposedToOriginalMapping(SchemaTableName name) { TableConfig cfg = getTableConfig(name); Map mapping = new HashMap<>(); @@ -125,6 +132,16 @@ public Map getExposedToOriginalMapping(SchemaTableName name) { return mapping; } + public Set getDataColumnsWithRangeBounds(SchemaTableName name) { + TableConfig cfg = getTableConfig(name); + Set result = new LinkedHashSet<>(); + for (MetaColumn c : cfg.metaColumns.values()) { + if (c.asRangeBoundOf != null) { + result.add(c.asRangeBoundOf); + } + } + return result; + } public Map> getDataColumnRangeMapping(SchemaTableName name) { TableConfig cfg = getTableConfig(name); From b191322955c1e1d6a7c3014ec160841d740dde27 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 18:05:38 -0500 Subject: [PATCH 05/26] add converter --- .../presto/plugin/clp/ClpConnector.java | 8 +- .../plugin/clp/ClpTableLayoutHandle.java | 20 +- .../clp/optimization/ClpComputePushDown.java | 1 - .../optimization/ClpFilterToKqlConverter.java | 29 ++- .../ClpPlanOptimizerProvider.java | 6 +- ...ClpSplitMetadataExpressionTransformer.java | 135 -------------- ...MySqlSplitMetadataExpressionConverter.java | 173 ++++++++++++++++++ .../clp/split/ClpMySqlSplitProvider.java | 30 ++- .../clp/split/ClpSplitMetadataConfig.java | 14 +- 9 files changed, 248 insertions(+), 168 deletions(-) delete mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java create mode 100644 presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java index 3b81d6bb2062f..48598a4b18603 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java @@ -16,7 +16,6 @@ import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.log.Logger; import com.facebook.presto.plugin.clp.optimization.ClpPlanOptimizerProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; import com.facebook.presto.spi.connector.Connector; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; @@ -42,7 +41,6 @@ public class ClpConnector private final ClpSplitManager splitManager; private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpSplitFilterProvider splitFilterProvider; @Inject public ClpConnector( @@ -51,8 +49,7 @@ public ClpConnector( ClpRecordSetProvider recordSetProvider, ClpSplitManager splitManager, FunctionMetadataManager functionManager, - StandardFunctionResolution functionResolution, - ClpSplitFilterProvider splitFilterProvider) + StandardFunctionResolution functionResolution) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); @@ -60,13 +57,12 @@ public ClpConnector( this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); - this.splitFilterProvider = requireNonNull(splitFilterProvider, "splitFilterProvider is null"); } @Override public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() { - return new ClpPlanOptimizerProvider(functionManager, functionResolution, splitFilterProvider); + return new ClpPlanOptimizerProvider(functionManager, functionResolution); } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java index b82932f0c30fd..40950a3d00ca0 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java @@ -14,6 +14,7 @@ package com.facebook.presto.plugin.clp; import com.facebook.presto.spi.ConnectorTableLayoutHandle; +import com.facebook.presto.spi.relation.RowExpression; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -27,14 +28,17 @@ public class ClpTableLayoutHandle { private final ClpTableHandle table; private final Optional kqlQuery; - private final Optional metadataSql; + private final Optional metadataExpression; @JsonCreator - public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional kqlQuery, @JsonProperty("metadataFilterQuery") Optional metadataSql) + public ClpTableLayoutHandle( + @JsonProperty("table") ClpTableHandle table, + @JsonProperty("kqlQuery") Optional kqlQuery, + @JsonProperty("metadataExpression") Optional metadataSqlExpression) { this.table = table; this.kqlQuery = kqlQuery; - this.metadataSql = metadataSql; + this.metadataExpression = metadataSqlExpression; } @JsonProperty @@ -50,9 +54,9 @@ public Optional getKqlQuery() } @JsonProperty - public Optional getMetadataSql() + public Optional getMetadataExpression() { - return metadataSql; + return metadataExpression; } @Override @@ -67,13 +71,13 @@ public boolean equals(Object o) ClpTableLayoutHandle that = (ClpTableLayoutHandle) o; return Objects.equals(table, that.table) && Objects.equals(kqlQuery, that.kqlQuery) && - Objects.equals(metadataSql, that.metadataSql); + Objects.equals(metadataExpression, that.metadataExpression); } @Override public int hashCode() { - return Objects.hash(table, kqlQuery, metadataSql); + return Objects.hash(table, kqlQuery, metadataExpression); } @Override @@ -82,7 +86,7 @@ public String toString() return toStringHelper(this) .add("table", table) .add("kqlQuery", kqlQuery) - .add("metadataSql", metadataSql) + .add("metadataExpression", metadataExpression) .toString(); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index ef2dae7c9efc9..3d05ff66dc9f9 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -39,7 +39,6 @@ import java.util.Optional; import java.util.Set; -import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_SPLIT_FILTER_NOT_VALID; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; import static java.lang.String.format; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index d07e7aedc86ff..9f2e5a80ac58b 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -417,6 +417,7 @@ private ClpExpression handleLogicalBinary(OperatorType operator, CallExpression return buildClpExpression( leftPushDownExpression.get(), // variable rightPushDownExpression.get(), // literal + left, // constant operator, rightType, node); @@ -426,6 +427,7 @@ else if (leftIsConstant) { return buildClpExpression( rightPushDownExpression.get(), // variable leftPushDownExpression.get(), // literal + right, // constant newOperator, leftType, node); @@ -449,6 +451,7 @@ else if (leftIsConstant) { * * @param variableName name of the variable * @param literalString string representation of the literal + * @param constant the original ConstantExpression of literalString * @param operator the comparison operator * @param literalType the type of the literal * @param originalNode the original RowExpression node @@ -458,6 +461,7 @@ else if (leftIsConstant) { private ClpExpression buildClpExpression( String variableName, String literalString, + RowExpression constant, OperatorType operator, Type literalType, RowExpression originalNode) @@ -468,22 +472,33 @@ private ClpExpression buildClpExpression( ? "\"" + escapeKqlSpecialCharsForStringValue(literalString) + "\"" : literalString; - String expression = null; + String pushDownExpression = null; if (operator.equals(EQUAL)) { - expression = format("%s: %s", variableName, formattedLiteral); + pushDownExpression = format("%s: %s", variableName, formattedLiteral); } else if (operator.equals(NOT_EQUAL)) { - expression = format("NOT %s: %s", variableName, formattedLiteral); + pushDownExpression = format("NOT %s: %s", variableName, formattedLiteral); } else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !isVarchar) { - expression = format("%s %s %s", variableName, operator.getOperator(), literalString); + pushDownExpression = format("%s %s %s", variableName, operator.getOperator(), literalString); } - if (expression != null) { - return new ClpExpression(expression, isMetadataColumn ? originalNode : null); + if (pushDownExpression == null) { + return new ClpExpression(originalNode); } - return new ClpExpression(originalNode); + CallExpression metadataExpression = null; + if (isMetadataColumn) { + metadataExpression = new CallExpression( + operator.name(), + standardFunctionResolution.comparisonFunction(operator, literalType, literalType), + BOOLEAN, + ImmutableList.of( + new VariableReferenceExpression(Optional.empty(), variableName, literalType), + constant)); + } + + return new ClpExpression(pushDownExpression, metadataExpression); } /** diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java index b536c95ad216a..628e5ebf119c0 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java @@ -29,14 +29,12 @@ public class ClpPlanOptimizerProvider { private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; - private final ClpSplitFilterProvider splitFilterProvider; @Inject - public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider) + public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution) { this.functionManager = functionManager; this.functionResolution = functionResolution; - this.splitFilterProvider = splitFilterProvider; } @Override @@ -48,6 +46,6 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpComputePushDown(functionManager, functionResolution, splitFilterProvider)); + return ImmutableSet.of(new ClpComputePushDown(functionManager, functionResolution)); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java deleted file mode 100644 index e9fddd9b2ced5..0000000000000 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpSplitMetadataExpressionTransformer.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.optimization; - -import com.facebook.presto.spi.relation.CallExpression; -import com.facebook.presto.spi.relation.ConstantExpression; -import com.facebook.presto.spi.relation.RowExpression; -import com.facebook.presto.spi.relation.RowExpressionVisitor; - -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -public class ClpSplitMetadataExpressionTransformer - implements RowExpressionVisitor -{ - private final Map exposedToOriginal; - private final Map> dataToMetadataBounds; - private final Set requiredColumns; - private final Set presentColumns = new HashSet<>(); - - public ClpSplitMetadataExpressionTransformer( - Map exposedToOriginal, - Map> dataToMetadataBounds, - Set requiredColumns) - { - this.exposedToOriginal = exposedToOriginal; - this.dataToMetadataBounds = dataToMetadataBounds; - this.requiredColumns = requiredColumns; - } - - public RowExpression rewrite(RowExpression expr) - { - return expr.accept(this, null); - } - - public boolean containsAllRequiredColumns() - { - return presentColumns.containsAll(requiredColumns); - } - - @Override - public RowExpression visitCall(CallExpression call, Void context) - { - List rewrittenArgs = call.getArguments().stream() - .map(arg -> arg.accept(this, context)) - .collect(Collectors.toList()); - return new CallExpression(call.getSourceLocation(), call.getDisplayName(), call.getFunctionHandle(), call.getType(), rewrittenArgs, call.getFunctionMetadata()); - } - - @Override - public RowExpression visitConstant(ConstantExpression constant, Void context) - { - return constant; - } - - @Override - public RowExpression visitInputReference(InputReferenceExpression reference, Void context) - { - return reference; - } - - @Override - public RowExpression visitFieldReference(FieldReferenceExpression reference, Void context) - { - // Replace column names if applicable - String originalName = reference.getField().toString(); - presentColumns.add(originalName); - - // Step 1: replace exposed name -> original - String mappedName = exposedToOriginal.getOrDefault(originalName, originalName); - - // Step 2: remap data column -> metadata column (range-based) - Map bounds = dataToMetadataBounds.get(mappedName); - if (bounds != null) { - // by convention: replace with begin_timestamp or end_timestamp depending on predicate direction - // this part might require operator context; here we can only do simple rename - // we could wrap it in metadata-specific placeholder for later replacement - } - - return new FieldReferenceExpression(mappedName, reference.getType()); - } - - @Override - public RowExpression visitSpecialForm(SpecialFormExpression specialForm, Void context) - { - List args = specialForm.getArguments().stream() - .map(arg -> arg.accept(this, context)) - .collect(Collectors.toList()); - return new SpecialFormExpression( - specialForm.getForm(), - specialForm.getType(), - args, - specialForm.getSourceLocation()); - } - - @Override - public RowExpression visitLambda(LambdaDefinitionExpression lambda, Void context) - { - return lambda; - } - - @Override - public RowExpression visitVariableReference(VariableReferenceExpression reference, Void context) - { - String name = reference.getName(); - presentColumns.add(name); - - // Step 1: replace exposed name → original - String mappedName = exposedToOriginal.getOrDefault(name, name); - - // Step 2: remap data column → metadata columns (range mapping) - Map bounds = dataToMetadataBounds.get(mappedName); - if (bounds != null && bounds.size() == 1) { - // Simple direct remap (not range) - mappedName = bounds.values().iterator().next(); - } - - return new VariableReferenceExpression( - reference.getSourceLocation(), - mappedName, - reference.getType()); - } -} \ No newline at end of file diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java new file mode 100644 index 0000000000000..92f5f984c6fa9 --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -0,0 +1,173 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split; + +import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.plugin.clp.ClpErrorCode; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.function.FunctionHandle; +import com.facebook.presto.spi.function.FunctionMetadata; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.ConstantExpression; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.RowExpressionVisitor; +import com.facebook.presto.spi.relation.SpecialFormExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import io.airlift.slice.Slice; + +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import static com.facebook.presto.common.function.OperatorType.IS_DISTINCT_FROM; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class ClpMySqlSplitMetadataExpressionConverter + implements RowExpressionVisitor +{ + private final FunctionMetadataManager functionManager; + private final StandardFunctionResolution functionResolution; + private final Map exposedToOriginal; + private final Map> dataToMetadataBounds; + private final Set requiredColumns; + private final Set seenRequired = new HashSet<>(); + + public ClpMySqlSplitMetadataExpressionConverter( + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + Map exposedToOriginal, + Map> dataToMetadataBounds, + Set requiredColumns) + { + this.functionManager = requireNonNull(functionManager, "functionManager is null"); + this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); + this.exposedToOriginal = exposedToOriginal; + this.dataToMetadataBounds = dataToMetadataBounds; + this.requiredColumns = requiredColumns; + } + + public String transform(RowExpression expression) + { + String sql = expression.accept(this, null); + Set missing = new HashSet<>(requiredColumns); + missing.removeAll(seenRequired); + if (!missing.isEmpty()) { + throw new IllegalStateException("Missing required filter columns: " + missing); + } + return sql; + } + + @Override + public String visitCall(CallExpression node, Void context) + { + FunctionHandle functionHandle = node.getFunctionHandle(); + if (functionResolution.isNotFunction(functionHandle)) { + return format("NOT " + node.getArguments().get(0).accept(this, null)); + } + + FunctionMetadata functionMetadata = functionManager.getFunctionMetadata(node.getFunctionHandle()); + Optional operatorTypeOptional = functionMetadata.getOperatorType(); + if (operatorTypeOptional.isPresent()) { + OperatorType operatorType = operatorTypeOptional.get(); + if (operatorType.isComparisonOperator() && operatorType != IS_DISTINCT_FROM) { + String variableName = node.getArguments().get(0).accept(this, null); + String literalString = node.getArguments().get(1).accept(this, null); + + String rewritten = rewriteComparisonWithBounds(variableName, operatorType, literalString); + if (rewritten != null) { + return rewritten; + } + + return format("%s %s %s", variableName, operatorType.getOperator(), literalString); + } + } + + throw new PrestoException(ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query" + node); + } + + @Override + public String visitSpecialForm(SpecialFormExpression node, Void context) + { + switch (node.getForm()) { + case AND: + case OR: + String op = node.getForm() == SpecialFormExpression.Form.AND ? "AND" : "OR"; + return node.getArguments().stream() + .map(arg -> "(" + arg.accept(this, context) + ")") + .collect(Collectors.joining(" " + op + " ")); + case IS_NULL: + return "(" + node.getArguments().get(0).accept(this, context) + " IS NULL)"; + default: + throw new PrestoException(ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query" + node); + } + } + + @Override + public String visitConstant(ConstantExpression node, Void context) + { + Object value = node.getValue(); + if (value instanceof Slice) { + return "'" + ((Slice) value).toStringUtf8().replace("'", "''") + "'"; + } + + return value.toString(); + } + + @Override + public String visitVariableReference(VariableReferenceExpression node, Void context) + { + String exposed = node.getName(); + seenRequired.add(exposed); + return exposedToOriginal.getOrDefault(exposed, exposed); + } + + private String rewriteComparisonWithBounds(String variableName, OperatorType operator, String literal) + { + String original = exposedToOriginal.getOrDefault(variableName, variableName); + Map bounds = dataToMetadataBounds.get(original); + if (bounds == null) { + return null; + } + + String lower = bounds.get("lower"); + String upper = bounds.get("upper"); + if (lower == null || upper == null) { + return null; + } + + switch (operator) { + case GREATER_THAN: + case GREATER_THAN_OR_EQUAL: + // "col >= 5" → "upper_col >= 5" + return format("%s %s %s", upper, operator.getOperator(), literal); + + case LESS_THAN: + case LESS_THAN_OR_EQUAL: + // "col <= 5" → "lower_col <= 5" + return format("%s %s %s", lower, operator.getOperator(), literal); + + case EQUAL: + // "col = 5" → "(lower_col <= 5 AND upper_col >= 5)" + return format("(%s <= %s AND %s >= %s)", lower, literal, upper, literal); + + default: + return null; + } + } +} \ No newline at end of file diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java index 6b54218509c7f..055efb0d480b1 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java @@ -18,6 +18,9 @@ import com.facebook.presto.plugin.clp.ClpSplit; import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.spi.function.FunctionMetadataManager; +import com.facebook.presto.spi.function.StandardFunctionResolution; import com.google.common.collect.ImmutableList; import javax.inject.Inject; @@ -31,6 +34,7 @@ import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE; import static java.lang.String.format; +import static java.util.Objects.requireNonNull; public class ClpMySqlSplitProvider implements ClpSplitProvider @@ -47,9 +51,16 @@ public class ClpMySqlSplitProvider private static final Logger log = Logger.get(ClpMySqlSplitProvider.class); private final ClpConfig config; + private final FunctionMetadataManager functionManager; + private final StandardFunctionResolution functionResolution; + private final ClpSplitMetadataConfig metadataConfig; @Inject - public ClpMySqlSplitProvider(ClpConfig config) + public ClpMySqlSplitProvider( + ClpConfig config, + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + ClpSplitMetadataConfig metadataConfig) { try { Class.forName("com.mysql.cj.jdbc.Driver"); @@ -59,6 +70,9 @@ public ClpMySqlSplitProvider(ClpConfig config) throw new RuntimeException("MySQL JDBC driver not found", e); } this.config = config; + this.functionManager = functionManager; + this.functionResolution = functionResolution; + this.metadataConfig = metadataConfig; } @Override @@ -70,8 +84,18 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) String tableName = clpTableHandle.getSchemaTableName().getTableName(); String archivePathQuery = format(SQL_SELECT_ARCHIVES_TEMPLATE, config.getMetadataTablePrefix(), tableName); - if (clpTableLayoutHandle.getMetadataSql().isPresent()) { - String metadataFilterQuery = clpTableLayoutHandle.getMetadataSql().get(); + if (clpTableLayoutHandle.getMetadataExpression().isPresent()) { + SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); + + ClpMySqlSplitMetadataExpressionConverter converter = + new ClpMySqlSplitMetadataExpressionConverter( + functionManager, + functionResolution, + metadataConfig.getExposedToOriginalMapping(schemaTableName), + metadataConfig.getDataColumnRangeMapping(schemaTableName), + metadataConfig.getRequiredColumns(schemaTableName) + ); + String metadataFilterQuery = converter.transform(clpTableLayoutHandle.getMetadataExpression().get()); archivePathQuery += " AND (" + metadataFilterQuery + ")"; } log.debug("Query for archive: %s", archivePathQuery); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index b8cd1932beeac..c3e1b57608596 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -118,9 +118,16 @@ public Map getMetadataColumns(SchemaTableName name) { return result; } - public List getFilterRules(SchemaTableName name) { + public Set getRequiredColumns(SchemaTableName name) + { TableConfig cfg = getTableConfig(name); - return cfg.filterRules; + Set requiredColumns = new LinkedHashSet<>(); + for (FilterRule rule : cfg.filterRules) { + if (rule.required) { + requiredColumns.add(rule.column); + } + } + return requiredColumns; } public Map getExposedToOriginalMapping(SchemaTableName name) { @@ -148,8 +155,7 @@ public Map> getDataColumnRangeMapping(SchemaTableNam Map> mapping = new HashMap<>(); for (MetaColumn c : cfg.metaColumns.values()) { if (c.asRangeBoundOf != null && c.boundType != null) { - mapping - .computeIfAbsent(c.asRangeBoundOf, k -> new HashMap<>()) + mapping.computeIfAbsent(c.asRangeBoundOf, k -> new HashMap<>()) .put(c.boundType, c.name); } } From 9cd2d4bc4fc1e808c851a49724d4b13e9676d9ba Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 18:53:13 -0500 Subject: [PATCH 06/26] refactor tests --- presto-clp/pom.xml | 12 --- .../facebook/presto/plugin/clp/ClpConfig.java | 12 +-- .../clp/optimization/ClpComputePushDown.java | 23 ++-- ...MySqlSplitMetadataExpressionConverter.java | 3 - .../clp/split/ClpSplitMetadataConfig.java | 6 +- .../presto/plugin/clp/ClpMetadataDbSetUp.java | 15 +-- .../presto/plugin/clp/TestClpSplit.java | 10 +- .../presto/plugin/clp/TestClpUdfRewriter.java | 52 +++++---- .../filter/TestClpMySqlSplitFilterConfig.java | 87 --------------- .../TestClpMySqlSplitMetadataConfig.java | 101 ++++++++++++++++++ .../TestClpSplitFilterConfigCommon.java | 10 +- .../resources/test-mysql-split-filter.json | 27 ----- .../resources/test-mysql-split-metadata.json | 41 +++++++ 13 files changed, 212 insertions(+), 187 deletions(-) delete mode 100644 presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java create mode 100644 presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java delete mode 100644 presto-clp/src/test/resources/test-mysql-split-filter.json create mode 100644 presto-clp/src/test/resources/test-mysql-split-metadata.json diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index 54d9f8bc7d9d4..d13b59968aff5 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -11,18 +11,6 @@ presto-clp Presto - CLP Connector - - - - org.apache.maven.plugins - maven-compiler-plugin - - 11 - 11 - - - - presto-plugin diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 1e628c201a58b..734501408a3ee 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -33,7 +33,7 @@ public class ClpConfig private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; - private String splitFilterConfig; + private String splitFilterConfigPath; private SplitProviderType splitProviderType = SplitProviderType.MYSQL; public boolean isPolymorphicTypeEnabled() @@ -150,15 +150,15 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) return this; } - public String getSplitFilterConfig() + public String getSplitMetadataConfigPath() { - return splitFilterConfig; + return splitFilterConfigPath; } - @Config("clp.split-filter-config") - public ClpConfig setSplitFilterConfig(String splitFilterConfig) + @Config("clp.split-metadata-config-path") + public ClpConfig setSplitMetadataConfigPath(String splitMetadataConfigPath) { - this.splitFilterConfig = splitFilterConfig; + this.splitFilterConfigPath = splitMetadataConfigPath; return this; } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index 3d05ff66dc9f9..a83215cf9a167 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -22,6 +22,7 @@ import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.TableHandle; import com.facebook.presto.spi.VariableAllocator; import com.facebook.presto.spi.function.FunctionMetadataManager; @@ -84,7 +85,7 @@ public PlanNode visitTableScan(TableScanNode node, RewriteContext context) { TableHandle tableHandle = node.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - if (!metadataConfig.getFilterRules(clpTableHandle.getSchemaTableName()).isEmpty()) { + if (!metadataConfig.getRequiredColumns(clpTableHandle.getSchemaTableName()).isEmpty()) { throw new PrestoException(CLP_MANDATORY_SPLIT_FILTER_NOT_VALID, "required filters must be specified"); } @@ -107,33 +108,27 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); Map assignments = tableScanNode.getAssignments(); - + SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); + ImmutableSet.Builder metadataFilterColumns = ImmutableSet.builder(); + metadataFilterColumns.addAll(metadataConfig.getMetadataColumns(schemaTableName).keySet()); + metadataFilterColumns.addAll(metadataConfig.getDataColumnsWithRangeBounds(schemaTableName)); ClpExpression clpExpression = filterNode.getPredicate().accept( new ClpFilterToKqlConverter( functionResolution, functionManager, assignments, - metadataConfig.getMetadataColumns(clpTableHandle.getSchemaTableName()).keySet()), + metadataFilterColumns.build()), null); Optional kqlQuery = clpExpression.getPushDownExpression(); Optional metadataExpression = clpExpression.getMetadataExpression(); Optional remainingPredicate = clpExpression.getRemainingExpression(); - // Perform required metadata filter checks before handling the KQL query (if kqlQuery - // isn't present, we'll return early, skipping subsequent checks). - splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), metadataSqlQuery.orElse("")); - boolean hasMetadataFilter = metadataSqlQuery.isPresent(); - if (hasMetadataFilter) { - metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get())); - log.debug("Metadata SQL query: %s", metadataSqlQuery.get()); - } - - if (kqlQuery.isPresent() || hasMetadataFilter) { + if (kqlQuery.isPresent() || metadataExpression.isPresent()) { if (kqlQuery.isPresent()) { log.debug("KQL query: %s", kqlQuery.get()); } - ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery); + ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataExpression); TableHandle newTableHandle = new TableHandle( tableHandle.getConnectorId(), clpTableHandle, diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index 92f5f984c6fa9..63e36b34426b3 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -156,16 +156,13 @@ private String rewriteComparisonWithBounds(String variableName, OperatorType ope case GREATER_THAN_OR_EQUAL: // "col >= 5" → "upper_col >= 5" return format("%s %s %s", upper, operator.getOperator(), literal); - case LESS_THAN: case LESS_THAN_OR_EQUAL: // "col <= 5" → "lower_col <= 5" return format("%s %s %s", lower, operator.getOperator(), literal); - case EQUAL: // "col = 5" → "(lower_col <= 5 AND upper_col >= 5)" return format("(%s <= %s AND %s >= %s)", lower, literal, upper, literal); - default: return null; } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index c3e1b57608596..adf58feb1a2dc 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -80,10 +80,14 @@ public static class TableConfig { public ClpSplitMetadataConfig(ClpConfig config) { requireNonNull(config, "config is null"); + if (null == config.getSplitMetadataConfigPath()) { + return; + } + ObjectMapper mapper = new ObjectMapper(); JsonNode root; try { - root = mapper.readTree(Files.readAllBytes(Paths.get(config.getSplitFilterConfig()))); + root = mapper.readTree(Files.readAllBytes(Paths.get(config.getSplitMetadataConfigPath()))); } catch (IOException e) { throw new PrestoException(CLP_SPLIT_FILTER_CONFIG_NOT_FOUND, "Failed to open split filter config file", e); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java index d1d0ee6964c8e..6c4b10b2e0a4b 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java @@ -18,6 +18,7 @@ import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; import com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import org.apache.commons.io.FileUtils; import org.apache.commons.math3.util.Pair; @@ -169,13 +170,13 @@ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map getRowExpression(sql, new SessionHolder()))); List expectedSplitPaths = expectedSplitIndexes.stream() .map(expectedSplitIndex -> format("%s/%s", tablePath, entry.getValue().getIds().get(expectedSplitIndex))) .collect(toImmutableList()); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java index b82866f3d8dd9..e630b765736a5 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java @@ -21,10 +21,11 @@ import com.facebook.presto.cost.StatsProvider; import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.metadata.Metadata; +import com.facebook.presto.plugin.clp.mockdb.ClpMockMetadataDatabase; +import com.facebook.presto.plugin.clp.mockdb.table.ColumnMetadataTableRows; import com.facebook.presto.plugin.clp.optimization.ClpComputePushDown; import com.facebook.presto.plugin.clp.optimization.ClpUdfRewriter; -import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.VariableAllocator; @@ -90,44 +91,49 @@ public class TestClpUdfRewriter .setSchema(ClpMetadata.DEFAULT_SCHEMA_NAME) .build(); + private ClpMockMetadataDatabase mockMetadataDatabase; private ClpMetadataDbSetUp.DbHandle dbHandle; ClpTableHandle table; private LocalQueryRunner localQueryRunner; private FunctionAndTypeManager functionAndTypeManager; private FunctionResolution functionResolution; - private ClpSplitFilterProvider splitFilterProvider; + private ClpSplitMetadataConfig splitMetadataConfig; private PlanNodeIdAllocator planNodeIdAllocator; private VariableAllocator variableAllocator; @BeforeMethod public void setUp() { - dbHandle = getDbHandle("metadata_query_testdb"); final String tableName = "test"; - final String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName; - table = new ClpTableHandle(new SchemaTableName("default", tableName), tablePath); - - setupMetadata(dbHandle, - ImmutableMap.of( - tableName, + mockMetadataDatabase = ClpMockMetadataDatabase.builder().build(); + mockMetadataDatabase.addTableToDatasetsTableIfNotExist(ImmutableList.of(tableName)); + mockMetadataDatabase.addColumnMetadata(ImmutableMap.of( + tableName, + new ColumnMetadataTableRows( + ImmutableList.of( + "city.Name", + "city.Region.Id", + "city.Region.Name", + "fare", + "isHoliday"), ImmutableList.of( - new Pair<>("city.Name", ClpString), - new Pair<>("city.Region.Id", Integer), - new Pair<>("city.Region.Name", VarString), - new Pair<>("fare", Float), - new Pair<>("isHoliday", Boolean)))); + ClpString, + Integer, + VarString, + Float, + Boolean)))); localQueryRunner = new LocalQueryRunner(defaultSession); localQueryRunner.createCatalog("clp", new ClpConnectorFactory(), ImmutableMap.of( - "clp.metadata-db-url", format(METADATA_DB_URL_TEMPLATE, dbHandle.getDbPath()), - "clp.metadata-db-user", METADATA_DB_USER, - "clp.metadata-db-password", METADATA_DB_PASSWORD, - "clp.metadata-table-prefix", METADATA_DB_TABLE_PREFIX)); + "clp.metadata-db-url", mockMetadataDatabase.getUrl(), + "clp.metadata-db-user", mockMetadataDatabase.getUsername(), + "clp.metadata-db-password", mockMetadataDatabase.getPassword(), + "clp.metadata-table-prefix", mockMetadataDatabase.getTablePrefix())); localQueryRunner.getMetadata().registerBuiltInFunctions(extractFunctions(new ClpPlugin().getFunctions())); functionAndTypeManager = localQueryRunner.getMetadata().getFunctionAndTypeManager(); functionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); - splitFilterProvider = new ClpMySqlSplitFilterProvider(new ClpConfig()); + splitMetadataConfig = new ClpSplitMetadataConfig(new ClpConfig()); planNodeIdAllocator = new PlanNodeIdAllocator(); variableAllocator = new VariableAllocator(); } @@ -152,7 +158,7 @@ public void testScanFilter() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( @@ -191,7 +197,7 @@ public void testScanProject() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( @@ -240,7 +246,7 @@ public void testScanProjectFilter() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java deleted file mode 100644 index 27d9c86ee0092..0000000000000 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitFilterConfig.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.file.Paths; - -import static java.lang.String.format; -import static org.testng.Assert.assertEquals; - -@Test(singleThreaded = true) -public class TestClpMySqlSplitFilterConfig -{ - private String filterConfigPath; - - @BeforeMethod - public void setUp() throws IOException, URISyntaxException - { - URL resource = getClass().getClassLoader().getResource("test-mysql-split-filter.json"); - if (resource == null) { - throw new FileNotFoundException("test-mysql-split-filter.json not found in resources"); - } - - filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); - } - - @Test - public void remapSplitFilterPushDownExpression() - { - ClpConfig config = new ClpConfig(); - config.setSplitFilterConfig(filterConfigPath); - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); - - // Integer - testRange(1234, 5678, filterProvider); - testRange(-5678, -1234, filterProvider); - - // Decimal - testRange(1234.001, 5678.999, filterProvider); - testRange(-5678.999, -1234.001, filterProvider); - - // Scientific - testRange("1.234E3", "5.678e3", filterProvider); - testRange("-1.234e-3", "-5.678E-3", filterProvider); - } - - private void testRange(T lowerBound, T upperBound, ClpMySqlSplitFilterProvider filterProvider) - { - String splitFilterSql1 = format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound); - String remappedSql1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql1); - assertEquals(remappedSql1, format("(end_timestamp > %s AND begin_timestamp < %s)", lowerBound, upperBound)); - - String splitFilterSql2 = format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound); - String remappedSql2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql2); - assertEquals(remappedSql2, format("(end_timestamp >= %s AND begin_timestamp <= %s)", lowerBound, upperBound)); - - String splitFilterSql3 = format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound); - String remappedSql3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql3); - assertEquals(remappedSql3, format("(end_timestamp > %s AND begin_timestamp <= %s)", lowerBound, upperBound)); - - String splitFilterSql4 = format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound); - String remappedSql4 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql4); - assertEquals(remappedSql4, format("(end_timestamp >= %s AND begin_timestamp < %s)", lowerBound, upperBound)); - - String splitFilterSql5 = format("(\"msg.timestamp\" = %s)", lowerBound); - String remappedSql5 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", splitFilterSql5); - assertEquals(remappedSql5, format("((begin_timestamp <= %s AND end_timestamp >= %s))", lowerBound, lowerBound)); - } -} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java new file mode 100644 index 0000000000000..2794101a0db22 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java @@ -0,0 +1,101 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.TestClpQueryBase; +import com.facebook.presto.plugin.clp.split.ClpMySqlSplitMetadataExpressionConverter; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; +import com.facebook.presto.spi.SchemaTableName; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; + +import static java.lang.String.format; +import static org.testng.Assert.assertEquals; + +@Test(singleThreaded = true) +public class TestClpMySqlSplitMetadataConfig + extends TestClpQueryBase +{ + private String splitMetadataConfigPath; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); + if (resource == null) { + throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); + } + + splitMetadataConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + } + + @Test + public void remapSplitFilterPushDownExpression() + { + ClpConfig config = new ClpConfig(); + config.setSplitMetadataConfigPath(splitMetadataConfigPath); + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config); + + final SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( + functionAndTypeManager, + standardFunctionResolution, + splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), + splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), + splitMetadataConfig.getRequiredColumns(schemaTableName)); + + // Integer + testRange(1234, 5678, converter); + testRange(-5678, -1234, converter); + + // Decimal + testRange(1234.001, 5678.999, converter); + testRange(-5678.999, -1234.001, converter); + + // Scientific + testRange("1.234E3", "5.678e3", converter); + testRange("-1.234e-3", "-5.678E-3", converter); + } + + private void testRange(T lowerBound, T upperBound, ClpMySqlSplitMetadataExpressionConverter converter) + { + SessionHolder sessionHolder = new SessionHolder(); + + String remappedSql1 = converter.transform( + getRowExpression(format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), sessionHolder)); + assertEquals(remappedSql1, format("(end_timestamp > %s AND begin_timestamp < %s)", lowerBound, upperBound)); + + String remappedSql2 = converter.transform( + getRowExpression(format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), sessionHolder)); + assertEquals(remappedSql2, format("(end_timestamp >= %s AND begin_timestamp <= %s)", lowerBound, upperBound)); + + String remappedSql3 = converter.transform( + getRowExpression(format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), sessionHolder)); + assertEquals(remappedSql3, format("(end_timestamp > %s AND begin_timestamp <= %s)", lowerBound, upperBound)); + + String remappedSql4 = converter.transform( + getRowExpression(format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), sessionHolder)); + assertEquals(remappedSql4, format("(end_timestamp >= %s AND begin_timestamp < %s)", lowerBound, upperBound)); + + String remappedSql5 = converter.transform(getRowExpression(format("(\"msg.timestamp\" = %s)", lowerBound), sessionHolder)); + assertEquals(remappedSql5, format("((begin_timestamp <= %s AND end_timestamp >= %s))", lowerBound, lowerBound)); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java index 7a4058f617d0c..d950d953d0cff 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java @@ -42,9 +42,9 @@ public class TestClpSplitFilterConfigCommon @BeforeMethod public void setUp() throws IOException, URISyntaxException { - URL resource = getClass().getClassLoader().getResource("test-mysql-split-filter.json"); + URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); if (resource == null) { - throw new FileNotFoundException("test-mysql-split-filter.json not found in resources"); + throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); } filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); @@ -54,7 +54,7 @@ public void setUp() throws IOException, URISyntaxException public void checkRequiredFilters() { ClpConfig config = new ClpConfig(); - config.setSplitFilterConfig(filterConfigPath); + config.setSplitMetadataConfig(filterConfigPath); ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); Set testTableScopeSet = ImmutableSet.of(format("%s.%s", CONNECTOR_NAME, new SchemaTableName("default", "table_1"))); assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters( @@ -69,7 +69,7 @@ public void checkRequiredFilters() public void getFilterNames() { ClpConfig config = new ClpConfig(); - config.setSplitFilterConfig(filterConfigPath); + config.setSplitMetadataConfig(filterConfigPath); ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); Set catalogFilterNames = filterProvider.getColumnNames("clp"); assertEquals(ImmutableSet.of("level"), catalogFilterNames); @@ -91,7 +91,7 @@ public void handleEmptyAndInvalidSplitFilterConfig() assertTrue(filterProvider.getColumnNames("abc.opq.xyz").isEmpty()); // Invalid config - config.setSplitFilterConfig(randomUUID().toString()); + config.setSplitMetadataConfig(randomUUID().toString()); assertThrows(PrestoException.class, () -> new ClpMySqlSplitFilterProvider(config)); } } diff --git a/presto-clp/src/test/resources/test-mysql-split-filter.json b/presto-clp/src/test/resources/test-mysql-split-filter.json deleted file mode 100644 index ea5be310aff63..0000000000000 --- a/presto-clp/src/test/resources/test-mysql-split-filter.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "clp": [ - { - "columnName": "level" - } - ], - "clp.default": [ - { - "columnName": "author" - } - ], - "clp.default.table_1": [ - { - "columnName": "msg.timestamp", - "customOptions": { - "rangeMapping": { - "lowerBound": "begin_timestamp", - "upperBound": "end_timestamp" - } - }, - "required": true - }, - { - "columnName": "file_name" - } - ] -} diff --git a/presto-clp/src/test/resources/test-mysql-split-metadata.json b/presto-clp/src/test/resources/test-mysql-split-metadata.json new file mode 100644 index 0000000000000..19fee2a29c6c8 --- /dev/null +++ b/presto-clp/src/test/resources/test-mysql-split-metadata.json @@ -0,0 +1,41 @@ +{ + "": { + "metaColumns": { + "level": { + "type": "STRING" + } + } + }, + "clp.default": { + "metaColumns": { + "author": { + "type": "STRING" + } + } + }, + "clp.default.table_1": { + "metaColumns": { + "begin_timestamp": { + "type": "STRING", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "lower" + } + }, + "end_timestamp": { + "type": "STRING", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "upper" + } + } + }, + "filterRules": [ + { + "column": "msg.timestamp", + "required": true, + "reason": "Full scan would be too expensive without timestamp filtering." + } + ] + } +} From 999a6fa9a99ddfd8567b3bf3d49dcd86586a6e06 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 19:26:29 -0500 Subject: [PATCH 07/26] format fix --- .../presto/plugin/clp/ClpConnector.java | 8 +- .../clp/optimization/ClpComputePushDown.java | 3 - .../ClpPlanOptimizerProvider.java | 11 +- ...MySqlSplitMetadataExpressionConverter.java | 2 +- .../clp/split/ClpMySqlSplitProvider.java | 4 +- .../clp/split/ClpSplitMetadataConfig.java | 37 ++- .../presto/plugin/clp/ClpMetadataDbSetUp.java | 265 ------------------ .../presto/plugin/clp/TestClpSplit.java | 4 +- .../presto/plugin/clp/TestClpUdfRewriter.java | 15 +- .../TestClpMySqlSplitMetadataConfig.java | 4 +- .../TestClpSplitFilterConfigCommon.java | 13 +- 11 files changed, 54 insertions(+), 312 deletions(-) delete mode 100644 presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java index 48598a4b18603..c896dec96b862 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnector.java @@ -16,6 +16,7 @@ import com.facebook.airlift.bootstrap.LifeCycleManager; import com.facebook.airlift.log.Logger; import com.facebook.presto.plugin.clp.optimization.ClpPlanOptimizerProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.connector.Connector; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; @@ -41,6 +42,7 @@ public class ClpConnector private final ClpSplitManager splitManager; private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; + private final ClpSplitMetadataConfig clpSplitMetadataConfig; @Inject public ClpConnector( @@ -49,7 +51,8 @@ public ClpConnector( ClpRecordSetProvider recordSetProvider, ClpSplitManager splitManager, FunctionMetadataManager functionManager, - StandardFunctionResolution functionResolution) + StandardFunctionResolution functionResolution, + ClpSplitMetadataConfig clpSplitMetadataConfig) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); @@ -57,12 +60,13 @@ public ClpConnector( this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.functionManager = requireNonNull(functionManager, "functionManager is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); + this.clpSplitMetadataConfig = requireNonNull(clpSplitMetadataConfig); } @Override public ConnectorPlanOptimizerProvider getConnectorPlanOptimizerProvider() { - return new ClpPlanOptimizerProvider(functionManager, functionResolution); + return new ClpPlanOptimizerProvider(functionManager, functionResolution, clpSplitMetadataConfig); } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index a83215cf9a167..4664ca6345877 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -35,14 +35,11 @@ import com.facebook.presto.spi.relation.VariableReferenceExpression; import com.google.common.collect.ImmutableSet; -import java.util.HashSet; import java.util.Map; import java.util.Optional; -import java.util.Set; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_SPLIT_FILTER_NOT_VALID; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; -import static java.lang.String.format; import static java.util.Objects.requireNonNull; public class ClpComputePushDown diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java index 628e5ebf119c0..e58276c948175 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java @@ -13,7 +13,7 @@ */ package com.facebook.presto.plugin.clp.optimization; -import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider; import com.facebook.presto.spi.function.FunctionMetadataManager; @@ -29,12 +29,17 @@ public class ClpPlanOptimizerProvider { private final FunctionMetadataManager functionManager; private final StandardFunctionResolution functionResolution; + private final ClpSplitMetadataConfig clpSplitMetadataConfig; @Inject - public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution) + public ClpPlanOptimizerProvider( + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + ClpSplitMetadataConfig clpSplitMetadataConfig) { this.functionManager = functionManager; this.functionResolution = functionResolution; + this.clpSplitMetadataConfig = clpSplitMetadataConfig; } @Override @@ -46,6 +51,6 @@ public Set getLogicalPlanOptimizers() @Override public Set getPhysicalPlanOptimizers() { - return ImmutableSet.of(new ClpComputePushDown(functionManager, functionResolution)); + return ImmutableSet.of(new ClpComputePushDown(functionManager, functionResolution, clpSplitMetadataConfig)); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index 63e36b34426b3..9da1aa5c8bc19 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -167,4 +167,4 @@ private String rewriteComparisonWithBounds(String variableName, OperatorType ope return null; } } -} \ No newline at end of file +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java index 055efb0d480b1..529ccd6d9b1ab 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java @@ -34,7 +34,6 @@ import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE; import static java.lang.String.format; -import static java.util.Objects.requireNonNull; public class ClpMySqlSplitProvider implements ClpSplitProvider @@ -93,8 +92,7 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) functionResolution, metadataConfig.getExposedToOriginalMapping(schemaTableName), metadataConfig.getDataColumnRangeMapping(schemaTableName), - metadataConfig.getRequiredColumns(schemaTableName) - ); + metadataConfig.getRequiredColumns(schemaTableName)); String metadataFilterQuery = converter.transform(clpTableLayoutHandle.getMetadataExpression().get()); archivePathQuery += " AND (" + metadataFilterQuery + ")"; } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index adf58feb1a2dc..3858dbec0270b 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -40,7 +40,8 @@ public class ClpSplitMetadataConfig { private final Map tableConfigs = new HashMap<>(); - public static class MetaColumn { + public static class MetaColumn + { public final String name; public final String type; public final String exposedAs; @@ -48,7 +49,8 @@ public static class MetaColumn { public final String asRangeBoundOf; // optional public final String boundType; // "lower" or "upper" - public MetaColumn(String name, JsonNode node) { + public MetaColumn(String name, JsonNode node) + { this.name = name; this.type = node.path("type").asText(); this.exposedAs = node.path("exposedAs").asText(name); @@ -59,25 +61,29 @@ public MetaColumn(String name, JsonNode node) { } } - public static class FilterRule { + public static class FilterRule + { public final String column; public final boolean required; public final String reason; - public FilterRule(JsonNode node) { + public FilterRule(JsonNode node) + { this.column = node.path("column").asText(); this.required = node.path("required").asBoolean(false); this.reason = node.path("reason").asText(null); } } - public static class TableConfig { + public static class TableConfig + { public final Map metaColumns = new HashMap<>(); public final List filterRules = new ArrayList<>(); } @Inject - public ClpSplitMetadataConfig(ClpConfig config) { + public ClpSplitMetadataConfig(ClpConfig config) + { requireNonNull(config, "config is null"); if (null == config.getSplitMetadataConfigPath()) { @@ -93,13 +99,13 @@ public ClpSplitMetadataConfig(ClpConfig config) { throw new PrestoException(CLP_SPLIT_FILTER_CONFIG_NOT_FOUND, "Failed to open split filter config file", e); } - for (Iterator it = root.fieldNames(); it.hasNext();) { + for (Iterator it = root.fieldNames(); it.hasNext(); ) { String namespace = it.next(); JsonNode tableNode = root.get(namespace); TableConfig cfg = new TableConfig(); if (tableNode.has("metaColumns")) { - for (Iterator m = tableNode.get("metaColumns").fieldNames(); m.hasNext();) { + for (Iterator m = tableNode.get("metaColumns").fieldNames(); m.hasNext(); ) { String colName = m.next(); cfg.metaColumns.put(colName, new MetaColumn(colName, tableNode.get("metaColumns").get(colName))); } @@ -113,7 +119,8 @@ public ClpSplitMetadataConfig(ClpConfig config) { } } - public Map getMetadataColumns(SchemaTableName name) { + public Map getMetadataColumns(SchemaTableName name) + { TableConfig cfg = getTableConfig(name); Map result = new LinkedHashMap<>(); for (MetaColumn c : cfg.metaColumns.values()) { @@ -134,7 +141,8 @@ public Set getRequiredColumns(SchemaTableName name) return requiredColumns; } - public Map getExposedToOriginalMapping(SchemaTableName name) { + public Map getExposedToOriginalMapping(SchemaTableName name) + { TableConfig cfg = getTableConfig(name); Map mapping = new HashMap<>(); for (MetaColumn c : cfg.metaColumns.values()) { @@ -143,7 +151,8 @@ public Map getExposedToOriginalMapping(SchemaTableName name) { return mapping; } - public Set getDataColumnsWithRangeBounds(SchemaTableName name) { + public Set getDataColumnsWithRangeBounds(SchemaTableName name) + { TableConfig cfg = getTableConfig(name); Set result = new LinkedHashSet<>(); for (MetaColumn c : cfg.metaColumns.values()) { @@ -154,7 +163,8 @@ public Set getDataColumnsWithRangeBounds(SchemaTableName name) { return result; } - public Map> getDataColumnRangeMapping(SchemaTableName name) { + public Map> getDataColumnRangeMapping(SchemaTableName name) + { TableConfig cfg = getTableConfig(name); Map> mapping = new HashMap<>(); for (MetaColumn c : cfg.metaColumns.values()) { @@ -166,7 +176,8 @@ public Map> getDataColumnRangeMapping(SchemaTableNam return mapping; } - private TableConfig getTableConfig(SchemaTableName name) { + private TableConfig getTableConfig(SchemaTableName name) + { TableConfig merged = new TableConfig(); List namespaces = new ArrayList<>(); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java deleted file mode 100644 index 6c4b10b2e0a4b..0000000000000 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp; - -import com.facebook.airlift.log.Logger; -import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; -import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; -import com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType; -import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; -import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; -import org.apache.commons.io.FileUtils; -import org.apache.commons.math3.util.Pair; - -import java.io.File; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.List; -import java.util.Map; - -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.COLUMN_METADATA_TABLE_COLUMN_NAME; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.COLUMN_METADATA_TABLE_COLUMN_TYPE; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_COLUMN_ARCHIVE_STORAGE_DIRECTORY; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_COLUMN_NAME; -import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_SUFFIX; -import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_COLUMN_ID; -import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_SUFFIX; -import static java.lang.String.format; -import static java.util.UUID.randomUUID; -import static org.testng.Assert.fail; - -public final class ClpMetadataDbSetUp -{ - public static final String METADATA_DB_PASSWORD = ""; - public static final String METADATA_DB_TABLE_PREFIX = "clp_"; - public static final String METADATA_DB_URL_TEMPLATE = "jdbc:h2:file:%s;MODE=MySQL;DATABASE_TO_UPPER=FALSE"; - public static final String METADATA_DB_USER = "sa"; - public static final String ARCHIVES_STORAGE_DIRECTORY_BASE = "/tmp/archives/"; - - private static final Logger log = Logger.get(ClpMetadataDbSetUp.class); - private static final String DATASETS_TABLE_NAME = METADATA_DB_TABLE_PREFIX + DATASETS_TABLE_SUFFIX; - private static final String ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP = "begin_timestamp"; - private static final String ARCHIVES_TABLE_COLUMN_PAGINATION_ID = "pagination_id"; - private static final String ARCHIVES_TABLE_COLUMN_END_TIMESTAMP = "end_timestamp"; - - private ClpMetadataDbSetUp() - { - throw new UnsupportedOperationException("This is a utility class and cannot be instantiated"); - } - - public static DbHandle getDbHandle(String dbName) - { - return new DbHandle(format("/tmp/presto-clp-test-%s/%s", randomUUID(), dbName)); - } - - public static ClpMetadata setupMetadata(DbHandle dbHandle, Map>> clpFields) - { - final String metadataDbUrl = format(METADATA_DB_URL_TEMPLATE, dbHandle.dbPath); - final String columnMetadataTableSuffix = "_column_metadata"; - - try (Connection conn = DriverManager.getConnection(metadataDbUrl, METADATA_DB_USER, METADATA_DB_PASSWORD); Statement stmt = conn.createStatement()) { - createDatasetsTable(stmt); - - for (Map.Entry>> entry : clpFields.entrySet()) { - String tableName = entry.getKey(); - String columnMetadataTableName = METADATA_DB_TABLE_PREFIX + tableName + columnMetadataTableSuffix; - String createColumnMetadataSQL = format( - "CREATE TABLE IF NOT EXISTS %s (" + - " %s VARCHAR(512) NOT NULL," + - " %s TINYINT NOT NULL," + - " PRIMARY KEY (%s, %s))", - columnMetadataTableName, - COLUMN_METADATA_TABLE_COLUMN_NAME, - COLUMN_METADATA_TABLE_COLUMN_TYPE, - COLUMN_METADATA_TABLE_COLUMN_NAME, - COLUMN_METADATA_TABLE_COLUMN_TYPE); - String insertColumnMetadataSQL = format( - "INSERT INTO %s (%s, %s) VALUES (?, ?)", - columnMetadataTableName, - COLUMN_METADATA_TABLE_COLUMN_NAME, - COLUMN_METADATA_TABLE_COLUMN_TYPE); - - stmt.execute(createColumnMetadataSQL); - updateDatasetsTable(conn, tableName); - - try (PreparedStatement pstmt = conn.prepareStatement(insertColumnMetadataSQL)) { - for (Pair record : entry.getValue()) { - pstmt.setString(1, record.getFirst()); - pstmt.setByte(2, record.getSecond().getType()); - pstmt.addBatch(); - } - pstmt.executeBatch(); - } - } - } - catch (SQLException e) { - fail(e.getMessage()); - } - - ClpConfig config = new ClpConfig() - .setPolymorphicTypeEnabled(true) - .setMetadataDbUrl(metadataDbUrl) - .setMetadataDbUser(METADATA_DB_USER) - .setMetadataDbPassword(METADATA_DB_PASSWORD) - .setMetadataTablePrefix(METADATA_DB_TABLE_PREFIX); - ClpMetadataProvider metadataProvider = new ClpMySqlMetadataProvider(config); - return new ClpMetadata(config, metadataProvider); - } - - public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map> splits) - { - final String metadataDbUrl = format(METADATA_DB_URL_TEMPLATE, dbHandle.dbPath); - final String archiveTableFormat = METADATA_DB_TABLE_PREFIX + "%s" + ARCHIVES_TABLE_SUFFIX; - - try (Connection conn = DriverManager.getConnection(metadataDbUrl, METADATA_DB_USER, METADATA_DB_PASSWORD); Statement stmt = conn.createStatement()) { - createDatasetsTable(stmt); - - // Create and populate archive tables - for (Map.Entry> tableSplits : splits.entrySet()) { - String tableName = tableSplits.getKey(); - updateDatasetsTable(conn, tableName); - - String archiveTableName = format(archiveTableFormat, tableSplits.getKey()); - String createArchiveTableSQL = format( - "CREATE TABLE IF NOT EXISTS %s (" + - "%s BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, " + - "%s VARCHAR(64) NOT NULL, " + - "%s BIGINT, " + - "%s BIGINT)", - archiveTableName, - ARCHIVES_TABLE_COLUMN_PAGINATION_ID, - ARCHIVES_TABLE_COLUMN_ID, - ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP, - ARCHIVES_TABLE_COLUMN_END_TIMESTAMP); - - stmt.execute(createArchiveTableSQL); - - String insertArchiveTableSQL = format( - "INSERT INTO %s (%s, %s, %s) VALUES (?, ?, ?)", - archiveTableName, - ARCHIVES_TABLE_COLUMN_ID, - ARCHIVES_TABLE_COLUMN_BEGIN_TIMESTAMP, - ARCHIVES_TABLE_COLUMN_END_TIMESTAMP); - try (PreparedStatement pstmt = conn.prepareStatement(insertArchiveTableSQL)) { - for (ArchivesTableRow split : tableSplits.getValue()) { - pstmt.setString(1, split.id); - pstmt.setLong(2, split.beginTimestamp); - pstmt.setLong(3, split.endTimestamp); - pstmt.addBatch(); - } - pstmt.executeBatch(); - } - } - } - catch (SQLException e) { - fail(e.getMessage()); - } - - ClpConfig config = new ClpConfig() - .setPolymorphicTypeEnabled(true) - .setMetadataDbUrl(metadataDbUrl) - .setMetadataDbUser(METADATA_DB_USER) - .setMetadataDbPassword(METADATA_DB_PASSWORD) - .setMetadataTablePrefix(METADATA_DB_TABLE_PREFIX); - return new ClpMySqlSplitProvider(config, new ClpSplitMetadataConfig(config)); - } - - public static void tearDown(DbHandle dbHandle) - { - File dir = new File(dbHandle.dbPath).getParentFile(); - if (dir.exists()) { - try { - FileUtils.deleteDirectory(dir); - log.info("Deleted database dir" + dir.getAbsolutePath()); - } - catch (IOException e) { - log.warn("Failed to delete directory " + dir + ": " + e.getMessage()); - } - } - } - - private static void createDatasetsTable(Statement stmt) - throws SQLException - { - final String createDatasetsTableSql = format( - "CREATE TABLE IF NOT EXISTS %s (%s VARCHAR(255) PRIMARY KEY, %s VARCHAR(4096) NOT NULL)", - DATASETS_TABLE_NAME, - DATASETS_TABLE_COLUMN_NAME, - DATASETS_TABLE_COLUMN_ARCHIVE_STORAGE_DIRECTORY); - stmt.execute(createDatasetsTableSql); - } - - private static void updateDatasetsTable(Connection conn, String tableName) - throws SQLException - { - final String insertDatasetsTableSql = format( - "INSERT INTO %s (%s, %s) VALUES (?, ?)", - DATASETS_TABLE_NAME, - DATASETS_TABLE_COLUMN_NAME, - DATASETS_TABLE_COLUMN_ARCHIVE_STORAGE_DIRECTORY); - try (PreparedStatement pstmt = conn.prepareStatement(insertDatasetsTableSql)) { - pstmt.setString(1, tableName); - pstmt.setString(2, ARCHIVES_STORAGE_DIRECTORY_BASE + tableName); - pstmt.executeUpdate(); - } - } - - static final class DbHandle - { - private final String dbPath; - - DbHandle(String dbPath) - { - this.dbPath = dbPath; - } - - public String getDbPath() - { - return dbPath; - } - } - - static final class ArchivesTableRow - { - private final String id; - private final long beginTimestamp; - private final long endTimestamp; - - ArchivesTableRow(String id, long beginTimestamp, long endTimestamp) - { - this.id = id; - this.beginTimestamp = beginTimestamp; - this.endTimestamp = endTimestamp; - } - - public String getId() - { - return id; - } - - public long getBeginTimestamp() - { - return beginTimestamp; - } - - public long getEndTimestamp() - { - return endTimestamp; - } - } -} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java index fb8a1505c7b89..3395ccb29a800 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java @@ -30,7 +30,6 @@ import java.util.Optional; import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; import static org.testng.Assert.assertEquals; @@ -141,8 +140,9 @@ private void compareListSplitsResult( Optional metadataSql, List expectedSplitIndexes) { + final String storageBase = "/tmp/archives/"; String tableName = entry.getKey(); - String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName; + String tablePath = storageBase + tableName; ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle( new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, tableName), tablePath), Optional.empty(), diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java index e630b765736a5..397db270471ee 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java @@ -27,7 +27,6 @@ import com.facebook.presto.plugin.clp.optimization.ClpUdfRewriter; import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ColumnHandle; -import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.VariableAllocator; import com.facebook.presto.spi.WarningCollector; import com.facebook.presto.spi.plan.PlanNode; @@ -46,7 +45,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import org.apache.commons.math3.util.Pair; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -61,13 +59,6 @@ import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.metadata.FunctionExtractor.extractFunctions; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_PASSWORD; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_TABLE_PREFIX; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_URL_TEMPLATE; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_USER; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.getDbHandle; -import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupMetadata; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Boolean; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.ClpString; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Float; @@ -80,7 +71,6 @@ import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.node; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.project; import static com.facebook.presto.testing.TestingSession.testSessionBuilder; -import static java.lang.String.format; @Test(singleThreaded = true) public class TestClpUdfRewriter @@ -92,7 +82,6 @@ public class TestClpUdfRewriter .build(); private ClpMockMetadataDatabase mockMetadataDatabase; - private ClpMetadataDbSetUp.DbHandle dbHandle; ClpTableHandle table; private LocalQueryRunner localQueryRunner; @@ -142,7 +131,9 @@ public void setUp() public void tearDown() { localQueryRunner.close(); - ClpMetadataDbSetUp.tearDown(dbHandle); + if (null != mockMetadataDatabase) { + mockMetadataDatabase.teardown(); + } } @Test diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java index 2794101a0db22..0a4e2fe60b46e 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java @@ -32,7 +32,7 @@ @Test(singleThreaded = true) public class TestClpMySqlSplitMetadataConfig - extends TestClpQueryBase + extends TestClpQueryBase { private String splitMetadataConfigPath; @@ -54,7 +54,7 @@ public void remapSplitFilterPushDownExpression() config.setSplitMetadataConfigPath(splitMetadataConfigPath); ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config); - final SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + final SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( functionAndTypeManager, standardFunctionResolution, diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java index d950d953d0cff..2154673049278 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java @@ -14,6 +14,7 @@ package com.facebook.presto.plugin.clp.split.filter; import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableSet; @@ -35,9 +36,9 @@ import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) -public class TestClpSplitFilterConfigCommon +public class TestClpSplitMetadataConfigCommon { - private String filterConfigPath; + private String splitMetadataConfigPath; @BeforeMethod public void setUp() throws IOException, URISyntaxException @@ -47,15 +48,15 @@ public void setUp() throws IOException, URISyntaxException throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); } - filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + splitMetadataConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); } @Test public void checkRequiredFilters() { ClpConfig config = new ClpConfig(); - config.setSplitMetadataConfig(filterConfigPath); - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); + config.setSplitMetadataConfigPath(splitMetadataConfigPath); + ClpSplitMetadataConfig filterProvider = new ClpSplitMetadataConfig(config); Set testTableScopeSet = ImmutableSet.of(format("%s.%s", CONNECTOR_NAME, new SchemaTableName("default", "table_1"))); assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters( testTableScopeSet, @@ -69,7 +70,7 @@ public void checkRequiredFilters() public void getFilterNames() { ClpConfig config = new ClpConfig(); - config.setSplitMetadataConfig(filterConfigPath); + config.setSplitMetadataConfig(splitMetadataConfigPath); ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); Set catalogFilterNames = filterProvider.getColumnNames("clp"); assertEquals(ImmutableSet.of("level"), catalogFilterNames); From 422ff69b49040c303a1c92fd2d431dc8841caf02 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 19:26:49 -0500 Subject: [PATCH 08/26] doc update --- presto-docs/src/main/sphinx/connector/clp.rst | 96 ++++++++++++++++--- 1 file changed, 83 insertions(+), 13 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index fc1631c3766f8..09db9eba06717 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -78,8 +78,8 @@ Property Name Description ``clp.metadata-refresh-interval`` Specifies how frequently metadata is refreshed from the source, in 60 seconds. Set this to a lower value for frequently changing datasets or to a higher value to reduce load. -``clp.split-filter-config`` The absolute path to an optional split filter config file. See the - :ref:`Split Filter Config File` section for +``clp.split-metadata-config-path`` The absolute path to an optional split metadata config file. See the + :ref:`Split Metadata Config File` section for details. ``clp.split-filter-provider-type`` Specifies the split filter provider type. Currently, the only supported ``mysql`` type is a MySQL database, which is also used by the CLP package to @@ -103,16 +103,86 @@ If you prefer to use a different source--or the same source with a custom implem implementations of the ``ClpMetadataProvider`` and ``ClpSplitProvider`` interfaces, and configure the connector accordingly. -.. _split-filter-config-file: +.. _split-metadata-config-file: + +************************** +Split Metadata Config File +************************** + +The split metadata config file allows you to configure the set of metadata columns that can be used in ``WHERE`` clause +to filter out irrelevant splits (CLP archives) when querying CLP's metadata database. This can significantly improve +performance by reducing the amount of data that needs to be scanned. For a given query, the connector will translate +any supported filter predicates that involve the configured columns into a query against CLP's metadata database. + +Structure Overview +================== + +The configuration is a JSON object where each top-level key represents a namespace. Each namespace defines metadata and +filtering behavior for one or more tables. + +A *namespace* can be one of the following: + +- An empty string ``""`` (applies globally to all schemas and tables) +- A schema name (applies to all tables in the schema) +- A schema and table name delimited by ``"."`` (applies only to that table) + +Rules are **merged hierarchically**, with more specific namespaces overriding or extending parent ones. + + +Namespace Configuration Format +============================== + +Each namespace maps to an object containing the following fields: + +- ``metaColumns``: an object defining metadata columns for this namespace. +- ``filterRules``: an array specifying filters that must or can be used in queries. + +Meta Columns +------------ + +Each entry under ``metaColumns`` describes a single metadata column. Metadata columns represent attributes of data +splits, such as file path, partition date, or timestamp range. + +Supported fields: + +- ``type``: string, the Presto type of the metadata column (e.g., ``STRING``, ``DATE``, ``LONG``) +- ``exposedAs``: string, optional, the name exposed to queries (defaults to the original column name) +- ``description``: string, optional, human-readable description +- ``filter.asRangeBoundOf``: string, optional, the data column name this metadata column bounds +- ``filter.boundType``: string, optional, must be ``"lower"`` or ``"upper"``, indicates bound type + +Example:: + + "metaColumns": { + "partition_date": { + "type": "DATE", + "exposedAs": "partition_date", + "description": "Logical partition of the data file" + }, + "$ir_path": { + "type": "STRING", + "exposedAs": "tpath", + "description": "Internal file path" + }, + "begin_timestamp": { + "type": "LONG", + "description": "Start of the timestamp range for the file", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "lower" + } + }, + "end_timestamp": { + "type": "LONG", + "description": "End of the timestamp range for the file", + "filter": { + "asRangeBoundOf": "msg.timestamp", + "boundType": "upper" + } + } + } -*************************** -Split Filter Config File -*************************** -The split filter config file allows you to configure the set of columns that can be used to filter out irrelevant -splits (CLP archives) when querying CLP's metadata database. This can significantly improve performance by reducing the -amount of data that needs to be scanned. For a given query, the connector will translate any supported filter predicates -that involve the configured columns into a query against CLP's metadata database. The configuration is a JSON object where each key under the root represents a :ref:`scope` and each scope maps to an array of :ref:`filter configs`. @@ -124,9 +194,9 @@ Scopes A *scope* can be one of the following: -- A catalog name -- A fully-qualified schema name -- A fully-qualified table name +- An empty string +- A schema name +- A name with schema name and table name delimited by ".". Filter configs under a particular scope will apply to all child scopes. For example, filter configs at the schema level will apply to all tables within that schema. From e517b17b7c64fe9dde6dcbddb3699354e6f3f0f7 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 19:59:41 -0500 Subject: [PATCH 09/26] doc update --- presto-docs/src/main/sphinx/connector/clp.rst | 174 +++++++++--------- 1 file changed, 87 insertions(+), 87 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index 09db9eba06717..dcf803724aee6 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -126,8 +126,17 @@ A *namespace* can be one of the following: - A schema name (applies to all tables in the schema) - A schema and table name delimited by ``"."`` (applies only to that table) -Rules are **merged hierarchically**, with more specific namespaces overriding or extending parent ones. +When resolving configuration for a specific table: +1. The connector merges definitions from matching namespaces in this order: + + - ``""`` (global) + - ``schema`` + - ``schema.table`` + +2. Later (more specific) namespaces **override** or **extend** parent configurations. + +3. Duplicate filter rules are deduplicated by ``column`` name. Namespace Configuration Format ============================== @@ -141,11 +150,12 @@ Meta Columns ------------ Each entry under ``metaColumns`` describes a single metadata column. Metadata columns represent attributes of data -splits, such as file path, partition date, or timestamp range. +splits, such as file path, partition date, or timestamp range. The key of each entry is the column name used to filter +splits (e.g. in a metadata filter SQL query). However, we can expose a different name to Presto. Supported fields: -- ``type``: string, the Presto type of the metadata column (e.g., ``STRING``, ``DATE``, ``LONG``) +- ``type``: string, the Presto type of the metadata column (e.g., ``VARCHAR``, ``DATE``, ``BIGINT``) - ``exposedAs``: string, optional, the name exposed to queries (defaults to the original column name) - ``description``: string, optional, human-readable description - ``filter.asRangeBoundOf``: string, optional, the data column name this metadata column bounds @@ -182,111 +192,101 @@ Example:: } } +Filter Rules +------------ +Each entry in ``filterRules`` specifies a data column or a metadata column that must or can appear in query filters. +These rules ensure that queries contain sufficient constraints for efficient split selection. -The configuration is a JSON object where each key under the root represents a :ref:`scope` and each scope maps -to an array of :ref:`filter configs`. - -.. _scopes: - -Scopes -====== - -A *scope* can be one of the following: - -- An empty string -- A schema name -- A name with schema name and table name delimited by ".". - -Filter configs under a particular scope will apply to all child scopes. For example, filter configs at the schema level -will apply to all tables within that schema. - -.. _filter-configs: - -Filter Configs -============== - -Each filter config indicates how a *data column*---i.e., a column in the Presto table---should be mapped to one or -more *metadata columns*---i.e., columns in CLP's metadata database. - -For example, an integer data column (e.g., ``timestamp``), may be remapped to a pair of metadata columns that represent -the range of possible values (e.g., ``begin_timestamp`` and ``end_timestamp``) of the data column within a split. - -Each filter config has the following options: - -- ``columnName``: The data column's name. - -- ``customOptions`` *(optional)*: Custom options for a split filter provider. Options for the default split filter - provider (``ClpMySqlSplitFilterProvider``) are :ref:`below`. - -- ``required`` *(optional, defaults to false)*: Whether the filter **must** be present in the generated metadata query. - If a required filter is missing or cannot be added to the metadata query, the original query will be rejected. - -.. _clp-mysql-split-filter-provider-config: - -ClpMySqlSplitFilterProvider-Specific Filter Config ------------------------------------------------ - -For ``ClpMySqlSplitFilterProvider``, the ``customOptions`` option of the filter config has the following sub-options: +Supported fields: -- ``rangeMapping`` *(optional)*: an object with the following properties: +- ``column``: string, the name of the data column or metadata column (as seen by Presto) +- ``required``: boolean, optional, whether this column **must** appear in query filters (default: ``false``) +- ``reason``: string, optional, explanation of why the column is required - .. note:: This option is only valid if the column has a numeric type. +Example:: - - ``lowerBound``: The metadata column that represents the lower bound of values in a split for the data column. - - ``upperBound``: The metadata column that represents the upper bound of values in a split for the data column. + "filterRules": [ + { + "column": "msg.timestamp", + "required": true, + "reason": "Full scan would be too expensive without timestamp filtering." + } + ] -Filter Config Example ---------------------- +Complete Example +---------------- -The code block shows an example filter config file: +The code block shows an example of split metadata config file: .. code-block:: json { - "clp": [ - { - "columnName": "level" - } - ], - "clp.default": [ - { - "columnName": "author" - } - ], - "clp.default.table_1": [ - { - "columnName": "msg.timestamp", - "customOptions": { - "rangeMapping": { - "lowerBound": "begin_timestamp", - "upperBound": "end_timestamp" - } + "": { + "metaColumns": { + "global_meta": { + "type": "STRING", + "description": "Global metadata column" + } + }, + "filterRules": [] + }, + "default": { + "metaColumns": { + "partition_date": { + "type": "DATE", + "exposedAs": "partition_date", + "description": "Logical partition of the data file" + } + }, + }, + "default.table_1": { + "metaColumns": { + "begin_timestamp": { + "type": "LONG", + "filter": { "asRangeBoundOf": "msg.timestamp", "boundType": "lower" } }, - "required": true + "end_timestamp": { + "type": "LONG", + "filter": { "asRangeBoundOf": "msg.timestamp", "boundType": "upper" } + } }, - { - "columnName": "file_name" - } - ] + "filterRules": [ + { + "column": "msg.timestamp", + "required": true, + "reason": "Full scan would be too expensive without timestamp filtering." + } + ] + } } -- The first key-value pair adds the following filter configs for all schemas and tables under the ``clp`` catalog: +- The empty string namespace ``""`` defines **global metadata** that applies to all schemas and tables. In this case, + ``global_meta`` column is available everywhere. + +- The schema namespace ``"default"`` defines metadata columns and filters for **all tables in the `default` schema**. + - ``partition_date`` is exposed as ``partition_date`` for query usage. + +- The table-specific namespace ``"default.table_1"`` defines metadata and filter rules for a **single table**: + - ``begin_timestamp`` and ``end_timestamp`` map to the data column ``msg.timestamp`` as lower and upper bounds. + This enables range-based filtering on timestamp values when generating split queries. + - The filter rule for ``msg.timestamp`` is marked as **required**, ensuring that queries without timestamp constraints + are rejected. - - The column ``level`` is used as-is without remapping. +Merging Behavior +---------------- -- The second key-value pair adds the following filter configs for all tables under the ``clp.default`` schema: +When resolving configuration for a specific table: - - The column ``author`` is used as-is without remapping. +1. The connector merges definitions from matching namespaces in this order: -- The third key-value pair adds two filter configs for the table ``clp.default.table_1``: + - ``""`` (global) + - ``schema`` + - ``schema.table`` - - The column ``msg.timestamp`` is remapped via a ``rangeMapping`` to the metadata columns ``begin_timestamp`` and - ``end_timestamp``, and is required to exist in every query. - - The column ``file_name`` is used as-is without remapping. +2. Later (more specific) namespaces **override** or **extend** parent configurations. -If you prefer to use a different format for ``customOptions``, you can provide your own implementation of the -``ClpSplitFilterProvider`` interface, and configure the connector accordingly. +3. Duplicate filter rules are deduplicated by ``column`` name. Supported SQL Expressions ========================= From 01aefab752a9e4abd3eabfb68579cef02d3cf221 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 20:00:07 -0500 Subject: [PATCH 10/26] refactor --- .../presto/plugin/clp/ClpConnectorFactory.java | 1 + .../plugin/clp/split/ClpSplitMetadataConfig.java | 13 +++++++++---- .../facebook/presto/plugin/clp/TestClpSplit.java | 2 +- .../presto/plugin/clp/TestClpUdfRewriter.java | 2 +- .../filter/TestClpMySqlSplitMetadataConfig.java | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java index b3b802e058f58..3ef4451437cf1 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConnectorFactory.java @@ -54,6 +54,7 @@ public Connector create(String catalogName, Map config, Connecto try { Bootstrap app = new Bootstrap(new JsonModule(), new ClpModule(), binder -> { binder.bind(FunctionMetadataManager.class).toInstance(context.getFunctionMetadataManager()); + binder.bind(TypeManager.class).toInstance(context.getTypeManager()); binder.bind(NodeManager.class).toInstance(context.getNodeManager()); binder.bind(StandardFunctionResolution.class).toInstance(context.getStandardFunctionResolution()); binder.bind(TypeManager.class).toInstance(context.getTypeManager()); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index 3858dbec0270b..fdac6b918bd1d 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -13,6 +13,9 @@ */ package com.facebook.presto.plugin.clp.split; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.common.type.TypeManager; +import com.facebook.presto.common.type.TypeSignature; import com.facebook.presto.plugin.clp.ClpConfig; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; @@ -39,6 +42,7 @@ public class ClpSplitMetadataConfig { private final Map tableConfigs = new HashMap<>(); + private final TypeManager typeManager; public static class MetaColumn { @@ -82,9 +86,10 @@ public static class TableConfig } @Inject - public ClpSplitMetadataConfig(ClpConfig config) + public ClpSplitMetadataConfig(ClpConfig config, TypeManager typeManager) { requireNonNull(config, "config is null"); + this.typeManager = requireNonNull(typeManager, "typeManager is null"); if (null == config.getSplitMetadataConfigPath()) { return; @@ -119,12 +124,12 @@ public ClpSplitMetadataConfig(ClpConfig config) } } - public Map getMetadataColumns(SchemaTableName name) + public Map getMetadataColumns(SchemaTableName name) { TableConfig cfg = getTableConfig(name); - Map result = new LinkedHashMap<>(); + Map result = new LinkedHashMap<>(); for (MetaColumn c : cfg.metaColumns.values()) { - result.put(c.exposedAs, c.type); + result.put(c.exposedAs, typeManager.getType(TypeSignature.parseTypeSignature(c.type))); } return result; } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java index 3395ccb29a800..1c8fc26b3be40 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java @@ -85,7 +85,7 @@ public void setUp() config, functionAndTypeManager, standardFunctionResolution, - new ClpSplitMetadataConfig(config)); + new ClpSplitMetadataConfig(config, functionAndTypeManager)); } @AfterMethod diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java index 397db270471ee..4a49f72066045 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java @@ -122,7 +122,7 @@ public void setUp() localQueryRunner.getMetadata().registerBuiltInFunctions(extractFunctions(new ClpPlugin().getFunctions())); functionAndTypeManager = localQueryRunner.getMetadata().getFunctionAndTypeManager(); functionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); - splitMetadataConfig = new ClpSplitMetadataConfig(new ClpConfig()); + splitMetadataConfig = new ClpSplitMetadataConfig(new ClpConfig(), functionAndTypeManager); planNodeIdAllocator = new PlanNodeIdAllocator(); variableAllocator = new VariableAllocator(); } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java index 0a4e2fe60b46e..1e67372d3316e 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java @@ -52,7 +52,7 @@ public void remapSplitFilterPushDownExpression() { ClpConfig config = new ClpConfig(); config.setSplitMetadataConfigPath(splitMetadataConfigPath); - ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config); + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); final SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( From cdebd8da74ca5efb876e9b5be1785e74a1069a21 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 20:26:22 -0500 Subject: [PATCH 11/26] fix compilation errors --- presto-clp/pom.xml | 5 - .../presto/plugin/clp/ClpErrorCode.java | 5 +- .../clp/optimization/ClpComputePushDown.java | 4 +- ...MySqlSplitMetadataExpressionConverter.java | 3 +- .../clp/split/ClpSplitMetadataConfig.java | 4 +- .../TestClpSplitFilterConfigCommon.java | 98 -------------- .../TestClpSplitMetadataConfigCommon.java | 120 ++++++++++++++++++ .../resources/test-mysql-split-metadata.json | 11 +- 8 files changed, 135 insertions(+), 115 deletions(-) delete mode 100644 presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java create mode 100644 presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java diff --git a/presto-clp/pom.xml b/presto-clp/pom.xml index d13b59968aff5..8ee37434861e2 100644 --- a/presto-clp/pom.xml +++ b/presto-clp/pom.xml @@ -65,11 +65,6 @@ provided - - com.fasterxml.jackson.core - jackson-core - - com.fasterxml.jackson.core jackson-databind diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java index 2530c013455cc..e2a68349be6e6 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java @@ -29,9 +29,8 @@ public enum ClpErrorCode CLP_UNSUPPORTED_TYPE(3, EXTERNAL), CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL), - CLP_SPLIT_FILTER_CONFIG_NOT_FOUND(10, USER_ERROR), - CLP_MANDATORY_SPLIT_FILTER_NOT_VALID(11, USER_ERROR), - CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE(12, EXTERNAL); + CLP_SPLIT_METADATA_CONFIG_NOT_FOUND(10, USER_ERROR), + CLP_MANDATORY_COLUMN_NOT_IN_FILTER(11, USER_ERROR); private final ErrorCode errorCode; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index 4664ca6345877..5999ce5341d9a 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -38,7 +38,7 @@ import java.util.Map; import java.util.Optional; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_SPLIT_FILTER_NOT_VALID; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_COLUMN_NOT_IN_FILTER; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; import static java.util.Objects.requireNonNull; @@ -83,7 +83,7 @@ public PlanNode visitTableScan(TableScanNode node, RewriteContext context) TableHandle tableHandle = node.getTable(); ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); if (!metadataConfig.getRequiredColumns(clpTableHandle.getSchemaTableName()).isEmpty()) { - throw new PrestoException(CLP_MANDATORY_SPLIT_FILTER_NOT_VALID, "required filters must be specified"); + throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "required filters must be specified"); } return super.visitTableScan(node, context); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index 9da1aa5c8bc19..36f10f196a4e4 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -35,6 +35,7 @@ import java.util.stream.Collectors; import static com.facebook.presto.common.function.OperatorType.IS_DISTINCT_FROM; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_COLUMN_NOT_IN_FILTER; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -68,7 +69,7 @@ public String transform(RowExpression expression) Set missing = new HashSet<>(requiredColumns); missing.removeAll(seenRequired); if (!missing.isEmpty()) { - throw new IllegalStateException("Missing required filter columns: " + missing); + throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "Missing required filter columns: " + missing); } return sql; } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index fdac6b918bd1d..de39ff0b40278 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -36,7 +36,7 @@ import java.util.Map; import java.util.Set; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_FILTER_CONFIG_NOT_FOUND; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_METADATA_CONFIG_NOT_FOUND; import static java.util.Objects.requireNonNull; public class ClpSplitMetadataConfig @@ -101,7 +101,7 @@ public ClpSplitMetadataConfig(ClpConfig config, TypeManager typeManager) root = mapper.readTree(Files.readAllBytes(Paths.get(config.getSplitMetadataConfigPath()))); } catch (IOException e) { - throw new PrestoException(CLP_SPLIT_FILTER_CONFIG_NOT_FOUND, "Failed to open split filter config file", e); + throw new PrestoException(CLP_SPLIT_METADATA_CONFIG_NOT_FOUND, "Failed to open split metadata config file", e); } for (Iterator it = root.fieldNames(); it.hasNext(); ) { diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java deleted file mode 100644 index 2154673049278..0000000000000 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.plugin.clp.split.filter; - -import com.facebook.presto.plugin.clp.ClpConfig; -import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; -import com.facebook.presto.spi.PrestoException; -import com.facebook.presto.spi.SchemaTableName; -import com.google.common.collect.ImmutableSet; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.file.Paths; -import java.util.Set; - -import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME; -import static java.lang.String.format; -import static java.util.UUID.randomUUID; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertThrows; -import static org.testng.Assert.assertTrue; - -@Test(singleThreaded = true) -public class TestClpSplitMetadataConfigCommon -{ - private String splitMetadataConfigPath; - - @BeforeMethod - public void setUp() throws IOException, URISyntaxException - { - URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); - if (resource == null) { - throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); - } - - splitMetadataConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); - } - - @Test - public void checkRequiredFilters() - { - ClpConfig config = new ClpConfig(); - config.setSplitMetadataConfigPath(splitMetadataConfigPath); - ClpSplitMetadataConfig filterProvider = new ClpSplitMetadataConfig(config); - Set testTableScopeSet = ImmutableSet.of(format("%s.%s", CONNECTOR_NAME, new SchemaTableName("default", "table_1"))); - assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters( - testTableScopeSet, - "(\"level\" >= 1 AND \"level\" <= 3)")); - filterProvider.checkContainsRequiredFilters( - testTableScopeSet, - "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)"); - } - - @Test - public void getFilterNames() - { - ClpConfig config = new ClpConfig(); - config.setSplitMetadataConfig(splitMetadataConfigPath); - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); - Set catalogFilterNames = filterProvider.getColumnNames("clp"); - assertEquals(ImmutableSet.of("level"), catalogFilterNames); - Set schemaFilterNames = filterProvider.getColumnNames("clp.default"); - assertEquals(ImmutableSet.of("level", "author"), schemaFilterNames); - Set tableFilterNames = filterProvider.getColumnNames("clp.default.table_1"); - assertEquals(ImmutableSet.of("level", "author", "msg.timestamp", "file_name"), tableFilterNames); - } - - @Test - public void handleEmptyAndInvalidSplitFilterConfig() - { - ClpConfig config = new ClpConfig(); - - // Empty config - ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); - assertTrue(filterProvider.getColumnNames("clp").isEmpty()); - assertTrue(filterProvider.getColumnNames("abc.xyz").isEmpty()); - assertTrue(filterProvider.getColumnNames("abc.opq.xyz").isEmpty()); - - // Invalid config - config.setSplitMetadataConfig(randomUUID().toString()); - assertThrows(PrestoException.class, () -> new ClpMySqlSplitFilterProvider(config)); - } -} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java new file mode 100644 index 0000000000000..8b9fbf22c5c6b --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java @@ -0,0 +1,120 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.common.type.Type; +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.TestClpQueryBase; +import com.facebook.presto.plugin.clp.split.ClpMySqlSplitMetadataExpressionConverter; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; +import java.util.Map; +import java.util.Set; + +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static java.util.UUID.randomUUID; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; +import static org.testng.Assert.assertTrue; + +@Test(singleThreaded = true) +public class TestClpSplitMetadataConfigCommon + extends TestClpQueryBase +{ + private String splitMetadataConfigPath; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); + if (resource == null) { + throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); + } + + splitMetadataConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + } + + @Test + public void checkRequiredFilters() + { + SessionHolder sessionHolder = new SessionHolder(); + ClpConfig config = new ClpConfig(); + config.setSplitMetadataConfigPath(splitMetadataConfigPath); + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + + ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( + functionAndTypeManager, + standardFunctionResolution, + splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), + splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), + splitMetadataConfig.getRequiredColumns(schemaTableName)); + converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", sessionHolder)); + assertThrows(PrestoException.class, () + -> converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", sessionHolder))); + + converter.transform(getRowExpression("(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)", sessionHolder)); + } + + @Test + public void getFilterNames() + { + ClpConfig config = new ClpConfig(); + config.setSplitMetadataConfigPath(splitMetadataConfigPath); + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + + Map columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("other", "test")); + assertEquals(ImmutableMap.of("level", VARCHAR), columns); + columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "table_2")); + assertEquals(ImmutableMap.of("level", VARCHAR, "author", VARCHAR), columns); + columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "table_1")); + assertEquals(columns, ImmutableMap.of( + "level", VARCHAR, + "author", VARCHAR, + "begin_timestamp", BIGINT, + "end_timestamp", BIGINT, + "file_name", VARCHAR)); + Set dataColumnsWithRangeBounds = + splitMetadataConfig.getDataColumnsWithRangeBounds(new SchemaTableName("default", "table_1")); + assertEquals(dataColumnsWithRangeBounds, ImmutableSet.of("msg.timestamp")); + } + + @Test + public void handleEmptyAndInvalidSplitFilterConfig() + { + ClpConfig config = new ClpConfig(); + + // Empty config + ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + assertTrue(splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "clp")).isEmpty()); + assertTrue(splitMetadataConfig.getMetadataColumns(new SchemaTableName("abc", "xyz")).isEmpty()); + assertTrue(splitMetadataConfig.getMetadataColumns(new SchemaTableName("abc.opq", "xyz")).isEmpty()); + + // Invalid config + config.setSplitMetadataConfigPath(randomUUID().toString()); + assertThrows(PrestoException.class, () -> new ClpSplitMetadataConfig(config, functionAndTypeManager)); + } +} diff --git a/presto-clp/src/test/resources/test-mysql-split-metadata.json b/presto-clp/src/test/resources/test-mysql-split-metadata.json index 19fee2a29c6c8..de0a47f094cb6 100644 --- a/presto-clp/src/test/resources/test-mysql-split-metadata.json +++ b/presto-clp/src/test/resources/test-mysql-split-metadata.json @@ -2,32 +2,35 @@ "": { "metaColumns": { "level": { - "type": "STRING" + "type": "VARCHAR" } } }, "clp.default": { "metaColumns": { "author": { - "type": "STRING" + "type": "VARCHAR" } } }, "clp.default.table_1": { "metaColumns": { "begin_timestamp": { - "type": "STRING", + "type": "BIGINT", "filter": { "asRangeBoundOf": "msg.timestamp", "boundType": "lower" } }, "end_timestamp": { - "type": "STRING", + "type": "BIGINT", "filter": { "asRangeBoundOf": "msg.timestamp", "boundType": "upper" } + }, + "file_name": { + "type": "VARCHAR" } }, "filterRules": [ From 6430711ad65716b934ee9838f3c9baa130233cd7 Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 20:36:58 -0500 Subject: [PATCH 12/26] fix issues in TestClpSplitMetadataConfigCommon --- .../presto/plugin/clp/TestClpQueryBase.java | 5 +++++ .../filter/TestClpSplitMetadataConfigCommon.java | 15 +++++++++------ .../test/resources/test-mysql-split-metadata.json | 6 +++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java index ee259d67e17c9..adebc714b91e6 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpQueryBase.java @@ -115,6 +115,11 @@ protected RowExpression toRowExpression(Expression expression, TypeProvider type return SqlToRowExpressionTranslator.translate(expression, expressionTypes, ImmutableMap.of(), functionAndTypeManager, session); } + protected RowExpression getRowExpression(String sqlExpression, TypeProvider typeProvider, SessionHolder sessionHolder) + { + return toRowExpression(expression(sqlExpression), typeProvider, sessionHolder.getSession()); + } + protected RowExpression getRowExpression(String sqlExpression, SessionHolder sessionHolder) { return toRowExpression(expression(sqlExpression), typeProvider, sessionHolder.getSession()); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java index 8b9fbf22c5c6b..a7a531253c9a3 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java @@ -20,6 +20,7 @@ import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.sql.planner.TypeProvider; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.testng.annotations.BeforeMethod; @@ -72,11 +73,13 @@ public void checkRequiredFilters() splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), splitMetadataConfig.getRequiredColumns(schemaTableName)); - converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", sessionHolder)); + + TypeProvider typeProvider = TypeProvider.viewOf(ImmutableMap.of("level", BIGINT, "msg.timestamp", BIGINT)); assertThrows(PrestoException.class, () - -> converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", sessionHolder))); + -> converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", typeProvider, sessionHolder))); - converter.transform(getRowExpression("(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)", sessionHolder)); + converter.transform( + getRowExpression("(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)", typeProvider, sessionHolder)); } @Test @@ -87,12 +90,12 @@ public void getFilterNames() ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); Map columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("other", "test")); - assertEquals(ImmutableMap.of("level", VARCHAR), columns); + assertEquals(ImmutableMap.of("level", BIGINT), columns); columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "table_2")); - assertEquals(ImmutableMap.of("level", VARCHAR, "author", VARCHAR), columns); + assertEquals(ImmutableMap.of("level", BIGINT, "author", VARCHAR), columns); columns = splitMetadataConfig.getMetadataColumns(new SchemaTableName("default", "table_1")); assertEquals(columns, ImmutableMap.of( - "level", VARCHAR, + "level", BIGINT, "author", VARCHAR, "begin_timestamp", BIGINT, "end_timestamp", BIGINT, diff --git a/presto-clp/src/test/resources/test-mysql-split-metadata.json b/presto-clp/src/test/resources/test-mysql-split-metadata.json index de0a47f094cb6..94c1e234ff717 100644 --- a/presto-clp/src/test/resources/test-mysql-split-metadata.json +++ b/presto-clp/src/test/resources/test-mysql-split-metadata.json @@ -2,18 +2,18 @@ "": { "metaColumns": { "level": { - "type": "VARCHAR" + "type": "BIGINT" } } }, - "clp.default": { + "default": { "metaColumns": { "author": { "type": "VARCHAR" } } }, - "clp.default.table_1": { + "default.table_1": { "metaColumns": { "begin_timestamp": { "type": "BIGINT", From c5b67762da56a52a419ce412f86bdfaeec8b8d4a Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 20:44:57 -0500 Subject: [PATCH 13/26] fix TestClpUdfRewriter --- .../java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java index 4a49f72066045..c41eab09e629c 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java @@ -27,6 +27,7 @@ import com.facebook.presto.plugin.clp.optimization.ClpUdfRewriter; import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.VariableAllocator; import com.facebook.presto.spi.WarningCollector; import com.facebook.presto.spi.plan.PlanNode; @@ -95,6 +96,7 @@ public class TestClpUdfRewriter public void setUp() { final String tableName = "test"; + table = new ClpTableHandle(new SchemaTableName("default", tableName), "test"); mockMetadataDatabase = ClpMockMetadataDatabase.builder().build(); mockMetadataDatabase.addTableToDatasetsTableIfNotExist(ImmutableList.of(tableName)); mockMetadataDatabase.addColumnMetadata(ImmutableMap.of( From d816d795a821a4bf6a2871c3c01394af57184bee Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 21:21:01 -0500 Subject: [PATCH 14/26] fix TestClpMySqlSplitMetadataConfig and TestClpSplit --- .../optimization/ClpFilterToKqlConverter.java | 24 +++++- ...MySqlSplitMetadataExpressionConverter.java | 25 +++++- .../presto/plugin/clp/TestClpSplit.java | 6 +- .../TestClpMySqlSplitMetadataConfig.java | 82 +++++++++++++------ 4 files changed, 108 insertions(+), 29 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index 9f2e5a80ac58b..2e16132b0b79d 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -15,6 +15,7 @@ import com.facebook.presto.common.function.OperatorType; import com.facebook.presto.common.type.DecimalType; +import com.facebook.presto.common.type.Decimals; import com.facebook.presto.common.type.RowType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; @@ -35,6 +36,8 @@ import com.google.common.collect.ImmutableSet; import io.airlift.slice.Slice; +import java.math.BigDecimal; +import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -199,10 +202,25 @@ public ClpExpression visitExpression(RowExpression node, Void context) */ private String getLiteralString(ConstantExpression literal) { - if (literal.getValue() instanceof Slice) { - return ((Slice) literal.getValue()).toStringUtf8(); + Object value = literal.getValue(); + Type type = literal.getType(); + if (value instanceof Slice) { + if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType) type; + BigInteger unscaled = Decimals.decodeUnscaledValue((Slice) value); + BigDecimal decimalValue = new BigDecimal(unscaled, decimalType.getScale()); + return decimalValue.toPlainString(); + } + return "'" + ((Slice) value).toStringUtf8().replace("'", "''") + "'"; + } + + if (type instanceof DecimalType && value instanceof Long) { + DecimalType decimalType = (DecimalType) type; + BigDecimal decimalValue = new BigDecimal(BigInteger.valueOf((Long) value), decimalType.getScale()); + return decimalValue.toPlainString(); } - return literal.toString(); + + return value.toString(); } /** diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index 36f10f196a4e4..65b09ef26dea2 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -14,6 +14,9 @@ package com.facebook.presto.plugin.clp.split; import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.common.type.DecimalType; +import com.facebook.presto.common.type.Decimals; +import com.facebook.presto.common.type.Type; import com.facebook.presto.plugin.clp.ClpErrorCode; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.FunctionHandle; @@ -28,6 +31,8 @@ import com.facebook.presto.spi.relation.VariableReferenceExpression; import io.airlift.slice.Slice; +import java.math.BigDecimal; +import java.math.BigInteger; import java.util.HashSet; import java.util.Map; import java.util.Optional; @@ -86,6 +91,11 @@ public String visitCall(CallExpression node, Void context) Optional operatorTypeOptional = functionMetadata.getOperatorType(); if (operatorTypeOptional.isPresent()) { OperatorType operatorType = operatorTypeOptional.get(); + if (operatorType == OperatorType.NEGATION) { + String value = node.getArguments().get(0).accept(this, null); + return "-" + value; + } + if (operatorType.isComparisonOperator() && operatorType != IS_DISTINCT_FROM) { String variableName = node.getArguments().get(0).accept(this, null); String literalString = node.getArguments().get(1).accept(this, null); @@ -123,10 +133,23 @@ public String visitSpecialForm(SpecialFormExpression node, Void context) public String visitConstant(ConstantExpression node, Void context) { Object value = node.getValue(); + Type type = node.getType(); if (value instanceof Slice) { + if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType) type; + BigInteger unscaled = Decimals.decodeUnscaledValue((Slice) value); + BigDecimal decimalValue = new BigDecimal(unscaled, decimalType.getScale()); + return decimalValue.toPlainString(); + } return "'" + ((Slice) value).toStringUtf8().replace("'", "''") + "'"; } + if (type instanceof DecimalType && value instanceof Long) { + DecimalType decimalType = (DecimalType) type; + BigDecimal decimalValue = new BigDecimal(BigInteger.valueOf((Long) value), decimalType.getScale()); + return decimalValue.toPlainString(); + } + return value.toString(); } @@ -163,7 +186,7 @@ private String rewriteComparisonWithBounds(String variableName, OperatorType ope return format("%s %s %s", lower, operator.getOperator(), literal); case EQUAL: // "col = 5" → "(lower_col <= 5 AND upper_col >= 5)" - return format("(%s <= %s AND %s >= %s)", lower, literal, upper, literal); + return format("(%s <= %s) AND (%s >= %s)", lower, literal, upper, literal); default: return null; } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java index 1c8fc26b3be40..4ee73f9680de6 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplit.java @@ -19,6 +19,7 @@ import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.plugin.clp.split.ClpSplitProvider; import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.sql.planner.TypeProvider; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.testng.annotations.AfterMethod; @@ -29,6 +30,7 @@ import java.util.Map; import java.util.Optional; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; @@ -40,6 +42,7 @@ public class TestClpSplit { private ClpMockMetadataDatabase mockMetadataDatabase; private ClpSplitProvider clpSplitProvider; + private TypeProvider typeProvider; private Map tableSplits; @BeforeMethod @@ -86,6 +89,7 @@ public void setUp() functionAndTypeManager, standardFunctionResolution, new ClpSplitMetadataConfig(config, functionAndTypeManager)); + typeProvider = TypeProvider.viewOf(ImmutableMap.of("begin_timestamp", BIGINT, "end_timestamp", BIGINT)); } @AfterMethod @@ -146,7 +150,7 @@ private void compareListSplitsResult( ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle( new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, tableName), tablePath), Optional.empty(), - metadataSql.map(sql -> getRowExpression(sql, new SessionHolder()))); + metadataSql.map(sql -> getRowExpression(sql, typeProvider, new SessionHolder()))); List expectedSplitPaths = expectedSplitIndexes.stream() .map(expectedSplitIndex -> format("%s/%s", tablePath, entry.getValue().getIds().get(expectedSplitIndex))) .collect(toImmutableList()); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java index 1e67372d3316e..925bfa7c00dab 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java @@ -18,6 +18,8 @@ import com.facebook.presto.plugin.clp.split.ClpMySqlSplitMetadataExpressionConverter; import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.sql.planner.TypeProvider; +import com.google.common.collect.ImmutableMap; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -27,6 +29,7 @@ import java.net.URL; import java.nio.file.Paths; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static java.lang.String.format; import static org.testng.Assert.assertEquals; @@ -35,6 +38,8 @@ public class TestClpMySqlSplitMetadataConfig extends TestClpQueryBase { private String splitMetadataConfigPath; + private TypeProvider typeProvider; + private ClpMySqlSplitMetadataExpressionConverter converter; @BeforeMethod public void setUp() throws IOException, URISyntaxException @@ -45,57 +50,86 @@ public void setUp() throws IOException, URISyntaxException } splitMetadataConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); - } + typeProvider = TypeProvider.viewOf( + ImmutableMap.of("msg.timestamp", BIGINT,"begin_timestamp", BIGINT, "end_timestamp", BIGINT)); - @Test - public void remapSplitFilterPushDownExpression() - { ClpConfig config = new ClpConfig(); config.setSplitMetadataConfigPath(splitMetadataConfigPath); ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); - final SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); - ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( + SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + converter = new ClpMySqlSplitMetadataExpressionConverter( functionAndTypeManager, standardFunctionResolution, splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), splitMetadataConfig.getRequiredColumns(schemaTableName)); + } + @Test + public void remapSplitFilterPushDownExpression() + { // Integer - testRange(1234, 5678, converter); - testRange(-5678, -1234, converter); + testRange(1234, 5678); + testRange(-5678, -1234); // Decimal - testRange(1234.001, 5678.999, converter); - testRange(-5678.999, -1234.001, converter); + testRange(1234.001, 5678.999); + testRange(-5678.999, -1234.001); // Scientific - testRange("1.234E3", "5.678e3", converter); - testRange("-1.234e-3", "-5.678E-3", converter); + testRange("1.234E3", "5.678e3", 1234.0, 5678.0); + testRange("-1.234e-3", "-5.678E-3", -0.001234, -0.005678); } - private void testRange(T lowerBound, T upperBound, ClpMySqlSplitMetadataExpressionConverter converter) + private void testRange(T lowerBound, T upperBound) { + testRange(lowerBound, upperBound, lowerBound, upperBound); + } + + private void testRange(T lowerBound, T upperBound, T expectedLowerBound, T expectedUpperBound) { SessionHolder sessionHolder = new SessionHolder(); String remappedSql1 = converter.transform( - getRowExpression(format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), sessionHolder)); - assertEquals(remappedSql1, format("(end_timestamp > %s AND begin_timestamp < %s)", lowerBound, upperBound)); + getRowExpression( + format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql1, + format("(end_timestamp > %s) AND (begin_timestamp < %s)", expectedLowerBound, expectedUpperBound)); String remappedSql2 = converter.transform( - getRowExpression(format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), sessionHolder)); - assertEquals(remappedSql2, format("(end_timestamp >= %s AND begin_timestamp <= %s)", lowerBound, upperBound)); + getRowExpression( + format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql2, + format("(end_timestamp >= %s) AND (begin_timestamp <= %s)", expectedLowerBound, expectedUpperBound)); String remappedSql3 = converter.transform( - getRowExpression(format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), sessionHolder)); - assertEquals(remappedSql3, format("(end_timestamp > %s AND begin_timestamp <= %s)", lowerBound, upperBound)); + getRowExpression( + format("(\"msg.timestamp\" > %s AND \"msg.timestamp\" <= %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql3, + format("(end_timestamp > %s) AND (begin_timestamp <= %s)", expectedLowerBound, expectedUpperBound)); String remappedSql4 = converter.transform( - getRowExpression(format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), sessionHolder)); - assertEquals(remappedSql4, format("(end_timestamp >= %s AND begin_timestamp < %s)", lowerBound, upperBound)); - - String remappedSql5 = converter.transform(getRowExpression(format("(\"msg.timestamp\" = %s)", lowerBound), sessionHolder)); - assertEquals(remappedSql5, format("((begin_timestamp <= %s AND end_timestamp >= %s))", lowerBound, lowerBound)); + getRowExpression( + format("(\"msg.timestamp\" >= %s AND \"msg.timestamp\" < %s)", lowerBound, upperBound), + typeProvider, + sessionHolder)); + assertEquals( + remappedSql4, + format("(end_timestamp >= %s) AND (begin_timestamp < %s)", expectedLowerBound, expectedUpperBound)); + + String remappedSql5 = converter.transform( + getRowExpression(format("(\"msg.timestamp\" = %s)", lowerBound), typeProvider, sessionHolder)); + assertEquals( + remappedSql5, + format("(begin_timestamp <= %s) AND (end_timestamp >= %s)", expectedLowerBound, expectedLowerBound)); } } From e8e8334e9a4956f1ffe4c61d2b014973b72834cd Mon Sep 17 00:00:00 2001 From: wraymo Date: Thu, 6 Nov 2025 21:27:13 -0500 Subject: [PATCH 15/26] clean up the docs --- presto-docs/src/main/sphinx/connector/clp.rst | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index dcf803724aee6..fc0d7f17a957a 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -273,21 +273,6 @@ The code block shows an example of split metadata config file: - The filter rule for ``msg.timestamp`` is marked as **required**, ensuring that queries without timestamp constraints are rejected. -Merging Behavior ----------------- - -When resolving configuration for a specific table: - -1. The connector merges definitions from matching namespaces in this order: - - - ``""`` (global) - - ``schema`` - - ``schema.table`` - -2. Later (more specific) namespaces **override** or **extend** parent configurations. - -3. Duplicate filter rules are deduplicated by ``column`` name. - Supported SQL Expressions ========================= From 9b54813f14d02af70f5bdac6aaf5ade6acd34b41 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Nov 2025 10:12:14 -0500 Subject: [PATCH 16/26] fix bugs --- .../optimization/ClpFilterToKqlConverter.java | 24 +++++++++---------- .../presto/plugin/clp/TestClpFilterToKql.java | 2 +- .../TestClpMySqlSplitMetadataConfig.java | 9 ++++--- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index 2e16132b0b79d..06988488396d4 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -211,7 +211,7 @@ private String getLiteralString(ConstantExpression literal) BigDecimal decimalValue = new BigDecimal(unscaled, decimalType.getScale()); return decimalValue.toPlainString(); } - return "'" + ((Slice) value).toStringUtf8().replace("'", "''") + "'"; + return ((Slice) value).toStringUtf8(); } if (type instanceof DecimalType && value instanceof Long) { @@ -435,9 +435,9 @@ private ClpExpression handleLogicalBinary(OperatorType operator, CallExpression return buildClpExpression( leftPushDownExpression.get(), // variable rightPushDownExpression.get(), // literal - left, // constant + right, // constant operator, - rightType, + leftType, node); } else if (leftIsConstant) { @@ -445,9 +445,9 @@ else if (leftIsConstant) { return buildClpExpression( rightPushDownExpression.get(), // variable leftPushDownExpression.get(), // literal - right, // constant + left, // constant newOperator, - leftType, + rightType, node); } // fallback @@ -471,8 +471,8 @@ else if (leftIsConstant) { * @param literalString string representation of the literal * @param constant the original ConstantExpression of literalString * @param operator the comparison operator - * @param literalType the type of the literal - * @param originalNode the original RowExpression node + * @param variableType the type of the variable + * @param originalNode the original CallExpression node * @return a ClpExpression containing either the equivalent KQL query, or the original * expression if it couldn't be translated */ @@ -481,10 +481,10 @@ private ClpExpression buildClpExpression( String literalString, RowExpression constant, OperatorType operator, - Type literalType, - RowExpression originalNode) + Type variableType, + CallExpression originalNode) { - boolean isVarchar = literalType instanceof VarcharType; + boolean isVarchar = constant.getType() instanceof VarcharType; boolean isMetadataColumn = metadataFilterColumns.contains(variableName); String formattedLiteral = isVarchar ? "\"" + escapeKqlSpecialCharsForStringValue(literalString) + "\"" @@ -509,10 +509,10 @@ else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !isVarchar) { if (isMetadataColumn) { metadataExpression = new CallExpression( operator.name(), - standardFunctionResolution.comparisonFunction(operator, literalType, literalType), + originalNode.getFunctionHandle(), BOOLEAN, ImmutableList.of( - new VariableReferenceExpression(Optional.empty(), variableName, literalType), + new VariableReferenceExpression(Optional.empty(), variableName, variableType), constant)); } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java index 36ea4d785577b..7e66934974653 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java @@ -317,7 +317,7 @@ private void testPushDown(SessionHolder sessionHolder, String sql, String expect testFilter(clpExpression, expectedKql, null, sessionHolder); if (expectedMetadataSqlQuery != null) { assertTrue(clpExpression.getMetadataExpression().isPresent()); - assertEquals(clpExpression.getMetadataExpression().get(), expectedMetadataSqlQuery); + assertEquals(clpExpression.getMetadataExpression().get(), getRowExpression(expectedMetadataSqlQuery, sessionHolder)); } else { assertFalse(clpExpression.getMetadataExpression().isPresent()); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java index 925bfa7c00dab..c65295c1a2402 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java @@ -37,7 +37,6 @@ public class TestClpMySqlSplitMetadataConfig extends TestClpQueryBase { - private String splitMetadataConfigPath; private TypeProvider typeProvider; private ClpMySqlSplitMetadataExpressionConverter converter; @@ -49,12 +48,11 @@ public void setUp() throws IOException, URISyntaxException throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); } - splitMetadataConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); typeProvider = TypeProvider.viewOf( - ImmutableMap.of("msg.timestamp", BIGINT,"begin_timestamp", BIGINT, "end_timestamp", BIGINT)); + ImmutableMap.of("msg.timestamp", BIGINT, "begin_timestamp", BIGINT, "end_timestamp", BIGINT)); ClpConfig config = new ClpConfig(); - config.setSplitMetadataConfigPath(splitMetadataConfigPath); + config.setSplitMetadataConfigPath(Paths.get(resource.toURI()).toAbsolutePath().toString()); ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); @@ -82,7 +80,8 @@ public void remapSplitFilterPushDownExpression() testRange("-1.234e-3", "-5.678E-3", -0.001234, -0.005678); } - private void testRange(T lowerBound, T upperBound) { + private void testRange(T lowerBound, T upperBound) + { testRange(lowerBound, upperBound, lowerBound, upperBound); } From f08107d8ec5480e827413c3ee14f768a576886e5 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Nov 2025 13:48:24 -0500 Subject: [PATCH 17/26] add comments --- ...MySqlSplitMetadataExpressionConverter.java | 69 ++++++++++++++++--- .../clp/split/ClpSplitMetadataConfig.java | 67 ++++++++++++++++++ .../presto/plugin/clp/TestClpFilterToKql.java | 2 +- 3 files changed, 127 insertions(+), 11 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index 65b09ef26dea2..c3bf8ec4fa319 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -44,6 +44,19 @@ import static java.lang.String.format; import static java.util.Objects.requireNonNull; +/** + * A converter that converts Presto {@link RowExpression} trees representing metadata predicates + * into SQL filter strings that can be pushed down to MySQL for split-level metadata filtering. + *

+ * The converter: + *
    + *
  • Handles standard logical and comparison operators (AND, OR, =, >, >=, <, <=, IS NULL).
  • + *
  • Supports range-bound rewriting for data columns that have metadata columns representing + * lower and upper bounds.
  • + *
  • Tracks required columns and throws an exception if any are missing in the filter + * expression.
  • + *
+ */ public class ClpMySqlSplitMetadataExpressionConverter implements RowExpressionVisitor { @@ -68,6 +81,16 @@ public ClpMySqlSplitMetadataExpressionConverter( this.requiredColumns = requiredColumns; } + /** + * Converts the given {@link RowExpression} into an equivalent SQL WHERE clause string. + *

+ * After conversion, validates that all required columns were referenced in the expression. If + * any required columns are missing, a {@link PrestoException} is thrown. + * + * @param expression the row expression to convert + * @return a SQL string representing the equivalent predicate + * @throws PrestoException if required columns are missing from the expression + */ public String transform(RowExpression expression) { String sql = expression.accept(this, null); @@ -161,6 +184,23 @@ public String visitVariableReference(VariableReferenceExpression node, Void cont return exposedToOriginal.getOrDefault(exposed, exposed); } + /** + * Rewrites a comparison operator involving a data column into an equivalent expression using + * its associated range-bound metadata columns (if configured). + *

+ * Examples: + *
    + *
  • col >= 5upper_col >= 5
  • + *
  • col <= 5lower_col <= 5
  • + *
  • col = 5(lower_col <= 5) AND (upper_col >= 5)
  • + *
+ * Returns null if no rewrite is applicable. + * + * @param variableName the name of the column being compared + * @param operator the comparison operator + * @param literal the literal value as a SQL string + * @return a rewritten SQL expression string, or null if no rewrite applies + */ private String rewriteComparisonWithBounds(String variableName, OperatorType operator, String literal) { String original = exposedToOriginal.getOrDefault(variableName, variableName); @@ -171,24 +211,33 @@ private String rewriteComparisonWithBounds(String variableName, OperatorType ope String lower = bounds.get("lower"); String upper = bounds.get("upper"); - if (lower == null || upper == null) { - return null; - } switch (operator) { case GREATER_THAN: case GREATER_THAN_OR_EQUAL: - // "col >= 5" → "upper_col >= 5" - return format("%s %s %s", upper, operator.getOperator(), literal); + if (upper != null) { + return format("%s %s %s", upper, operator.getOperator(), literal); + } + break; case LESS_THAN: case LESS_THAN_OR_EQUAL: - // "col <= 5" → "lower_col <= 5" - return format("%s %s %s", lower, operator.getOperator(), literal); + if (lower != null) { + return format("%s %s %s", lower, operator.getOperator(), literal); + } + break; case EQUAL: - // "col = 5" → "(lower_col <= 5 AND upper_col >= 5)" - return format("(%s <= %s) AND (%s >= %s)", lower, literal, upper, literal); + if (lower != null && upper != null) { + return format("(%s <= %s) AND (%s >= %s)", lower, literal, upper, literal); + } else if (lower != null) { + return format("%s <= %s", lower, literal); + } else if (upper != null) { + return format("%s >= %s", upper, literal); + } + break; default: - return null; + break; } + + return null; } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java index de39ff0b40278..c0223fe5a8017 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpSplitMetadataConfig.java @@ -39,11 +39,31 @@ import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_SPLIT_METADATA_CONFIG_NOT_FOUND; import static java.util.Objects.requireNonNull; +/** + * A class that loads and manages split-level metadata configuration. + *

+ * The configuration file defines metadata columns and filtering rules that can be applied in query + * planning to prune irrelevant splits based on metadata (e.g., timestamps, partitions, etc.). + *

+ * The configuration supports a hierarchical namespace structure: + *
    + *
  • Global defaults under an empty string key ("").
  • + *
  • Schema-level overrides under the schema name (e.g., "logs").
  • + *
  • Table-level overrides under the full name (e.g., "logs.events"}).
  • + *
+ *

+ * Configurations from broader scopes are merged with more specific ones, with table-level + * definitions overriding schema-level and global definitions. + */ public class ClpSplitMetadataConfig { private final Map tableConfigs = new HashMap<>(); private final TypeManager typeManager; + /** + * Represents a metadata column entry defined in the split metadata configuration file. Each + * {@link MetaColumn} corresponds to one metadata column that can be exposed in query filters. + */ public static class MetaColumn { public final String name; @@ -65,6 +85,11 @@ public MetaColumn(String name, JsonNode node) } } + /** + * Represents a rule that defines how a metadata column or a data column should be used in + * filtering. A rule may indicate that a column is required for query pruning and include an + * explanation of why it is necessary. + */ public static class FilterRule { public final String column; @@ -124,6 +149,13 @@ public ClpSplitMetadataConfig(ClpConfig config, TypeManager typeManager) } } + /** + * Returns the mapping of exposed metadata column names to their Presto {@link Type}s for the + * given table. + * + * @param name the {@link SchemaTableName} of the target table + * @return a map from exposed column name → type + */ public Map getMetadataColumns(SchemaTableName name) { TableConfig cfg = getTableConfig(name); @@ -134,6 +166,12 @@ public Map getMetadataColumns(SchemaTableName name) return result; } + /** + * Returns the set of metadata columns marked as required in the configuration. + * + * @param name the {@link SchemaTableName} of the target table + * @return a set of column names that are required for filtering + */ public Set getRequiredColumns(SchemaTableName name) { TableConfig cfg = getTableConfig(name); @@ -146,6 +184,12 @@ public Set getRequiredColumns(SchemaTableName name) return requiredColumns; } + /** + * Returns a mapping of exposed metadata column names to their original internal names. + * + * @param name the {@link SchemaTableName} of the target table + * @return a map from exposed column name → original column name + */ public Map getExposedToOriginalMapping(SchemaTableName name) { TableConfig cfg = getTableConfig(name); @@ -156,6 +200,13 @@ public Map getExposedToOriginalMapping(SchemaTableName name) return mapping; } + /** + * Returns the set of data column names that have associated range bounds defined in the + * metadata configuration. + * + * @param name the {@link SchemaTableName} of the target table + * @return a set of data column names that have range bounds + */ public Set getDataColumnsWithRangeBounds(SchemaTableName name) { TableConfig cfg = getTableConfig(name); @@ -168,6 +219,15 @@ public Set getDataColumnsWithRangeBounds(SchemaTableName name) return result; } + /** + * Returns a mapping from data column names to their associated range bound metadata columns. + *

+ * Each entry maps a data column name to another map with keys {@code "lower"} and/or + * {@code "upper"}, representing the metadata column names that define those bounds. + * + * @param name the {@link SchemaTableName} of the target table + * @return a nested mapping from data column name → ("lower"/"upper" → metadata column name) + */ public Map> getDataColumnRangeMapping(SchemaTableName name) { TableConfig cfg = getTableConfig(name); @@ -181,6 +241,13 @@ public Map> getDataColumnRangeMapping(SchemaTableNam return mapping; } + /** + * Merges and returns the effective {@link TableConfig} for the given table, taking into account + * the hierarchical configuration structure: global → schema → table. + * + * @param name the {@link SchemaTableName} of the target table + * @return the merged table configuration + */ private TableConfig getTableConfig(SchemaTableName name) { TableConfig merged = new TableConfig(); diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java index 7e66934974653..3d6d3aa5ce186 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java @@ -228,7 +228,7 @@ public void testComplexPushDown() } @Test - public void testMetadataSqlGeneration() + public void testMetadataExpressionGeneration() { SessionHolder sessionHolder = new SessionHolder(); Set testMetadataFilterColumns = ImmutableSet.of("fare"); From 11cf7529fd9269f824de8cb08071c095df75d0df Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Nov 2025 14:04:08 -0500 Subject: [PATCH 18/26] minor work --- .../facebook/presto/plugin/clp/ClpConfig.java | 6 ++-- ...MySqlSplitMetadataExpressionConverter.java | 6 ++-- ...ySqlSplitMetadataExpressionConverter.java} | 32 ++++++++++++++++--- .../TestClpSplitMetadataConfigCommon.java | 10 +++--- 4 files changed, 38 insertions(+), 16 deletions(-) rename presto-clp/src/test/java/com/facebook/presto/plugin/clp/{split/filter/TestClpMySqlSplitMetadataConfig.java => TestClpMySqlSplitMetadataExpressionConverter.java} (79%) rename presto-clp/src/test/java/com/facebook/presto/plugin/clp/{split/filter => }/TestClpSplitMetadataConfigCommon.java (94%) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 734501408a3ee..f148757da5522 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -33,7 +33,7 @@ public class ClpConfig private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; - private String splitFilterConfigPath; + private String splitMetadataConfigPath; private SplitProviderType splitProviderType = SplitProviderType.MYSQL; public boolean isPolymorphicTypeEnabled() @@ -152,13 +152,13 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) public String getSplitMetadataConfigPath() { - return splitFilterConfigPath; + return splitMetadataConfigPath; } @Config("clp.split-metadata-config-path") public ClpConfig setSplitMetadataConfigPath(String splitMetadataConfigPath) { - this.splitFilterConfigPath = splitMetadataConfigPath; + this.splitMetadataConfigPath = splitMetadataConfigPath; return this; } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index c3bf8ec4fa319..1c60bcf6b3b51 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -228,9 +228,11 @@ private String rewriteComparisonWithBounds(String variableName, OperatorType ope case EQUAL: if (lower != null && upper != null) { return format("(%s <= %s) AND (%s >= %s)", lower, literal, upper, literal); - } else if (lower != null) { + } + else if (lower != null) { return format("%s <= %s", lower, literal); - } else if (upper != null) { + } + else if (upper != null) { return format("%s >= %s", upper, literal); } break; diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMySqlSplitMetadataExpressionConverter.java similarity index 79% rename from presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMySqlSplitMetadataExpressionConverter.java index c65295c1a2402..784d0938cdc38 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpMySqlSplitMetadataConfig.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMySqlSplitMetadataExpressionConverter.java @@ -11,12 +11,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.plugin.clp.split.filter; +package com.facebook.presto.plugin.clp; -import com.facebook.presto.plugin.clp.ClpConfig; -import com.facebook.presto.plugin.clp.TestClpQueryBase; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitMetadataExpressionConverter; import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.sql.planner.TypeProvider; import com.google.common.collect.ImmutableMap; @@ -32,12 +31,14 @@ import static com.facebook.presto.common.type.BigintType.BIGINT; import static java.lang.String.format; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; @Test(singleThreaded = true) -public class TestClpMySqlSplitMetadataConfig +public class TestClpMySqlSplitMetadataExpressionConverter extends TestClpQueryBase { private TypeProvider typeProvider; + private ClpSplitMetadataConfig splitMetadataConfig; private ClpMySqlSplitMetadataExpressionConverter converter; @BeforeMethod @@ -53,7 +54,7 @@ public void setUp() throws IOException, URISyntaxException ClpConfig config = new ClpConfig(); config.setSplitMetadataConfigPath(Paths.get(resource.toURI()).toAbsolutePath().toString()); - ClpSplitMetadataConfig splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); + splitMetadataConfig = new ClpSplitMetadataConfig(config, functionAndTypeManager); SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); converter = new ClpMySqlSplitMetadataExpressionConverter( @@ -64,6 +65,27 @@ public void setUp() throws IOException, URISyntaxException splitMetadataConfig.getRequiredColumns(schemaTableName)); } + @Test + public void checkRequiredColumns() + { + SessionHolder sessionHolder = new SessionHolder(); + SchemaTableName schemaTableName = new SchemaTableName("default", "table_1"); + + ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( + functionAndTypeManager, + standardFunctionResolution, + splitMetadataConfig.getExposedToOriginalMapping(schemaTableName), + splitMetadataConfig.getDataColumnRangeMapping(schemaTableName), + splitMetadataConfig.getRequiredColumns(schemaTableName)); + + TypeProvider typeProvider = TypeProvider.viewOf(ImmutableMap.of("level", BIGINT, "msg.timestamp", BIGINT)); + assertThrows(PrestoException.class, () + -> converter.transform(getRowExpression("(\"level\" >= 1 AND \"level\" <= 3)", typeProvider, sessionHolder))); + + converter.transform( + getRowExpression("(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)", typeProvider, sessionHolder)); + } + @Test public void remapSplitFilterPushDownExpression() { diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfigCommon.java similarity index 94% rename from presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfigCommon.java index a7a531253c9a3..f27ee576c1c8c 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitMetadataConfigCommon.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfigCommon.java @@ -11,11 +11,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.facebook.presto.plugin.clp.split.filter; +package com.facebook.presto.plugin.clp; import com.facebook.presto.common.type.Type; -import com.facebook.presto.plugin.clp.ClpConfig; -import com.facebook.presto.plugin.clp.TestClpQueryBase; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitMetadataExpressionConverter; import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.PrestoException; @@ -59,7 +57,7 @@ public void setUp() throws IOException, URISyntaxException } @Test - public void checkRequiredFilters() + public void checkRequiredColumns() { SessionHolder sessionHolder = new SessionHolder(); ClpConfig config = new ClpConfig(); @@ -83,7 +81,7 @@ public void checkRequiredFilters() } @Test - public void getFilterNames() + public void getMetadataColumnNames() { ClpConfig config = new ClpConfig(); config.setSplitMetadataConfigPath(splitMetadataConfigPath); @@ -106,7 +104,7 @@ public void getFilterNames() } @Test - public void handleEmptyAndInvalidSplitFilterConfig() + public void handleEmptyAndInvalidSplitMetadataConfig() { ClpConfig config = new ClpConfig(); From 86f629c1c1ab1ec7ec095d6c9fa524b021add516 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Nov 2025 14:06:26 -0500 Subject: [PATCH 19/26] doc improvement --- presto-docs/src/main/sphinx/connector/clp.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index fc0d7f17a957a..bf7f345348811 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -131,8 +131,8 @@ When resolving configuration for a specific table: 1. The connector merges definitions from matching namespaces in this order: - ``""`` (global) - - ``schema`` - - ``schema.table`` + - ``"schema"`` + - ``"schema.table"`` 2. Later (more specific) namespaces **override** or **extend** parent configurations. From 7a283a78b2de9a5dc76e826b05bc3df279a177c5 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Nov 2025 15:20:38 -0500 Subject: [PATCH 20/26] rename --- ...etadataConfigCommon.java => TestClpSplitMetadataConfig.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename presto-clp/src/test/java/com/facebook/presto/plugin/clp/{TestClpSplitMetadataConfigCommon.java => TestClpSplitMetadataConfig.java} (99%) diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfig.java similarity index 99% rename from presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfigCommon.java rename to presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfig.java index f27ee576c1c8c..7f8d2dd483411 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfigCommon.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpSplitMetadataConfig.java @@ -40,7 +40,7 @@ import static org.testng.Assert.assertTrue; @Test(singleThreaded = true) -public class TestClpSplitMetadataConfigCommon +public class TestClpSplitMetadataConfig extends TestClpQueryBase { private String splitMetadataConfigPath; From a6b71f6888bfba493848437da9649c8b584cdff6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Nov 2025 20:27:44 -0500 Subject: [PATCH 21/26] add more test cases and fix bugs --- .../clp/optimization/ClpComputePushDown.java | 7 +- .../optimization/ClpFilterToKqlConverter.java | 261 +++++++++++------- .../presto/plugin/clp/TestClpFilterToKql.java | 84 ++++-- .../presto/plugin/clp/TestClpUdfRewriter.java | 2 +- 4 files changed, 233 insertions(+), 121 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index 5999ce5341d9a..557e68967e398 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -33,7 +33,6 @@ import com.facebook.presto.spi.plan.TableScanNode; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; -import com.google.common.collect.ImmutableSet; import java.util.Map; import java.util.Optional; @@ -106,15 +105,13 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod Map assignments = tableScanNode.getAssignments(); SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); - ImmutableSet.Builder metadataFilterColumns = ImmutableSet.builder(); - metadataFilterColumns.addAll(metadataConfig.getMetadataColumns(schemaTableName).keySet()); - metadataFilterColumns.addAll(metadataConfig.getDataColumnsWithRangeBounds(schemaTableName)); ClpExpression clpExpression = filterNode.getPredicate().accept( new ClpFilterToKqlConverter( functionResolution, functionManager, assignments, - metadataFilterColumns.build()), + metadataConfig.getMetadataColumns(schemaTableName).keySet(), + metadataConfig.getDataColumnsWithRangeBounds(schemaTableName)), null); Optional kqlQuery = clpExpression.getPushDownExpression(); Optional metadataExpression = clpExpression.getMetadataExpression(); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index ce3e325e252bb..b1c93946889d8 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -113,17 +113,20 @@ public class ClpFilterToKqlConverter private final FunctionMetadataManager functionMetadataManager; private final Map assignments; private final Set metadataFilterColumns; + private final Set dataColumnsWithRangeBounds; public ClpFilterToKqlConverter( StandardFunctionResolution standardFunctionResolution, FunctionMetadataManager functionMetadataManager, Map assignments, - Set metadataFilterColumns) + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) { this.standardFunctionResolution = requireNonNull(standardFunctionResolution, "standardFunctionResolution is null"); this.functionMetadataManager = requireNonNull(functionMetadataManager, "function metadata manager is null"); this.assignments = requireNonNull(assignments, "assignments is null"); this.metadataFilterColumns = requireNonNull(metadataFilterColumns, "metadataFilterColumns is null"); + this.dataColumnsWithRangeBounds = requireNonNull(dataColumnsWithRangeBounds, "dataColumnsWithRangeBounds is null"); } @Override @@ -260,52 +263,81 @@ private ClpExpression handleBetween(CallExpression node) "BETWEEN operator must have exactly three arguments. Received: " + node); } - RowExpression first = arguments.get(0); - RowExpression second = arguments.get(1); - RowExpression third = arguments.get(2); - if (!isClpCompatibleNumericType(first.getType()) - || !isClpCompatibleNumericType(second.getType()) - || !isClpCompatibleNumericType(third.getType())) { + RowExpression lhs = arguments.get(0); + RowExpression lower = arguments.get(1); + RowExpression upper = arguments.get(2); + + Optional variableOpt = lhs.accept(this, null).getPushDownExpression(); + if (!variableOpt.isPresent()) { return new ClpExpression(node); } - Optional variableOpt = first.accept(this, null).getPushDownExpression(); - if (!variableOpt.isPresent() - || !(second instanceof ConstantExpression) - || !(third instanceof ConstantExpression)) { + String variable = variableOpt.get(); + boolean isMetadataColumn = metadataFilterColumns.contains(variable); + + // Type validation + boolean numericCompatible = + isClpCompatibleNumericType(lhs.getType()) + && isClpCompatibleNumericType(lower.getType()) + && isClpCompatibleNumericType(upper.getType()); + + if (!numericCompatible) { + if (isMetadataColumn) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata BETWEEN requires numeric-compatible types. Received: " + node); + } + // Non-metadata columns just fallback (cannot push down) return new ClpExpression(node); } - String variable = variableOpt.get(); - String lowerBound = getLiteralString((ConstantExpression) second); - String upperBound = getLiteralString((ConstantExpression) third); - String kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound); - if (metadataFilterColumns.contains(variable)) { + // Metadata columns must have constant bounds + if (isMetadataColumn && + (!(lower instanceof ConstantExpression) || !(upper instanceof ConstantExpression))) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata BETWEEN requires constant bounds. Received: " + node); + } + + // Pushdown only if both bounds are constants + if (!(lower instanceof ConstantExpression) || !(upper instanceof ConstantExpression)) { + return new ClpExpression(node); + } + + RowExpression metadataExpr = null; + if (isMetadataColumn || dataColumnsWithRangeBounds.contains(variable)) { VariableReferenceExpression varExpr = - new VariableReferenceExpression(first.getSourceLocation(), variable, first.getType()); - ConstantExpression lower = (ConstantExpression) second; - ConstantExpression upper = (ConstantExpression) third; - - // Build expression: (var >= lower) AND (var <= upper) - RowExpression lowerBoundExpr = new CallExpression( - GREATER_THAN_OR_EQUAL.name(), - standardFunctionResolution.comparisonFunction(GREATER_THAN_OR_EQUAL, varExpr.getType(), lower.getType()), - BOOLEAN, - ImmutableList.of(varExpr, lower)); - RowExpression upperBoundExpr = new CallExpression( - LESS_THAN_OR_EQUAL.name(), - standardFunctionResolution.comparisonFunction(LESS_THAN_OR_EQUAL, varExpr.getType(), upper.getType()), - BOOLEAN, - ImmutableList.of(varExpr, upper)); - RowExpression metadataExpr = new SpecialFormExpression( + new VariableReferenceExpression(lhs.getSourceLocation(), variable, lhs.getType()); + ConstantExpression lowerConst = (ConstantExpression) lower; + ConstantExpression upperConst = (ConstantExpression) upper; + + // (var >= lower) AND (var <= upper) + metadataExpr = new SpecialFormExpression( AND, BOOLEAN, - lowerBoundExpr, - upperBoundExpr); - - return new ClpExpression(kql, metadataExpr); - } - return new ClpExpression(kql); + ImmutableList.of( + new CallExpression( + GREATER_THAN_OR_EQUAL.name(), + standardFunctionResolution.comparisonFunction( + GREATER_THAN_OR_EQUAL, varExpr.getType(), lowerConst.getType()), + BOOLEAN, + ImmutableList.of(varExpr, lowerConst)), + new CallExpression( + LESS_THAN_OR_EQUAL.name(), + standardFunctionResolution.comparisonFunction( + LESS_THAN_OR_EQUAL, varExpr.getType(), upperConst.getType()), + BOOLEAN, + ImmutableList.of(varExpr, upperConst)))); + } + + String kql = null; + if (!isMetadataColumn) { + String lowerBound = getLiteralString((ConstantExpression) lower); + String upperBound = getLiteralString((ConstantExpression) upper); + kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound); + } + + return new ClpExpression(kql, metadataExpr); } /** @@ -324,18 +356,27 @@ private ClpExpression handleNot(CallExpression node) "NOT operator must have exactly one argument. Received: " + node); } - RowExpression input = node.getArguments().get(0); - ClpExpression expression = input.accept(this, null); - if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { + ClpExpression input = node.getArguments().get(0).accept(this, null); + if ((input.getRemainingExpression().isPresent() || !input.getPushDownExpression().isPresent()) + && !input.getMetadataExpression().isPresent()) { return new ClpExpression(node); } - String notPushDownExpression = "NOT " + expression.getPushDownExpression().get(); - if (expression.getMetadataExpression().isPresent()) { - return new ClpExpression(notPushDownExpression, node); + + String kql = null; + if (input.getPushDownExpression().isPresent()) { + kql = "NOT " + input.getPushDownExpression().get(); } - else { - return new ClpExpression(notPushDownExpression); + + RowExpression metadataExpr = null; + if (input.getMetadataExpression().isPresent()) { + metadataExpr = new CallExpression( + standardFunctionResolution.notFunction().getName(), + standardFunctionResolution.notFunction(), + BOOLEAN, + ImmutableList.of(input.getMetadataExpression().get())); } + + return new ClpExpression(kql, metadataExpr); } /** @@ -362,6 +403,11 @@ private ClpExpression handleLike(CallExpression node) } String variableName = variable.getPushDownExpression().get(); + if (metadataFilterColumns.contains(variableName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for LIKE predicate" + node); + } RowExpression argument = node.getArguments().get(1); String pattern; @@ -375,7 +421,9 @@ else if (argument instanceof CallExpression) { return new ClpExpression(node); } if (callExpression.getArguments().size() != 1) { - throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "CAST function must have exactly one argument. Received: " + callExpression); + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "CAST function must have exactly one argument. Received: " + callExpression); } if (!(callExpression.getArguments().get(0) instanceof ConstantExpression)) { return new ClpExpression(node); @@ -488,27 +536,30 @@ private ClpExpression buildClpExpression( { boolean isVarchar = constant.getType() instanceof VarcharType; boolean isMetadataColumn = metadataFilterColumns.contains(variableName); + boolean isDataColumnsWithRangeBounds = dataColumnsWithRangeBounds.contains(variableName); String formattedLiteral = isVarchar ? "\"" + escapeKqlSpecialCharsForStringValue(literalString) + "\"" : literalString; String pushDownExpression = null; - if (operator.equals(EQUAL)) { - pushDownExpression = format("%s: %s", variableName, formattedLiteral); - } - else if (operator.equals(NOT_EQUAL)) { - pushDownExpression = format("NOT %s: %s", variableName, formattedLiteral); - } - else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !isVarchar) { - pushDownExpression = format("%s %s %s", variableName, operator.getOperator(), literalString); - } + CallExpression metadataExpression = null; + if (!isMetadataColumn) { + if (operator.equals(EQUAL)) { + pushDownExpression = format("%s: %s", variableName, formattedLiteral); + } + else if (operator.equals(NOT_EQUAL)) { + pushDownExpression = format("NOT %s: %s", variableName, formattedLiteral); + } + else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !isVarchar) { + pushDownExpression = format("%s %s %s", variableName, operator.getOperator(), literalString); + } - if (pushDownExpression == null) { - return new ClpExpression(originalNode); + if (pushDownExpression == null) { + return new ClpExpression(originalNode); + } } - CallExpression metadataExpression = null; - if (isMetadataColumn) { + if (isMetadataColumn || isDataColumnsWithRangeBounds) { metadataExpression = new CallExpression( operator.name(), originalNode.getFunctionHandle(), @@ -579,6 +630,11 @@ private Optional parseSubstringCall(CallExpression callExpression) } String varName = variable.getPushDownExpression().get(); + if (metadataFilterColumns.contains(varName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for substr call" + callExpression); + } RowExpression startExpression = callExpression.getArguments().get(1); RowExpression lengthExpression = null; if (argCount == 3) { @@ -725,17 +781,15 @@ private ClpExpression handleAnd(SpecialFormExpression node) for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); - if (expression.getPushDownExpression().isPresent()) { - pushdownKql.add(expression.getPushDownExpression().get()); - expression.getMetadataExpression().ifPresent(metadataExpressions::add); - } + expression.getPushDownExpression().ifPresent(pushdownKql::add); + expression.getMetadataExpression().ifPresent(metadataExpressions::add); expression.getRemainingExpression().ifPresent(remainingExpressions::add); } - if (pushdownKql.isEmpty()) { - return new ClpExpression(node); - } - String combinedKql = "(" + String.join(" AND ", pushdownKql) + ")"; + String combinedKql = null; + if (!pushdownKql.isEmpty()) { + combinedKql = "(" + String.join(" AND ", pushdownKql) + ")"; + } RowExpression combinedMetadataExpression = null; if (metadataExpressions.size() == 1) { @@ -781,47 +835,52 @@ private ClpExpression handleOr(SpecialFormExpression node) List pushdownKql = new ArrayList<>(); List metadataExpressions = new ArrayList<>(); - boolean allPushedDown = true; - boolean allHaveMetadataExpression = true; + boolean hasUnpushable = false; + for (RowExpression argument : node.getArguments()) { - ClpExpression expression = argument.accept(this, null); - // Note: It is possible in the future that an expression cannot be pushed down as a KQL query, but can be - // pushed down as a metadata SQL query. - if (expression.getRemainingExpression().isPresent() || !expression.getPushDownExpression().isPresent()) { - allPushedDown = false; - continue; + ClpExpression expr = argument.accept(this, null); + + boolean hasRemaining = expr.getRemainingExpression().isPresent(); + boolean hasKql = expr.getPushDownExpression().isPresent(); + boolean hasMeta = expr.getMetadataExpression().isPresent(); + + // If this arg cannot be pushed down at all, bail early + if (hasRemaining || (!hasKql && !hasMeta)) { + hasUnpushable = true; + break; } - pushdownKql.add(expression.getPushDownExpression().get()); - if (allHaveMetadataExpression && expression.getMetadataExpression().isPresent()) { - metadataExpressions.add(expression.getMetadataExpression().get()); + if (hasKql) { + pushdownKql.add(expr.getPushDownExpression().get()); } - else { - allHaveMetadataExpression = false; + + if (hasMeta) { + metadataExpressions.add(expr.getMetadataExpression().get()); } } - if (!allPushedDown) { + if (hasUnpushable) { return new ClpExpression(node); } - String combinedKql = "(" + String.join(" OR ", pushdownKql) + ")"; + String combinedKql = null; + if (!pushdownKql.isEmpty()) { + combinedKql = "(" + String.join(" OR ", pushdownKql) + ")"; + } - RowExpression combinedMetadataExpression = null; - if (allHaveMetadataExpression && !metadataExpressions.isEmpty()) { - if (metadataExpressions.size() == 1) { - combinedMetadataExpression = metadataExpressions.get(0); - } - else { - combinedMetadataExpression = new SpecialFormExpression( - node.getSourceLocation(), - OR, - BOOLEAN, - metadataExpressions); - } + // Only use metadata if every argument has metadata and none had KQL + RowExpression combinedMetadata = null; + if (metadataExpressions.size() == node.getArguments().size() && pushdownKql.isEmpty()) { + combinedMetadata = (metadataExpressions.size() == 1) + ? metadataExpressions.get(0) + : new SpecialFormExpression( + node.getSourceLocation(), + OR, + BOOLEAN, + metadataExpressions); } - return new ClpExpression(combinedKql, combinedMetadataExpression); + return new ClpExpression(combinedKql, combinedMetadata); } /** @@ -840,6 +899,11 @@ private ClpExpression handleIn(SpecialFormExpression node) return new ClpExpression(node); } String variableName = variable.getPushDownExpression().get(); + if (metadataFilterColumns.contains(variableName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for IN predicate" + node); + } StringBuilder queryBuilder = new StringBuilder(); queryBuilder.append("("); for (RowExpression argument : node.getArguments().subList(1, node.getArguments().size())) { @@ -884,6 +948,11 @@ private ClpExpression handleIsNull(SpecialFormExpression node) } String variableName = expression.getPushDownExpression().get(); + if (metadataFilterColumns.contains(variableName)) { + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, + "Metadata filter columns are not supported for IN predicate" + node); + } return new ClpExpression(format("NOT %s: *", variableName)); } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java index 3d6d3aa5ce186..29146e107bb2d 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpFilterToKql.java @@ -18,6 +18,8 @@ import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.planner.TypeProvider; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.testng.annotations.Test; @@ -26,6 +28,7 @@ import java.util.Optional; import java.util.Set; +import static com.facebook.presto.common.type.BigintType.BIGINT; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; @@ -232,44 +235,58 @@ public void testMetadataExpressionGeneration() { SessionHolder sessionHolder = new SessionHolder(); Set testMetadataFilterColumns = ImmutableSet.of("fare"); + Set testDataColumnsWithRangeBounds = ImmutableSet.of("city.Region.Id"); // Normal case testPushDown( sessionHolder, "(fare > 0 AND city.Name like 'b%')", - "(fare > 0 AND city.Name: \"b*\")", - "(\"fare\" > 0)", - testMetadataFilterColumns); + "(city.Name: \"b*\")", + "fare > 0", + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); // With BETWEEN testPushDown( sessionHolder, "((fare BETWEEN 0 AND 5) AND city.Name like 'b%')", - "(fare >= 0 AND fare <= 5 AND city.Name: \"b*\")", - "(\"fare\" >= 0 AND \"fare\" <= 5)", - testMetadataFilterColumns); + "(city.Name: \"b*\")", + "fare >= 0 AND fare <= 5", + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); // The cases of that the metadata filter column exist but cannot be push down testPushDown( sessionHolder, "(fare > 0 OR city.Name like 'b%')", - "(fare > 0 OR city.Name: \"b*\")", + "(city.Name: \"b*\")", null, - testMetadataFilterColumns); + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); testPushDown( sessionHolder, "(fare > 0 AND city.Name like 'b%') OR city.Region.Id = 1", - "((fare > 0 AND city.Name: \"b*\") OR city.Region.Id: 1)", + "((city.Name: \"b*\") OR city.Region.Id: 1)", null, - testMetadataFilterColumns); + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); // Complicated case testPushDown( sessionHolder, "fare = 0 AND (city.Name like 'b%' OR city.Region.Id = 1)", - "(fare: 0 AND (city.Name: \"b*\" OR city.Region.Id: 1))", - "(\"fare\" = 0)", - testMetadataFilterColumns); + "((city.Name: \"b*\" OR city.Region.Id: 1))", + "fare = 0", + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); + testPushDown( + sessionHolder, + "city.Region.Id = 1 AND (city.Name like 'b%' OR fare = 0)", + "(city.Region.Id: 1 AND (city.Name: \"b*\"))", + "\"city.Region.Id\" = 1", + TypeProvider.viewOf(ImmutableMap.of("city.Region.Id", BIGINT)), + testMetadataFilterColumns, + testDataColumnsWithRangeBounds); } @Test @@ -307,17 +324,44 @@ public void testClpWildcardUdf() private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedRemaining) { - ClpExpression clpExpression = tryPushDown(sql, sessionHolder, ImmutableSet.of()); + ClpExpression clpExpression = tryPushDown(sql, sessionHolder, ImmutableSet.of(), ImmutableSet.of()); testFilter(clpExpression, expectedKql, expectedRemaining, sessionHolder); } - private void testPushDown(SessionHolder sessionHolder, String sql, String expectedKql, String expectedMetadataSqlQuery, Set metadataFilterColumns) + private void testPushDown( + SessionHolder sessionHolder, + String sql, + String expectedKql, + String expectedMetadataSqlQuery, + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) + { + testPushDown( + sessionHolder, + sql, + expectedKql, + expectedMetadataSqlQuery, + typeProvider, + metadataFilterColumns, + dataColumnsWithRangeBounds); + } + + private void testPushDown( + SessionHolder sessionHolder, + String sql, + String expectedKql, + String expectedMetadataSqlQuery, + TypeProvider typeProviderForMetadataExpression, + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) { - ClpExpression clpExpression = tryPushDown(sql, sessionHolder, metadataFilterColumns); + ClpExpression clpExpression = tryPushDown(sql, sessionHolder, metadataFilterColumns, dataColumnsWithRangeBounds); testFilter(clpExpression, expectedKql, null, sessionHolder); if (expectedMetadataSqlQuery != null) { assertTrue(clpExpression.getMetadataExpression().isPresent()); - assertEquals(clpExpression.getMetadataExpression().get(), getRowExpression(expectedMetadataSqlQuery, sessionHolder)); + assertEquals( + clpExpression.getMetadataExpression().get(), + getRowExpression(expectedMetadataSqlQuery, typeProviderForMetadataExpression, sessionHolder)); } else { assertFalse(clpExpression.getMetadataExpression().isPresent()); @@ -327,7 +371,8 @@ private void testPushDown(SessionHolder sessionHolder, String sql, String expect private ClpExpression tryPushDown( String sqlExpression, SessionHolder sessionHolder, - Set metadataFilterColumns) + Set metadataFilterColumns, + Set dataColumnsWithRangeBounds) { RowExpression pushDownExpression = getRowExpression(sqlExpression, sessionHolder); Map assignments = new HashMap<>(variableToColumnHandleMap); @@ -336,7 +381,8 @@ private ClpExpression tryPushDown( standardFunctionResolution, functionAndTypeManager, assignments, - metadataFilterColumns), + metadataFilterColumns, + dataColumnsWithRangeBounds), null); } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java index 8534c8c8fa3ea..ddfacbc285f8e 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java @@ -277,7 +277,7 @@ public void testClpGetJsonString() WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); - ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitMetadataConfig); optimizedPlan = optimizer.optimize(optimizedPlan, session.toConnectorSession(), variableAllocator, planNodeIdAllocator); PlanAssert.assertPlan( From d988cbcd8a29714ce7522d1b7b1540a9c71088e6 Mon Sep 17 00:00:00 2001 From: wraymo Date: Fri, 7 Nov 2025 21:02:56 -0500 Subject: [PATCH 22/26] apply coderabbit suggestion --- .../clp/split/ClpMySqlSplitMetadataExpressionConverter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index 1c60bcf6b3b51..fee8e13e2ede2 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -93,6 +93,7 @@ public ClpMySqlSplitMetadataExpressionConverter( */ public String transform(RowExpression expression) { + seenRequired.clear(); String sql = expression.accept(this, null); Set missing = new HashSet<>(requiredColumns); missing.removeAll(seenRequired); From fbc64987d5230e3c663fba8a71a2d648fe38ba39 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Nov 2025 13:45:52 -0500 Subject: [PATCH 23/26] Added support for incorporating metadata columns during metadata resolution; fixed a few bugs --- .../metadata/ClpMySqlMetadataProvider.java | 18 +++++++++-- .../clp/optimization/ClpComputePushDown.java | 9 ++++-- .../presto/plugin/clp/TestClpMetadata.java | 30 +++++++++++++++++-- presto-docs/src/main/sphinx/connector/clp.rst | 5 ++-- 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java index a52a24a0a15b4..fdb722b89298d 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMySqlMetadataProvider.java @@ -17,6 +17,7 @@ import com.facebook.presto.plugin.clp.ClpColumnHandle; import com.facebook.presto.plugin.clp.ClpConfig; import com.facebook.presto.plugin.clp.ClpTableHandle; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.SchemaTableName; import com.google.common.collect.ImmutableList; @@ -28,8 +29,10 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; +import java.util.ArrayList; import java.util.List; +import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; public class ClpMySqlMetadataProvider @@ -56,9 +59,10 @@ public class ClpMySqlMetadataProvider private static final Logger log = Logger.get(ClpMySqlMetadataProvider.class); private final ClpConfig config; + private final ClpSplitMetadataConfig splitMetadataConfig; @Inject - public ClpMySqlMetadataProvider(ClpConfig config) + public ClpMySqlMetadataProvider(ClpConfig config, ClpSplitMetadataConfig splitMetadataConfig) { try { Class.forName("com.mysql.cj.jdbc.Driver"); @@ -68,6 +72,7 @@ public ClpMySqlMetadataProvider(ClpConfig config) throw new RuntimeException("MySQL JDBC driver not found", e); } this.config = config; + this.splitMetadataConfig = splitMetadataConfig; } @Override @@ -87,7 +92,16 @@ public List listColumnHandles(SchemaTableName schemaTableName) catch (SQLException e) { log.warn("Failed to load table schema for %s: %s", schemaTableName.getTableName(), e); } - return schemaTree.collectColumnHandles(); + + List metadataColumns = + splitMetadataConfig.getMetadataColumns(schemaTableName).entrySet().stream() + .map(entry -> new ClpColumnHandle(entry.getKey(), entry.getValue())) + .collect(toImmutableList()); + List dataColumns = schemaTree.collectColumnHandles(); + List allColumns = new ArrayList<>(dataColumns); + allColumns.addAll(metadataColumns); + + return allColumns; } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index 557e68967e398..d588a5e27b14d 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -117,10 +117,13 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod Optional metadataExpression = clpExpression.getMetadataExpression(); Optional remainingPredicate = clpExpression.getRemainingExpression(); + if (!metadataExpression.isPresent() && + !metadataConfig.getRequiredColumns(clpTableHandle.getSchemaTableName()).isEmpty()) { + throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "required filters must be specified"); + } + if (kqlQuery.isPresent() || metadataExpression.isPresent()) { - if (kqlQuery.isPresent()) { - log.debug("KQL query: %s", kqlQuery.get()); - } + kqlQuery.ifPresent(s -> log.debug("KQL query: %s", s)); ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataExpression); TableHandle newTableHandle = new TableHandle( diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java index 9350729aec2d7..681d70ab1f1a6 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpMetadata.java @@ -19,6 +19,7 @@ import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; import com.facebook.presto.plugin.clp.mockdb.ClpMockMetadataDatabase; import com.facebook.presto.plugin.clp.mockdb.table.ColumnMetadataTableRows; +import com.facebook.presto.plugin.clp.split.ClpSplitMetadataConfig; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorTableMetadata; import com.facebook.presto.spi.SchemaTableName; @@ -29,6 +30,10 @@ import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.io.FileNotFoundException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; import java.util.HashSet; import java.util.Optional; @@ -36,6 +41,7 @@ import static com.facebook.presto.common.type.BooleanType.BOOLEAN; import static com.facebook.presto.common.type.DoubleType.DOUBLE; import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager; import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Boolean; import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.ClpString; @@ -54,7 +60,7 @@ public class TestClpMetadata private ClpMetadata metadata; @BeforeMethod - public void setUp() + public void setUp() throws FileNotFoundException, URISyntaxException { mockMetadataDatabase = ClpMockMetadataDatabase .builder() @@ -83,7 +89,15 @@ public void setUp() .setMetadataDbUser(mockMetadataDatabase.getUsername()) .setMetadataDbPassword(mockMetadataDatabase.getPassword()) .setMetadataTablePrefix(mockMetadataDatabase.getTablePrefix()); - ClpMetadataProvider metadataProvider = new ClpMySqlMetadataProvider(config); + + URL resource = getClass().getClassLoader().getResource("test-mysql-split-metadata.json"); + if (resource == null) { + throw new FileNotFoundException("test-mysql-split-metadata.json not found in resources"); + } + + config.setSplitMetadataConfigPath(Paths.get(resource.toURI()).toAbsolutePath().toString()); + ClpMetadataProvider metadataProvider = + new ClpMySqlMetadataProvider(config, new ClpSplitMetadataConfig(config, createTestFunctionAndTypeManager())); metadata = new ClpMetadata(config, metadataProvider); } @@ -115,6 +129,7 @@ public void testGetTableMetadata() ClpTableHandle clpTableHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME)); ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); ImmutableSet columnMetadata = ImmutableSet.builder() + // data columns .add(ColumnMetadata.builder() .setName("a_bigint") .setType(BIGINT) @@ -150,6 +165,17 @@ public void testGetTableMetadata() RowType.field("h", new ArrayType(VARCHAR)))))))) .setNullable(true) .build()) + // metadata columns + .add(ColumnMetadata.builder() + .setName("level") + .setType(BIGINT) + .setNullable(true) + .build()) + .add(ColumnMetadata.builder() + .setName("author") + .setType(VARCHAR) + .setNullable(true) + .build()) .build(); assertEquals(columnMetadata, ImmutableSet.copyOf(tableMetadata.getColumns())); } diff --git a/presto-docs/src/main/sphinx/connector/clp.rst b/presto-docs/src/main/sphinx/connector/clp.rst index bf7f345348811..ab1e41930deb6 100644 --- a/presto-docs/src/main/sphinx/connector/clp.rst +++ b/presto-docs/src/main/sphinx/connector/clp.rst @@ -195,8 +195,9 @@ Example:: Filter Rules ------------ -Each entry in ``filterRules`` specifies a data column or a metadata column that must or can appear in query filters. -These rules ensure that queries contain sufficient constraints for efficient split selection. +Each entry in ``filterRules`` specifies either a data column that has a range mapping to a metadata column or a metadata +column that must appear in query filters. These rules ensure that queries contain sufficient constraints for efficient +split selection. Supported fields: From f2db72c909ce9cd7c88c52933636156e647a9ef4 Mon Sep 17 00:00:00 2001 From: wraymo Date: Mon, 10 Nov 2025 15:13:03 -0500 Subject: [PATCH 24/26] apply coderabbit suggestions --- .../split/ClpMySqlSplitMetadataExpressionConverter.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index fee8e13e2ede2..bcefa6d036c87 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -41,6 +41,7 @@ import static com.facebook.presto.common.function.OperatorType.IS_DISTINCT_FROM; import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_COLUMN_NOT_IN_FILTER; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -108,7 +109,7 @@ public String visitCall(CallExpression node, Void context) { FunctionHandle functionHandle = node.getFunctionHandle(); if (functionResolution.isNotFunction(functionHandle)) { - return format("NOT " + node.getArguments().get(0).accept(this, null)); + return format("NOT (%s)", node.getArguments().get(0).accept(this, null)); } FunctionMetadata functionMetadata = functionManager.getFunctionMetadata(node.getFunctionHandle()); @@ -133,7 +134,7 @@ public String visitCall(CallExpression node, Void context) } } - throw new PrestoException(ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query" + node); + throw new PrestoException(CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query: " + node); } @Override @@ -149,7 +150,8 @@ public String visitSpecialForm(SpecialFormExpression node, Void context) case IS_NULL: return "(" + node.getArguments().get(0).accept(this, context) + " IS NULL)"; default: - throw new PrestoException(ClpErrorCode.CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query" + node); + throw new PrestoException( + CLP_PUSHDOWN_UNSUPPORTED_EXPRESSION, "Unsupported metadata query: " + node); } } From 5e58a6057f4cc11e1073e737005672abf2231099 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 11 Nov 2025 09:18:01 -0500 Subject: [PATCH 25/26] fix format issue --- .../clp/split/ClpMySqlSplitMetadataExpressionConverter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java index bcefa6d036c87..f1c14415ae3a7 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitMetadataExpressionConverter.java @@ -17,7 +17,6 @@ import com.facebook.presto.common.type.DecimalType; import com.facebook.presto.common.type.Decimals; import com.facebook.presto.common.type.Type; -import com.facebook.presto.plugin.clp.ClpErrorCode; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.FunctionHandle; import com.facebook.presto.spi.function.FunctionMetadata; From 81373999c20a7a90f4f0a5468661b1d9d3ff7d26 Mon Sep 17 00:00:00 2001 From: wraymo Date: Tue, 11 Nov 2025 10:20:35 -0500 Subject: [PATCH 26/26] validate required columns in split provider --- .../clp/optimization/ClpComputePushDown.java | 19 ------------------- .../clp/split/ClpMySqlSplitProvider.java | 8 ++++++-- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java index d588a5e27b14d..47eb74d46732c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java @@ -21,7 +21,6 @@ import com.facebook.presto.spi.ConnectorPlanOptimizer; import com.facebook.presto.spi.ConnectorPlanRewriter; import com.facebook.presto.spi.ConnectorSession; -import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.TableHandle; import com.facebook.presto.spi.VariableAllocator; @@ -37,7 +36,6 @@ import java.util.Map; import java.util.Optional; -import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_COLUMN_NOT_IN_FILTER; import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith; import static java.util.Objects.requireNonNull; @@ -76,18 +74,6 @@ public Rewriter(PlanNodeIdAllocator idAllocator) this.idAllocator = idAllocator; } - @Override - public PlanNode visitTableScan(TableScanNode node, RewriteContext context) - { - TableHandle tableHandle = node.getTable(); - ClpTableHandle clpTableHandle = (ClpTableHandle) tableHandle.getConnectorHandle(); - if (!metadataConfig.getRequiredColumns(clpTableHandle.getSchemaTableName()).isEmpty()) { - throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "required filters must be specified"); - } - - return super.visitTableScan(node, context); - } - @Override public PlanNode visitFilter(FilterNode node, RewriteContext context) { @@ -117,11 +103,6 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod Optional metadataExpression = clpExpression.getMetadataExpression(); Optional remainingPredicate = clpExpression.getRemainingExpression(); - if (!metadataExpression.isPresent() && - !metadataConfig.getRequiredColumns(clpTableHandle.getSchemaTableName()).isEmpty()) { - throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "required filters must be specified"); - } - if (kqlQuery.isPresent() || metadataExpression.isPresent()) { kqlQuery.ifPresent(s -> log.debug("KQL query: %s", s)); diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java index 529ccd6d9b1ab..d0905611a2ada 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java @@ -18,6 +18,7 @@ import com.facebook.presto.plugin.clp.ClpSplit; import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.function.FunctionMetadataManager; import com.facebook.presto.spi.function.StandardFunctionResolution; @@ -32,6 +33,7 @@ import java.sql.SQLException; import java.util.List; +import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_MANDATORY_COLUMN_NOT_IN_FILTER; import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE; import static java.lang.String.format; @@ -83,9 +85,8 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) String tableName = clpTableHandle.getSchemaTableName().getTableName(); String archivePathQuery = format(SQL_SELECT_ARCHIVES_TEMPLATE, config.getMetadataTablePrefix(), tableName); + SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); if (clpTableLayoutHandle.getMetadataExpression().isPresent()) { - SchemaTableName schemaTableName = clpTableHandle.getSchemaTableName(); - ClpMySqlSplitMetadataExpressionConverter converter = new ClpMySqlSplitMetadataExpressionConverter( functionManager, @@ -96,6 +97,9 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) String metadataFilterQuery = converter.transform(clpTableLayoutHandle.getMetadataExpression().get()); archivePathQuery += " AND (" + metadataFilterQuery + ")"; } + else if (!metadataConfig.getRequiredColumns(schemaTableName).isEmpty()) { + throw new PrestoException(CLP_MANDATORY_COLUMN_NOT_IN_FILTER, "No required columns specified in the filter"); + } log.debug("Query for archive: %s", archivePathQuery); try (Connection connection = getConnection()) {