From b8a18231698e66b61d277bae33cc710a3d3af383 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Sat, 29 Nov 2025 21:01:53 +0800 Subject: [PATCH 1/2] [CALCITE-5787] Add interface in RelNode for getInputFieldsUsed --- .../calcite/rel/metadata/BuiltInMetadata.java | 38 ++++++ .../metadata/DefaultRelMetadataProvider.java | 1 + .../rel/metadata/RelMdInputFieldsUsed.java | 120 ++++++++++++++++++ .../rel/metadata/RelMetadataQuery.java | 17 +++ .../apache/calcite/util/BuiltInMethod.java | 2 + .../apache/calcite/test/RelMetadataTest.java | 86 +++++++++++++ 6 files changed, 264 insertions(+) create mode 100644 core/src/main/java/org/apache/calcite/rel/metadata/RelMdInputFieldsUsed.java diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java b/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java index b0bc96b39c92..473241e8c3be 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java @@ -76,6 +76,44 @@ interface Handler extends MetadataHandler { } } + /** + * Metadata that identifies, per input, which fields (columns) of each + * input are referenced by a relational expression. + * + * <p>For a relational expression with N inputs, this returns an + * {@link ImmutableList} of length N. Each element is an + * {@link ImmutableBitSet} with bits set for zero-based field ordinals of + * that input which are referenced by the expression. + * + * <p>Returns an empty {@link ImmutableList} if the information cannot be determined. + */ + public interface InputFieldsUsed extends Metadata { + MetadataDef<InputFieldsUsed> DEF = + MetadataDef.of(InputFieldsUsed.class, InputFieldsUsed.Handler.class, + BuiltInMethod.INPUT_FIELDS_USED.method); + + /** + * Returns, for each input of this relational expression, a bit set of the + * referenced field ordinals. + * + * @return an {@link ImmutableList} of {@link ImmutableBitSet} of length N + * where N is the number of inputs, or an empty {@link ImmutableList} + * if the information is not available + */ + ImmutableList<ImmutableBitSet> getInputFieldsUsed(); + + /** Handler API. */ + @FunctionalInterface + interface Handler extends MetadataHandler<InputFieldsUsed> { + ImmutableList<ImmutableBitSet> getInputFieldsUsed(RelNode r, + RelMetadataQuery mq); + + @Override default MetadataDef<InputFieldsUsed> getDef() { + return DEF; + } + } + } + /** Metadata about which combinations of columns are unique identifiers. */ public interface UniqueKeys extends Metadata { MetadataDef DEF = diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/DefaultRelMetadataProvider.java b/core/src/main/java/org/apache/calcite/rel/metadata/DefaultRelMetadataProvider.java index d47e5aac23d4..c4b62b83f482 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/DefaultRelMetadataProvider.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/DefaultRelMetadataProvider.java @@ -62,6 +62,7 @@ protected DefaultRelMetadataProvider() { RelMdSelectivity.SOURCE, RelMdExplainVisibility.SOURCE, RelMdPredicates.SOURCE, + RelMdInputFieldsUsed.SOURCE, RelMdAllPredicates.SOURCE, RelMdCollation.SOURCE, RelMdFunctionalDependency.SOURCE)); diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdInputFieldsUsed.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdInputFieldsUsed.java new file mode 100644 index 000000000000..a2b33589f887 --- /dev/null +++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdInputFieldsUsed.java @@ -0,0 +1,120 @@ +/* + * Licensed
to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.rel.metadata; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Calc; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexProgram; +import org.apache.calcite.util.ImmutableBitSet; + +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Set; + +/** + * Metadata provider to determine which input fields are used by a RelNode. 
+ */ +public class RelMdInputFieldsUsed + implements MetadataHandler<BuiltInMetadata.InputFieldsUsed> { + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource( + new RelMdInputFieldsUsed(), BuiltInMetadata.InputFieldsUsed.Handler.class); + + @Override public MetadataDef<BuiltInMetadata.InputFieldsUsed> getDef() { + return BuiltInMetadata.InputFieldsUsed.DEF; + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(RelNode rel, + RelMetadataQuery mq) { + return ImmutableList.of(); + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(TableScan scan, + RelMetadataQuery mq) { + final BuiltInMetadata.InputFieldsUsed.Handler handler = + scan.getTable().unwrap(BuiltInMetadata.InputFieldsUsed.Handler.class); + if (handler != null) { + return handler.getInputFieldsUsed(scan, mq); + } + final int fieldCount = scan.getRowType().getFieldCount(); + return ImmutableList.of(ImmutableBitSet.range(fieldCount)); + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(Project project, + RelMetadataQuery mq) { + final ImmutableBitSet bits = RelOptUtil.InputFinder.bits(project.getProjects(), null); + return ImmutableList.of(bits); + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(Filter filter, RelMetadataQuery mq) { + // A Filter passes every input field through to its parent, so all are regarded as used. + return ImmutableList.of(ImmutableBitSet.range(filter.getInput().getRowType().getFieldCount())); + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(Calc calc, + RelMetadataQuery mq) { + final RexProgram program = calc.getProgram(); + final List<RexNode> expandedProjects = program.expandList(program.getProjectList()); + final RexNode cond = program.getCondition() == null + ? 
null + : program.expandLocalRef(program.getCondition()); + final ImmutableBitSet bits = RelOptUtil.InputFinder.bits(expandedProjects, cond); + return ImmutableList.of(bits); + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(Join join, + RelMetadataQuery mq) { + List<ImmutableBitSet> leftInputFieldsUsed = mq.getInputFieldsUsed(join.getLeft()); + List<ImmutableBitSet> rightInputFieldsUsed = mq.getInputFieldsUsed(join.getRight()); + assert leftInputFieldsUsed.size() == 1 && rightInputFieldsUsed.size() == 1; + + ImmutableBitSet rightUsedBits = rightInputFieldsUsed.get(0); + if (join.getJoinType() == JoinRelType.SEMI + || join.getJoinType() == JoinRelType.ANTI) { + rightUsedBits = ImmutableBitSet.of(); + } + + return ImmutableList.of(leftInputFieldsUsed.get(0), rightUsedBits); + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(SetOp setOp, + RelMetadataQuery mq) { + final ImmutableList.Builder<ImmutableBitSet> builder = ImmutableList.builder(); + for (RelNode input : setOp.getInputs()) { + ImmutableList<ImmutableBitSet> inputFieldsBits = mq.getInputFieldsUsed(input); + assert inputFieldsBits.size() == 1; + builder.add(inputFieldsBits.get(0)); + } + return builder.build(); + } + + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(Aggregate agg, + RelMetadataQuery mq) { + Set<Integer> fields = RelOptUtil.getAllFields(agg); + return ImmutableList.of(ImmutableBitSet.of(fields)); + } +} diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java index 804484625457..f86f1de868fd 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMetadataQuery.java @@ -110,6 +110,7 @@ public class RelMetadataQuery extends RelMetadataQueryBase { private BuiltInMetadata.UniqueKeys.Handler uniqueKeysHandler; private BuiltInMetadata.LowerBoundCost.Handler lowerBoundCostHandler; private BuiltInMetadata.FunctionalDependency.Handler functionalDependencyHandler; + private
BuiltInMetadata.InputFieldsUsed.Handler inputFieldsUsedHandler; /** * Creates the instance with {@link JaninoRelMetadataProvider} instance @@ -158,6 +159,7 @@ public RelMetadataQuery(MetadataHandlerProvider provider) { this.lowerBoundCostHandler = provider.handler(BuiltInMetadata.LowerBoundCost.Handler.class); this.functionalDependencyHandler = provider.handler(BuiltInMetadata.FunctionalDependency.Handler.class); + this.inputFieldsUsedHandler = provider.handler(BuiltInMetadata.InputFieldsUsed.Handler.class); } /** Creates and initializes the instance that will serve as a prototype for @@ -193,6 +195,7 @@ private RelMetadataQuery(@SuppressWarnings("unused") boolean dummy) { this.lowerBoundCostHandler = initialHandler(BuiltInMetadata.LowerBoundCost.Handler.class); this.functionalDependencyHandler = initialHandler(BuiltInMetadata.FunctionalDependency.Handler.class); + this.inputFieldsUsedHandler = initialHandler(BuiltInMetadata.InputFieldsUsed.Handler.class); } private RelMetadataQuery( @@ -225,6 +228,7 @@ private RelMetadataQuery( this.uniqueKeysHandler = prototype.uniqueKeysHandler; this.lowerBoundCostHandler = prototype.lowerBoundCostHandler; this.functionalDependencyHandler = prototype.functionalDependencyHandler; + this.inputFieldsUsedHandler = prototype.inputFieldsUsedHandler; } //~ Methods ---------------------------------------------------------------- @@ -1058,4 +1062,17 @@ public ArrowSet getFDs(RelNode rel) { } } } + + /** + * Returns, for each input of a RelNode, the bit set of field ordinals that are used. 
+ */ + public ImmutableList<ImmutableBitSet> getInputFieldsUsed(RelNode rel) { + for (;;) { + try { + return inputFieldsUsedHandler.getInputFieldsUsed(rel, this); + } catch (MetadataHandlerProvider.NoHandler e) { + inputFieldsUsedHandler = revise(BuiltInMetadata.InputFieldsUsed.Handler.class); + } + } + } } diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index 5fec119d65bb..8fc993f39690 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -64,6 +64,7 @@ import org.apache.calcite.rel.metadata.BuiltInMetadata.ExplainVisibility; import org.apache.calcite.rel.metadata.BuiltInMetadata.ExpressionLineage; import org.apache.calcite.rel.metadata.BuiltInMetadata.FunctionalDependency; +import org.apache.calcite.rel.metadata.BuiltInMetadata.InputFieldsUsed; import org.apache.calcite.rel.metadata.BuiltInMetadata.LowerBoundCost; import org.apache.calcite.rel.metadata.BuiltInMetadata.MaxRowCount; import org.apache.calcite.rel.metadata.BuiltInMetadata.Measure; @@ -896,6 +897,7 @@ public enum BuiltInMethod { STR_TO_MAP(SqlFunctions.class, "strToMap", String.class, String.class, String.class), SUBSTRING_INDEX(SqlFunctions.class, "substringIndex", String.class, String.class, int.class), SELECTIVITY(Selectivity.class, "getSelectivity", RexNode.class), + INPUT_FIELDS_USED(InputFieldsUsed.class, "getInputFieldsUsed"), UNIQUE_KEYS(UniqueKeys.class, "getUniqueKeys", boolean.class), AVERAGE_ROW_SIZE(Size.class, "averageRowSize"), AVERAGE_COLUMN_SIZES(Size.class, "averageColumnSizes"), diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java index 9213bc23454b..605847b08933 100644 --- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java @@ -43,6 +43,7 @@ import 
org.apache.calcite.rel.SingleRel; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Calc; import org.apache.calcite.rel.core.Correlate; import org.apache.calcite.rel.core.Exchange; import org.apache.calcite.rel.core.Filter; @@ -52,6 +53,7 @@ import org.apache.calcite.rel.core.Minus; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Sample; +import org.apache.calcite.rel.core.SetOp; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableModify; import org.apache.calcite.rel.core.TableScan; @@ -931,6 +933,90 @@ final RelMetadataFixture sql(String sql) { assertThat(fd2, sameInstance(fd1)); } + // ---------------------------------------------------------------------- + // Tests for InputFieldsUsed metadata in RelMdInputFieldsUsed + // ---------------------------------------------------------------------- + + @Test void testInputFieldsUsedSemiJoin() { + final RelBuilder relBuilder = RelBuilderTest.createBuilder(); + relBuilder.scan("EMP"); + relBuilder.scan("DEPT"); + // Build semi-join on DEPTNO + relBuilder.semiJoin( + relBuilder.equals(relBuilder.field(2, 0, "DEPTNO"), + relBuilder.field(2, 1, "DEPTNO"))); + final Join join = (Join) relBuilder.build(); + final RelMetadataQuery mq = join.getCluster().getMetadataQuery(); + final List<ImmutableBitSet> inputFields = mq.getInputFieldsUsed(join); + + // For SEMI join expect left input fields to be all columns of left input + // and right input fields to be empty (semi-join does not require right output). 
+ final int leftCount = join.getLeft().getRowType().getFieldCount(); + assertThat(inputFields, hasSize(2)); + assertThat(inputFields.get(0), equalTo(ImmutableBitSet.range(leftCount))); + assertThat(inputFields.get(1).isEmpty(), is(true)); + } + + @Test void testInputFieldsUsedUnionSetOp() { + final RelBuilder builder = RelBuilderTest.createBuilder(); + builder.scan("DEPT").project(builder.field(1)); // name + builder.scan("EMP").project(builder.field(2)); // job + builder.union(true); + final SetOp setOp = (SetOp) builder.build(); + final RelMetadataQuery mq = setOp.getCluster().getMetadataQuery(); + final List<ImmutableBitSet> inputFields = mq.getInputFieldsUsed(setOp); + assertThat( + inputFields, equalTo( + ImmutableList.of(ImmutableBitSet.of(1), ImmutableBitSet.of(2)))); + } + + @Test void testInputFieldsUsedProject() { + final RelBuilder builder = RelBuilderTest.createBuilder(); + final RelNode project = builder + .scan("EMP") + .project(builder.field(0), builder.field(2)) + .build(); + final RelMetadataQuery mq = project.getCluster().getMetadataQuery(); + final List<ImmutableBitSet> inputFields = mq.getInputFieldsUsed(project); + + assertThat(inputFields, hasSize(1)); + assertThat(inputFields.get(0), equalTo(ImmutableBitSet.of(0, 2))); + } + + @Test void testInputFieldsUsedFilter() { + final RelBuilder builder = RelBuilderTest.createBuilder(); + final RelNode filter = builder + .scan("EMP") + .filter(builder.equals(builder.field(2), builder.literal(10))) + .build(); + final RelMetadataQuery mq = filter.getCluster().getMetadataQuery(); + final List<ImmutableBitSet> inputFields = mq.getInputFieldsUsed(filter); + + final int fieldCount = filter.getInput(0).getRowType().getFieldCount(); + assertThat(inputFields, hasSize(1)); + assertThat(inputFields.get(0), equalTo(ImmutableBitSet.range(fieldCount))); + } + + @Test void testInputFieldsUsedCalc() { + final RelBuilder builder = RelBuilderTest.createBuilder(); + final RelNode proj = builder + .scan("EMP") + .project(builder.field(0), builder.field(2)) + 
.build(); + final HepProgram program = new HepProgramBuilder() + .addRuleInstance(CoreRules.PROJECT_TO_CALC) + .build(); + final HepPlanner planner = new HepPlanner(program); + planner.setRoot(proj); + final RelNode calc = planner.findBestExp(); + assertThat(calc, instanceOf(Calc.class)); + + final RelMetadataQuery mq = calc.getCluster().getMetadataQuery(); + final List<ImmutableBitSet> inputFields = mq.getInputFieldsUsed(calc); + assertThat(inputFields, hasSize(1)); + assertThat(inputFields.get(0), equalTo(ImmutableBitSet.of(0, 2))); + } + // ---------------------------------------------------------------------- // Tests for getColumnOrigins // ---------------------------------------------------------------------- From 31aeb5e213cdc66735e1b891e57952cb88321528 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Tue, 2 Dec 2025 07:04:49 +0800 Subject: [PATCH 2/2] Update doc --- .../org/apache/calcite/rel/metadata/BuiltInMetadata.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java b/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java index 473241e8c3be..0f297a95a56c 100644 --- a/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java +++ b/core/src/main/java/org/apache/calcite/rel/metadata/BuiltInMetadata.java @@ -77,8 +77,12 @@ interface Handler extends MetadataHandler { } /** - * Metadata that identifies, per input, which fields (columns) of each - * input are referenced by a relational expression. + * Metadata that identifies, per input, which fields of each + * input are referenced by a relational expression ({@link RelNode}). + * Here, "referenced" means the input field is used by the parent + * RelNode. Operators such as Filter, while not inherently consuming + * all input fields, must preserve them since parent RelNodes may depend on + * these fields. Thus, Filter is regarded as utilizing all fields. * * <p>For a relational expression with N inputs, this returns an * {@link ImmutableList} of length N. Each element is an